1from __future__ import annotations
2
3import functools
4import logging
5import typing
6import warnings
7from types import TracebackType
8from urllib.parse import urljoin
9
10from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11from ._request_methods import RequestMethods
12from .connection import ProxyConfig
13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19)
20from .response import BaseHTTPResponse
21from .util.connection import _TYPE_SOCKET_OPTIONS
22from .util.proxy import connection_requires_http_tunnel
23from .util.retry import Retry
24from .util.timeout import Timeout
25from .util.url import Url, parse_url
26
27if typing.TYPE_CHECKING:
28 import ssl
29
30 from typing_extensions import Self
31
# Public names exported by ``from <this module> import *``.
__all__ = [
    "PoolManager",
    "ProxyManager",
    "proxy_from_url",
]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)
36
# Keyword arguments that only make sense for TLS connections.
# ``PoolManager._new_pool`` removes every one of these from the request
# context before building a pool for the plain "http" scheme.
SSL_KEYWORDS = (
    # Client certificate / key material
    "key_file",
    "cert_file",
    # Certificate verification
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    # Protocol version selection
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
_DEFAULT_BLOCKSIZE = 16 * 1024  # 16384 bytes
54
55
class PoolKey(typing.NamedTuple):
    """
    Immutable key identifying one connection pool inside a pool manager.

    It lists every known keyword argument that could be provided to the pool
    manager, its pools, or the underlying connections. Each field is the
    request-context key of the same name with a ``key_`` prefix added (the
    prefix is applied by ``_default_key_normalizer``).

    All custom key schemes should include the fields in this key at a minimum.
    """

    # --- Destination -----------------------------------------------------
    key_scheme: str
    key_host: str
    key_port: int | None

    # --- General pool / connection behaviour -----------------------------
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None

    # --- TLS configuration -----------------------------------------------
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None

    # --- Pool sizing and default headers ---------------------------------
    key_maxsize: int | None
    # Dict-valued options are stored as frozensets of their items.
    key_headers: frozenset[tuple[str, str]] | None

    # --- Proxy configuration (request-context keys start with "_") -------
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None

    # --- Socket level options --------------------------------------------
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None

    # --- Certificate / hostname assertions -------------------------------
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None

    # --- I/O block size ---------------------------------------------------
    key_blocksize: int | None
93
94
95def _default_key_normalizer(
96 key_class: type[PoolKey], request_context: dict[str, typing.Any]
97) -> PoolKey:
98 """
99 Create a pool key out of a request context dictionary.
100
101 According to RFC 3986, both the scheme and host are case-insensitive.
102 Therefore, this function normalizes both before constructing the pool
103 key for an HTTPS request. If you wish to change this behaviour, provide
104 alternate callables to ``key_fn_by_scheme``.
105
106 :param key_class:
107 The class to use when constructing the key. This should be a namedtuple
108 with the ``scheme`` and ``host`` keys at a minimum.
109 :type key_class: namedtuple
110 :param request_context:
111 A dictionary-like object that contain the context for a request.
112 :type request_context: dict
113
114 :return: A namedtuple that can be used as a connection pool key.
115 :rtype: PoolKey
116 """
117 # Since we mutate the dictionary, make a copy first
118 context = request_context.copy()
119 context["scheme"] = context["scheme"].lower()
120 context["host"] = context["host"].lower()
121
122 # These are both dictionaries and need to be transformed into frozensets
123 for key in ("headers", "_proxy_headers", "_socks_options"):
124 if key in context and context[key] is not None:
125 context[key] = frozenset(context[key].items())
126
127 # The socket_options key may be a list and needs to be transformed into a
128 # tuple.
129 socket_opts = context.get("socket_options")
130 if socket_opts is not None:
131 context["socket_options"] = tuple(socket_opts)
132
133 # Map the kwargs to the names in the namedtuple - this is necessary since
134 # namedtuples can't have fields starting with '_'.
135 for key in list(context.keys()):
136 context["key_" + key] = context.pop(key)
137
138 # Default to ``None`` for keys missing from the context
139 for field in key_class._fields:
140 if field not in context:
141 context[field] = None
142
143 # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
144 if context.get("key_blocksize") is None:
145 context["key_blocksize"] = _DEFAULT_BLOCKSIZE
146
147 return key_class(**context)
148
149
#: Maps a URL scheme to the callable that builds a pool key from a request
#: context. Replace or add entries to alter the way pool keys are
#: constructed, if desired. Each PoolManager makes a copy of this dictionary,
#: so it can be configured globally here, or individually on the instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class instantiated for it.
pool_classes_by_scheme = dict(http=HTTPConnectionPool, https=HTTPSConnectionPool)
160
161
class PoolManager(RequestMethods):
    """
    Send arbitrary requests while the manager transparently creates, caches
    and reuses the connection pools they need.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Class-level defaults: a plain PoolManager talks to no proxy.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        """
        Store the pool keyword arguments and set up an empty pool cache.

        A ``retries`` keyword that is not already a :class:`Retry` instance is
        converted into one before being stored.
        """
        super().__init__(headers)

        # urlopen() on this class always calls the pool with redirect=False
        # and follows redirects itself. A non-Retry ``retries`` value (int,
        # bool, ...) is therefore turned into a Retry object here, up front,
        # instead of being left for the pool to normalize while
        # redirect=False is in effect, so that redirect handling stays
        # governed solely by PoolManager logic.
        if "retries" in connection_pool_kw:
            configured_retries = connection_pool_kw["retries"]
            if not isinstance(configured_retries, Retry):
                connection_pool_kw = {
                    **connection_pool_kw,
                    "retries": Retry.from_int(configured_retries),
                }
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Only the key functions are copied; the pool class
        # mapping is the shared module-level dictionary.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self) -> Self:
        """Enter the context manager; the manager itself is the target."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> typing.Literal[False]:
        """Clear all cached pools on exit; never suppresses an exception."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used; otherwise a copy of ``self.connection_pool_kw``
        is used. A ``request_context`` passed in by the caller is modified in
        place. This method is used to actually create the connection pools
        handed out by :meth:`connection_from_url` and companion methods. It is
        intended to be overridden for customization.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]

        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        discard: tuple[str, ...] = ("scheme", "host", "port")
        if scheme == "http":
            # TLS-only options are also dropped for plain-HTTP pools.
            discard = (*discard, *SSL_KEYWORDS)
        for name in discard:
            request_context.pop(name, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme`` (falling back to 80). A
        missing ``scheme`` is treated as ``"http"``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """
        if not host:
            raise LocationValueError("No host specified.")

        effective_scheme = scheme or "http"
        context = self._merge_pool_kwargs(pool_kwargs)
        context.update(
            scheme=effective_scheme,
            port=port or port_by_scheme.get(effective_scheme.lower(), 80),
            host=host,
        )

        return self.connection_from_context(context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable. A
        ``strict`` entry, if present, triggers a ``DeprecationWarning`` and is
        removed from ``request_context``.

        :raises URLSchemeUnknown: if no key function is registered for the scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        build_pool_key = self.key_fn_by_scheme.get(scheme)
        if not build_pool_key:
            raise URLSchemeUnknown(scheme)

        return self.connection_from_pool_key(
            build_pool_key(request_context), request_context=request_context
        )

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields. ``request_context`` must contain the ``scheme``,
        ``host`` and ``port`` entries; it is only read when no pool is cached
        under ``pool_key``.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            cached_pool = self.pools.get(pool_key)
            if cached_pool:
                return cached_pool

            # Nothing cached for this key: make a fresh ConnectionPool of the
            # desired type and remember it.
            fresh_pool = self._new_pool(
                request_context["scheme"],
                request_context["host"],
                request_context["port"],
                request_context=request_context,
            )
            self.pools[pool_key] = fresh_pool

        return fresh_pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        The URL is parsed and its host, port and scheme are handed to
        :meth:`connection_from_host` together with ``pool_kwargs``, where
        ``pool_kwargs`` is merged over ``self.connection_pool_kw``. Note that
        if a new pool does not need to be created for the request, the
        provided ``pool_kwargs`` are not used to build one.
        """
        parsed = parse_url(url)
        return self.connection_from_host(
            parsed.host, port=parsed.port, scheme=parsed.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        merged = self.connection_pool_kw.copy()
        for name, value in (override or {}).items():
            if value is None:
                # ``None`` means "drop this setting"; absent keys are fine.
                merged.pop(name, None)
            else:
                merged[name] = value
        return merged

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel. Always ``False`` when no proxy is configured.
        """
        return self.proxy is not None and not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param redirect:
            When true, a redirect response is followed by calling this method
            again with the redirect target; when false, the first response is
            returned as-is.
        """
        parsed = parse_url(url)

        if parsed.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        pool = self.connection_from_host(
            parsed.host, port=parsed.port, scheme=parsed.scheme
        )

        # The pool must neither enforce same-host nor follow redirects:
        # both are handled by this method.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        # Send the full URL only when the proxy needs the absolute form.
        request_target = (
            url if self._proxy_requires_url_absolute_form(parsed) else parsed.request_uri
        )
        response = pool.urlopen(method, request_target, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        # Prefer an explicit per-request value, else continue with the retry
        # state carried by the response.
        retries = kw.get("retries", response.retries)
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not pool.is_same_host(
            redirect_location
        ):
            forwarded_headers = kw["headers"].copy()
            for header_name in kw["headers"]:
                if header_name.lower() in retries.remove_headers_on_redirect:
                    forwarded_headers.pop(header_name, None)
            kw["headers"] = forwarded_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=pool)
        except MaxRetryError:
            # Redirect budget exhausted: either surface the error or hand
            # back the redirect response itself.
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
504
505
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        resp1 = proxy.request("GET", "https://google.com/")
        resp2 = proxy.request("GET", "https://httpbin.org/")

        print(len(proxy.pools))
        # 1

        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | typing.Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        """
        Parse and validate the proxy location, then initialise the underlying
        :class:`PoolManager` with the proxy settings added to the pool
        keyword arguments.

        :raises ProxySchemeUnknown:
            if the proxy scheme is neither ``http`` nor ``https``.
        """
        # A connection pool object is also accepted in place of a URL
        # string; its scheme, host and port are turned back into a URL.
        proxy_location = (
            f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
            if isinstance(proxy_url, HTTPConnectionPool)
            else proxy_url
        )
        proxy = parse_url(proxy_location)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        if not proxy.port:
            # No explicit port: use the scheme's default one.
            proxy = proxy._replace(port=port_by_scheme.get(proxy.scheme, 80))

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get the connection pool to use for the given destination.

        For the ``https`` scheme the pool is looked up by the destination's
        own host, port and scheme. For any other scheme the proxy's host,
        port and scheme are used instead.
        """
        if scheme != "https":
            host = self.proxy.host  # type: ignore[union-attr]
            port = self.proxy.port  # type: ignore[union-attr]
            scheme = self.proxy.scheme  # type: ignore[union-attr]

        return super().connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        proxy_request_headers = {"Accept": "*/*"}

        destination = parse_url(url).netloc
        if destination:
            proxy_request_headers["Host"] = destination

        # User supplied values win over the defaults above.
        if headers:
            proxy_request_headers.update(headers)
        return proxy_request_headers

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        parsed = parse_url(url)
        uses_tunnel = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed.scheme
        )
        if not uses_tunnel:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            request_headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, request_headers)

        return super().urlopen(method, url, redirect=redirect, **kw)
648
649
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    :param url:
        The URL of the proxy; passed to :class:`ProxyManager` as ``proxy_url``.

    :param \\**kw:
        Any further keyword arguments are forwarded to :class:`ProxyManager`
        unchanged.

    :return: The newly created :class:`ProxyManager`.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager