1from __future__ import annotations
2
3import functools
4import logging
5import typing
6import warnings
7from types import TracebackType
8from urllib.parse import urljoin
9
10from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11from ._request_methods import RequestMethods
12from .connection import ProxyConfig
13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19)
20from .response import BaseHTTPResponse
21from .util.connection import _TYPE_SOCKET_OPTIONS
22from .util.proxy import connection_requires_http_tunnel
23from .util.retry import Retry
24from .util.timeout import Timeout
25from .util.url import Url, parse_url
26
27if typing.TYPE_CHECKING:
28 import ssl
29
30 from typing_extensions import Self
31
# Names exported by ``from <this module> import *``.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger; ``PoolManager.urlopen`` uses it to report redirects.
log = logging.getLogger(__name__)

# Keyword arguments that ``PoolManager._new_pool`` strips from the pool
# kwargs before constructing a pool for the plain "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)
# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
# It is substituted by ``_default_key_normalizer`` and
# ``PoolManager._new_pool`` whenever the context has no (or a ``None``)
# ``blocksize`` entry.
_DEFAULT_BLOCKSIZE = 16384
54
55
class PoolKey(typing.NamedTuple):
    """
    All known keyword arguments that could be provided to the pool manager, its
    pools, or the underlying connections.

    All custom key schemes should include the fields in this key at a minimum.

    Each field is a request-context key with a ``key_`` prefix added by
    ``_default_key_normalizer``; a context key that itself starts with an
    underscore (for example ``_proxy``) therefore becomes ``key__proxy``.
    Fields absent from the request context are filled with ``None`` by that
    normalizer.
    """

    # Lower-cased by ``_default_key_normalizer`` before the key is built.
    key_scheme: str
    key_host: str
    key_port: int | None
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None
    # The fields from ``key_key_file`` through ``key_ssl_context`` mirror
    # entries of the module-level ``SSL_KEYWORDS`` tuple.
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None
    key_maxsize: int | None
    # Dictionaries are converted to a frozenset of their items by the
    # normalizer (applies to headers, _proxy_headers and _socks_options).
    key_headers: frozenset[tuple[str, str]] | None
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None
    # Converted to a tuple by the normalizer when present.
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None
    # The normalizer replaces a missing/``None`` value with _DEFAULT_BLOCKSIZE.
    key_blocksize: int | None
93
94
def _default_key_normalizer(
    key_class: type[PoolKey], request_context: dict[str, typing.Any]
) -> PoolKey:
    """
    Build a connection pool key from a request context dictionary.

    RFC 3986 declares both the scheme and the host case-insensitive, so both
    are lower-cased before the key is constructed. To change this behaviour,
    register alternate callables in ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a namedtuple
        with the ``scheme`` and ``host`` keys at a minimum.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object holding the context for a request. It is
        copied, never modified.
    :type request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype: PoolKey
    """
    # Everything below mutates the mapping, so operate on a shallow copy.
    normalized = request_context.copy()
    for case_insensitive in ("scheme", "host"):
        normalized[case_insensitive] = normalized[case_insensitive].lower()

    # Dictionary-valued entries become frozensets of their items.
    for mapping_name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = normalized.get(mapping_name)
        if mapping is not None:
            normalized[mapping_name] = frozenset(mapping.items())

    # socket_options may arrive as a list; store it as a tuple instead.
    if normalized.get("socket_options") is not None:
        normalized["socket_options"] = tuple(normalized["socket_options"])

    # Rename every kwarg to its namedtuple field name - required because
    # namedtuple fields cannot start with '_'. The snapshot of the keys lets
    # us rename in place while iterating.
    for name in tuple(normalized):
        normalized["key_" + name] = normalized.pop(name)

    # Any field the context did not supply defaults to ``None``.
    for field_name in key_class._fields:
        normalized.setdefault(field_name, None)

    # A missing (or ``None``) block size falls back to the module default.
    if normalized.get("key_blocksize") is None:
        normalized["key_blocksize"] = _DEFAULT_BLOCKSIZE

    return key_class(**normalized)
148
149
#: Maps a URL scheme to the callable that builds a pool key from a request
#: context. Edit this dictionary to change how pool keys are constructed
#: globally; every PoolManager takes its own copy, so an instance can also be
#: customised individually.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class used for it.
pool_classes_by_scheme = dict(http=HTTPConnectionPool, https=HTTPSConnectionPool)
160
161
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # A plain PoolManager talks to hosts directly; ProxyManager assigns both
    # attributes in its constructor.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)
        if "retries" in connection_pool_kw:
            retries = connection_pool_kw["retries"]
            if not isinstance(retries, Retry):
                # When Retry is initialized, raise_on_redirect is based
                # on a redirect boolean value.
                # But requests made via a pool manager always set
                # redirect to False, and raise_on_redirect always ends
                # up being False consequently.
                # Here we fix the issue by setting raise_on_redirect to
                # a value needed by the pool manager without considering
                # the redirect boolean.
                raise_on_redirect = retries is not False
                retries = Retry.from_int(retries, redirect=False)
                retries.raise_on_redirect = raise_on_redirect
                connection_pool_kw = connection_pool_kw.copy()
                connection_pool_kw["retries"] = retries
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> typing.Literal[False]:
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        The supplied ``request_context`` is never modified: the keys that are
        added or removed below are applied to a private copy, so the same
        dictionary can safely be reused for later lookups.

        :raises KeyError: if ``scheme`` has no entry in
            ``self.pool_classes_by_scheme``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw

        # Always work on a copy. Popping "scheme"/"host"/"port" from the
        # caller's dictionary would make a second connection_from_context()
        # call with that same dictionary fail with KeyError.
        pool_kwargs = dict(request_context)

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if pool_kwargs.get("blocksize") is None:
            pool_kwargs["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            pool_kwargs.pop(key, None)

        # TLS-only options must not reach a plain HTTP pool.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                pool_kwargs.pop(kw, None)

        return pool_cls(host, port, **pool_kwargs)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80 here; they are rejected
            # later by connection_from_context().
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        A deprecated ``strict`` entry is removed from ``request_context``
        (with a :class:`DeprecationWarning`) before the pool key is built.

        :raises URLSchemeUnknown: if no key function is registered for the
            scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; it must then contain ``scheme``, ``host`` and ``port``.
        """
        # Lookup and insertion happen under the container's lock so that the
        # check-then-create sequence is not interleaved with another caller.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset": drop the key if it is present.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method; switched to ``GET`` when following a 303.
        :param url: Absolute URL to request.
        :param redirect: Whether redirect responses are followed by this
            manager. The underlying pool is always called with
            ``redirect=False``.
        :param \\**kw: Passed on to the pool's ``urlopen``; ``headers``
            defaults to ``self.headers``.
        :raises MaxRetryError: when the redirect budget is exhausted and the
            retry configuration has ``raise_on_redirect`` set.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects (possibly to another host) are handled below, not by the
        # individual pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries", response.retries)
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Copy first so the caller's header mapping is left untouched.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Budget exhausted but raising is disabled: hand back the
            # redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
506
507
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        resp1 = proxy.request("GET", "https://google.com/")
        resp2 = proxy.request("GET", "https://httpbin.org/")

        print(len(proxy.pools))
        # 1

        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | typing.Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # An existing connection pool is accepted in place of a URL string;
        # its scheme, host and port are used to rebuild the proxy URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
        else:
            str_proxy_url = proxy_url
        proxy = parse_url(str_proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL does not carry one.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # The proxy settings travel with the pool kwargs so that every pool
        # created by this manager receives them.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a connection pool for the given target.

        For the ``"https"`` scheme the pool is keyed on the target host and
        port; for every other scheme the pool for the proxy itself is
        returned.
        """
        if scheme == "https":
            return super().connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super().connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Header names are compared case-insensitively, so a user-supplied
        ``host`` or ``ACCEPT`` header replaces the corresponding default
        instead of being sent next to it.

        :param url: Request URL; its netloc becomes the default ``Host``.
        :param headers: Headers supplied by the caller, if any.
        :return: A new dictionary containing the defaults plus ``headers``.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        if headers:
            user_names = set(headers)
            user_names_lower = {name.lower() for name in user_names}
            # Drop a default only when the user supplied the same header under
            # a different spelling; an exact-name match is overwritten in
            # place by ``update`` below, and an unrelated default is kept.
            headers_ = {
                name: value
                for name, value in headers_.items()
                if name in user_names or name.lower() not in user_names_lower
            }
            headers_.update(headers)
        return headers_

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super().urlopen(method, url, redirect=redirect, **kw)
650
651
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    All remaining keyword arguments are forwarded unchanged to the
    :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager