1from __future__ import annotations
2
3import functools
4import logging
5import typing
6import warnings
7from types import TracebackType
8from urllib.parse import urljoin
9
10from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11from ._request_methods import RequestMethods
12from .connection import ProxyConfig
13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19)
20from .response import BaseHTTPResponse
21from .util.connection import _TYPE_SOCKET_OPTIONS
22from .util.proxy import connection_requires_http_tunnel
23from .util.retry import Retry
24from .util.timeout import Timeout
25from .util.url import Url, parse_url
26
27if typing.TYPE_CHECKING:
28 import ssl
29
30 from typing_extensions import Self
31
# Public names exported by this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


log = logging.getLogger(__name__)

# Names of the TLS-related keyword arguments. ``PoolManager._new_pool``
# removes every one of these from the request context before it constructs
# a pool for the plain ``http`` scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)
# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7.
# It is substituted both in the pool key and in the keyword arguments of a
# new pool whenever ``blocksize`` is missing or explicitly ``None``.
_DEFAULT_BLOCKSIZE = 16384
54
55
class PoolKey(typing.NamedTuple):
    """
    All known keyword arguments that could be provided to the pool manager, its
    pools, or the underlying connections.

    All custom key schemes should include the fields in this key at a minimum.

    Every field is named ``key_`` followed by the request context key it is
    filled from (see ``_default_key_normalizer``). The prefix exists because
    namedtuple fields cannot start with an underscore, which is why context
    keys such as ``_proxy`` show up here as ``key__proxy``.
    """

    # Connection target (scheme and host are lower-cased by the default
    # key normalizer).
    key_scheme: str
    key_host: str
    key_port: int | None
    # General pool / connection options.
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None
    # TLS options.
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None
    key_maxsize: int | None
    # Mapping-valued context entries are stored as frozensets of
    # ``(name, value)`` pairs by the default key normalizer.
    key_headers: frozenset[tuple[str, str]] | None
    # Proxy settings; the double underscore comes from the leading
    # underscore of the context keys (``_proxy``, ``_proxy_headers``, ...).
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None
    # Converted to a tuple by the default key normalizer when present.
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None
    # Never ``None`` when built by the default key normalizer, which falls
    # back to ``_DEFAULT_BLOCKSIZE``.
    key_blocksize: int | None
93
94
def _default_key_normalizer(
    key_class: type[PoolKey], request_context: dict[str, typing.Any]
) -> PoolKey:
    """
    Build a connection pool key from a request context dictionary.

    RFC 3986 declares both the scheme and the host to be case-insensitive,
    so both are lower-cased before the key is constructed. This applies to
    every scheme the function is registered for. To change this behaviour,
    provide alternate callables to ``key_fn_by_scheme``.

    :param key_class:
        The class to use when constructing the key. This should be a namedtuple
        with the ``scheme`` and ``host`` keys at a minimum.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object that contains the context for a request.
        It is copied first and never modified.
    :type request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype: PoolKey
    """
    # Everything below mutates the mapping, so operate on a shallow copy.
    normalized = request_context.copy()

    for name in ("scheme", "host"):
        normalized[name] = normalized[name].lower()

    # Dictionary-valued entries are frozen into sets of (name, value) pairs.
    for name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = normalized.get(name)
        if mapping is not None:
            normalized[name] = frozenset(mapping.items())

    # ``socket_options`` may arrive as a list; freeze it into a tuple.
    socket_options = normalized.get("socket_options")
    if socket_options is not None:
        normalized["socket_options"] = tuple(socket_options)

    # Prefix every entry with ``key_`` to match the namedtuple field names,
    # which cannot begin with an underscore.
    for name in tuple(normalized):
        normalized["key_" + name] = normalized.pop(name)

    # Any field the context did not mention defaults to ``None``.
    for field_name in key_class._fields:
        normalized.setdefault(field_name, None)

    # A missing or ``None`` blocksize falls back to the module default.
    if normalized.get("key_blocksize") is None:
        normalized["key_blocksize"] = _DEFAULT_BLOCKSIZE

    return key_class(**normalized)
148
149
#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

#: A dictionary that maps a scheme to the connection pool class used for it.
#: Unlike ``key_fn_by_scheme``, ``PoolManager.__init__`` keeps a reference to
#: this dictionary instead of a copy, so in-place changes are seen by every
#: manager that has not replaced its own ``pool_classes_by_scheme`` attribute.
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}
160
161
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Both stay ``None`` on a plain PoolManager; ``ProxyManager`` assigns
    # them per instance.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        # NOTE: ``key_fn_by_scheme`` is copied, but ``pool_classes_by_scheme``
        # is the shared module-level dictionary. Assign a new dictionary to
        # the attribute to customise a single manager.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> typing.Literal[False]:
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        The ``request_context`` passed in is never modified; the keyword
        arguments are derived from a copy of it. When it is ``None``, a copy
        of ``self.connection_pool_kw`` is used instead.

        :raises KeyError: if ``scheme`` is not in ``self.pool_classes_by_scheme``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]

        # Always work on a private copy. The entries removed or injected
        # below must not leak into the caller's dictionary, otherwise reusing
        # the same context for a later lookup fails with a KeyError on
        # "scheme" / "host" / "port".
        if request_context is None:
            request_context = self.connection_pool_kw.copy()
        else:
            request_context = request_context.copy()

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # The TLS keyword arguments are dropped for plain HTTP pools.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as plain HTTP.
        request_context["scheme"] = scheme or "http"
        if not port:
            # Schemes without a known default port fall back to 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        A deprecated ``strict`` entry triggers a ``DeprecationWarning`` and is
        removed from ``request_context`` in place.

        :raises URLSchemeUnknown: if no key function is registered for the scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; it must then contain ``scheme``, ``host`` and ``port``.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged with ``self.connection_pool_kw`` (see
        :meth:`connection_from_host`). Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset"; the key may legitimately be
                    # absent from the base kwargs already.
                    try:
                        del base_pool_kwargs[key]
                    except KeyError:
                        pass
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        Redirects are followed here rather than in the pool: the pool is
        always called with ``redirect=False``, and a redirect is followed by
        calling this method again with the new location.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # The pool must neither reject other hosts nor follow redirects
        # itself; both are handled below.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Remove from a copy so the mapping being iterated is not changed.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            # Either propagate the error or hand back the redirect response
            # itself, depending on the retry configuration.
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
490
491
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        # Plain HTTP requests all share the single pool for the proxy.
        resp1 = proxy.request("GET", "http://google.com/")
        resp2 = proxy.request("GET", "http://httpbin.org/")

        print(len(proxy.pools))
        # 1

        # HTTPS requests get one pool per destination.
        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | typing.Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # A connection pool object is also accepted in place of a URL
        # string; its scheme, host and port are used to build the URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
        else:
            str_proxy_url = proxy_url
        proxy = parse_url(str_proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL does not name one.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # The proxy settings travel with the pool keyword arguments, so they
        # reach every pool created by this manager and are part of its key.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a connection pool for ``host``.

        For the ``https`` scheme a pool keyed on the destination itself is
        returned. For every other scheme the pool for the proxy's own host,
        port and scheme is returned, so those requests share one pool.
        """
        if scheme == "https":
            return super().connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super().connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Header names are compared case-insensitively, so a user-supplied
        ``host`` or ``accept`` header suppresses the corresponding default
        instead of being sent alongside it.

        :param url: Request URL; its network location becomes the default
            ``Host`` header when it has one.
        :param headers: Headers supplied by the user, if any.
        :return: A new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        if headers:
            supplied = set(headers)
            supplied_lower = {name.lower() for name in supplied}
            # Drop a default only when the user sent the same field under a
            # different spelling. An exact match is overwritten in place by
            # ``update`` below, which keeps the original key order.
            for default_name in tuple(headers_):
                if (
                    default_name not in supplied
                    and default_name.lower() in supplied_lower
                ):
                    del headers_[default_name]
            headers_.update(headers)
        return headers_

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super().urlopen(method, url, redirect=redirect, **kw)
634
635
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy located at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.

    :param \\**kw:
        Passed unchanged to the :class:`ProxyManager` constructor.

    :return: The newly constructed manager.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager