Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/poolmanager.py: 35%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import functools
4import logging
5import typing
6import warnings
7from types import TracebackType
8from urllib.parse import urljoin
10from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11from ._request_methods import RequestMethods
12from .connection import ProxyConfig
13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19)
20from .response import BaseHTTPResponse
21from .util.connection import _TYPE_SOCKET_OPTIONS
22from .util.proxy import connection_requires_http_tunnel
23from .util.retry import Retry
24from .util.timeout import Timeout
25from .util.url import Url, parse_url
27if typing.TYPE_CHECKING:
28 import ssl
30 from typing_extensions import Self
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


log = logging.getLogger(__name__)

# Connection-pool keyword arguments that PoolManager._new_pool() removes from
# the pool kwargs before building a pool for the plain "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)
# Default value for `blocksize` - a new parameter introduced to
# http.client.HTTPConnection & http.client.HTTPSConnection in Python 3.7
_DEFAULT_BLOCKSIZE = 16384
class PoolKey(typing.NamedTuple):
    """
    All known keyword arguments that could be provided to the pool manager, its
    pools, or the underlying connections.

    All custom key schemes should include the fields in this key at a minimum.
    """

    # Each field is a request-context key with a ``key_`` prefix added by
    # _default_key_normalizer(). A context key that itself starts with an
    # underscore (e.g. ``_proxy``) therefore shows up with a double underscore
    # (``key__proxy``).
    key_scheme: str  # lower-cased by _default_key_normalizer()
    key_host: str  # lower-cased by _default_key_normalizer()
    key_port: int | None
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None
    key_maxsize: int | None
    # The three dictionary-valued context entries (headers, _proxy_headers,
    # _socks_options) are stored as frozensets of their (key, value) items.
    key_headers: frozenset[tuple[str, str]] | None
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None
    # Converted to a tuple by _default_key_normalizer() when supplied.
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None
    # _default_key_normalizer() fills in _DEFAULT_BLOCKSIZE when the context
    # has no blocksize or an explicit ``None``.
    key_blocksize: int | None
95def _default_key_normalizer(
96 key_class: type[PoolKey], request_context: dict[str, typing.Any]
97) -> PoolKey:
98 """
99 Create a pool key out of a request context dictionary.
101 According to RFC 3986, both the scheme and host are case-insensitive.
102 Therefore, this function normalizes both before constructing the pool
103 key for an HTTPS request. If you wish to change this behaviour, provide
104 alternate callables to ``key_fn_by_scheme``.
106 :param key_class:
107 The class to use when constructing the key. This should be a namedtuple
108 with the ``scheme`` and ``host`` keys at a minimum.
109 :type key_class: namedtuple
110 :param request_context:
111 A dictionary-like object that contain the context for a request.
112 :type request_context: dict
114 :return: A namedtuple that can be used as a connection pool key.
115 :rtype: PoolKey
116 """
117 # Since we mutate the dictionary, make a copy first
118 context = request_context.copy()
119 context["scheme"] = context["scheme"].lower()
120 context["host"] = context["host"].lower()
122 # These are both dictionaries and need to be transformed into frozensets
123 for key in ("headers", "_proxy_headers", "_socks_options"):
124 if key in context and context[key] is not None:
125 context[key] = frozenset(context[key].items())
127 # The socket_options key may be a list and needs to be transformed into a
128 # tuple.
129 socket_opts = context.get("socket_options")
130 if socket_opts is not None:
131 context["socket_options"] = tuple(socket_opts)
133 # Map the kwargs to the names in the namedtuple - this is necessary since
134 # namedtuples can't have fields starting with '_'.
135 for key in list(context.keys()):
136 context["key_" + key] = context.pop(key)
138 # Default to ``None`` for keys missing from the context
139 for field in key_class._fields:
140 if field not in context:
141 context[field] = None
143 # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
144 if context.get("key_blocksize") is None:
145 context["key_blocksize"] = _DEFAULT_BLOCKSIZE
147 return key_class(**context)
#: Maps a URL scheme to the callable that creates pool keys for it.
#: Replace entries here to alter how pool keys are constructed. Each
#: PoolManager makes its own copy of this dictionary, so it can be configured
#: globally here or individually on an instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class used for that scheme.
pool_classes_by_scheme = {
    "http": HTTPConnectionPool,
    "https": HTTPSConnectionPool,
}
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Both stay ``None`` on a plain PoolManager; ProxyManager assigns them.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)
        # PoolManager handles redirects itself in PoolManager.urlopen().
        # It always passes redirect=False to the underlying connection pool to
        # suppress per-pool redirect handling. If the user supplied a non-Retry
        # value (int/bool/etc) for retries and we let the pool normalize it
        # while redirect=False, the resulting Retry object would have redirect
        # handling disabled, which can interfere with PoolManager's own
        # redirect logic. Normalize here so redirects remain governed solely by
        # PoolManager logic.
        if "retries" in connection_pool_kw:
            retries = connection_pool_kw["retries"]
            if not isinstance(retries, Retry):
                retries = Retry.from_int(retries)
                connection_pool_kw = connection_pool_kw.copy()
                connection_pool_kw["retries"] = retries
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        # Note that only key_fn_by_scheme is copied; pool_classes_by_scheme
        # is the module-level dictionary itself.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self) -> Self:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> typing.Literal[False]:
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        The supplied ``request_context`` is not modified. When it is ``None``,
        a copy of ``self.connection_pool_kw`` is used instead.

        :raises KeyError:
            If ``scheme`` is not a key of ``self.pool_classes_by_scheme``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()
        else:
            # Work on a copy: the entries removed below must not vanish from
            # the caller's dictionary, otherwise handing the same context to
            # connection_from_context() a second time fails with a KeyError
            # on "scheme".
            request_context = dict(request_context)

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain-HTTP pools are built without any of the SSL_KEYWORDS.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as "http".
        request_context["scheme"] = scheme or "http"
        if not port:
            # Schemes absent from port_by_scheme fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        A deprecated ``strict`` entry triggers a ``DeprecationWarning`` and is
        removed from ``request_context`` in place.

        :raises URLSchemeUnknown:
            If ``key_fn_by_scheme`` has no entry for the lower-cased scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` must contain the ``scheme``, ``host`` and ``port``
        keys; it is only read when no pool is stored under ``pool_key`` yet.
        """
        # The lookup and the insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "drop this setting"; the key may be absent.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method:
            HTTP method of the request. It is replaced by ``GET`` when a 303
            redirect is followed.

        :param url:
            Absolute URL to request. A URL without a scheme emits a
            ``DeprecationWarning``.

        :param redirect:
            If true, a redirect response is followed by calling this method
            again with the redirect location.

        :param \\**kw:
            Passed on to the pool's ``urlopen``. ``assert_same_host`` and
            ``redirect`` are always overridden with ``False`` for that call,
            and ``headers`` defaults to ``self.headers``.

        :raises MaxRetryError:
            If the retry budget is exhausted while following a redirect and
            the ``Retry`` object has ``raise_on_redirect`` set. Otherwise the
            redirect response itself is returned.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed below, not by the pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries", response.retries)
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Remove from a copy so the headers object that was passed in
            # stays intact.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            return response

        # Carry the decremented retry state and the caller's redirect choice
        # into the follow-up request.
        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        # Non-HTTPS destinations all share the pool that talks to the proxy.
        resp1 = proxy.request("GET", "http://google.com/")
        resp2 = proxy.request("GET", "http://httpbin.org/")

        print(len(proxy.pools))
        # 1

        # Every HTTPS destination gets a pool of its own.
        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | typing.Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # A connection pool is also accepted in place of a URL string; its
        # scheme, host and port are turned into the proxy URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
        else:
            str_proxy_url = proxy_url
        proxy = parse_url(str_proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL does not name one.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # The proxy settings are handed to every pool through the pool kwargs.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get the connection pool to use for ``host``.

        For the ``https`` scheme the pool is looked up for the destination
        host itself. For any other scheme the pool for the proxy's own host,
        port and scheme is returned, and ``host`` and ``port`` are ignored.
        """
        if scheme == "https":
            return super().connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super().connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Header names are compared case-insensitively, so a user-supplied
        ``host`` or ``accept`` header replaces the default instead of being
        sent alongside it.

        :param url:
            The request URL; its netloc, when present, is the default
            ``Host`` value.

        :param headers:
            Headers provided by the user. They are not modified.

        :return: A new dictionary with the merged headers.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        if headers:
            # Drop every default the user already supplies under any casing;
            # otherwise e.g. "Host" and "host" would both be sent.
            provided = {name.lower() for name in headers}
            headers_ = {
                name: value
                for name, value in headers_.items()
                if name.lower() not in provided
            }
            headers_.update(headers)
        return headers_

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute.

        When the request is not sent through an HTTP CONNECT tunnel, the
        headers (``kw["headers"]`` or else ``self.headers``) are completed
        with the proxy defaults from :meth:`_set_proxy_headers` first.
        """
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super().urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.

    :param \\**kw:
        Forwarded unchanged to the :class:`ProxyManager` constructor.

    :return: The newly constructed :class:`ProxyManager`.
    """
    return ProxyManager(proxy_url=url, **kw)