Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/poolmanager.py: 36%
234 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:45 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:45 +0000
1from __future__ import annotations
3import functools
4import logging
5import typing
6import warnings
7from types import TracebackType
8from urllib.parse import urljoin
10from ._collections import HTTPHeaderDict, RecentlyUsedContainer
11from ._request_methods import RequestMethods
12from .connection import ProxyConfig
13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme
14from .exceptions import (
15 LocationValueError,
16 MaxRetryError,
17 ProxySchemeUnknown,
18 URLSchemeUnknown,
19)
20from .response import BaseHTTPResponse
21from .util.connection import _TYPE_SOCKET_OPTIONS
22from .util.proxy import connection_requires_http_tunnel
23from .util.retry import Retry
24from .util.timeout import Timeout
25from .util.url import Url, parse_url
27if typing.TYPE_CHECKING:
28 import ssl
29 from typing import Literal
31__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]
34log = logging.getLogger(__name__)
# Names of the TLS-related connection-pool keyword arguments. Order is kept
# stable because the tuple is iterated when these entries are discarded.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# Fallback for ``blocksize``, a parameter that
# http.client.HTTPConnection & http.client.HTTPSConnection gained in
# Python 3.7.
_DEFAULT_BLOCKSIZE = 16384

# Type variable used to annotate methods that hand back ``self``.
_SelfT = typing.TypeVar("_SelfT")
class PoolKey(typing.NamedTuple):
    """
    Immutable record of every keyword argument that may be handed to the
    pool manager, its pools, or the underlying connections.

    Each field is the corresponding keyword argument name prefixed with
    ``key_``. Custom key schemes should carry at least the fields listed
    here.
    """

    # Destination of the pool.
    key_scheme: str
    key_host: str
    key_port: int | None

    # General connection behaviour.
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None

    # TLS configuration.
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None

    # Pool sizing and default headers.
    key_maxsize: int | None
    key_headers: frozenset[tuple[str, str]] | None

    # Proxy settings (the doubled underscore comes from kwargs that
    # themselves start with ``_``).
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None

    # Socket-level options.
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None

    # Certificate verification and transfer tuning.
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None
    key_blocksize: int | None
96def _default_key_normalizer(
97 key_class: type[PoolKey], request_context: dict[str, typing.Any]
98) -> PoolKey:
99 """
100 Create a pool key out of a request context dictionary.
102 According to RFC 3986, both the scheme and host are case-insensitive.
103 Therefore, this function normalizes both before constructing the pool
104 key for an HTTPS request. If you wish to change this behaviour, provide
105 alternate callables to ``key_fn_by_scheme``.
107 :param key_class:
108 The class to use when constructing the key. This should be a namedtuple
109 with the ``scheme`` and ``host`` keys at a minimum.
110 :type key_class: namedtuple
111 :param request_context:
112 A dictionary-like object that contain the context for a request.
113 :type request_context: dict
115 :return: A namedtuple that can be used as a connection pool key.
116 :rtype: PoolKey
117 """
118 # Since we mutate the dictionary, make a copy first
119 context = request_context.copy()
120 context["scheme"] = context["scheme"].lower()
121 context["host"] = context["host"].lower()
123 # These are both dictionaries and need to be transformed into frozensets
124 for key in ("headers", "_proxy_headers", "_socks_options"):
125 if key in context and context[key] is not None:
126 context[key] = frozenset(context[key].items())
128 # The socket_options key may be a list and needs to be transformed into a
129 # tuple.
130 socket_opts = context.get("socket_options")
131 if socket_opts is not None:
132 context["socket_options"] = tuple(socket_opts)
134 # Map the kwargs to the names in the namedtuple - this is necessary since
135 # namedtuples can't have fields starting with '_'.
136 for key in list(context.keys()):
137 context["key_" + key] = context.pop(key)
139 # Default to ``None`` for keys missing from the context
140 for field in key_class._fields:
141 if field not in context:
142 context[field] = None
144 # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context
145 if context.get("key_blocksize") is None:
146 context["key_blocksize"] = _DEFAULT_BLOCKSIZE
148 return key_class(**context)
#: Maps each URL scheme to the callable that builds a pool key for it.
#: Replace an entry to change how pool keys are constructed. Every
#: PoolManager takes its own copy of this dictionary, so it can be
#: configured globally here or individually on the instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps each URL scheme to the connection pool class used for it.
pool_classes_by_scheme = dict(http=HTTPConnectionPool, https=HTTPSConnectionPool)
class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Class-level defaults: a plain PoolManager talks to hosts directly.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Note that only ``key_fn_by_scheme`` is copied; the
        # pool-class mapping is the shared module-level dictionary.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self: _SelfT) -> _SelfT:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used; otherwise ``self.connection_pool_kw`` is used.
        Either way the dictionary is copied first, so the caller's mapping is
        never modified. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        :raises KeyError: if ``scheme`` has no entry in
            ``self.pool_classes_by_scheme``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]

        # Always work on a private copy: the entries popped/added below must
        # not leak into a dictionary the caller may reuse for another request.
        if request_context is None:
            request_context = self.connection_pool_kw.copy()
        else:
            request_context = dict(request_context)

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain-HTTP pools are not given the TLS keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as plain HTTP.
        request_context["scheme"] = scheme or "http"
        if not port:
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        A deprecated ``strict`` entry triggers a ``DeprecationWarning`` and is
        dropped from a copy, so this method does not modify the dictionary
        that was passed in.

        :raises URLSchemeUnknown: if no pool key constructor is registered for
            the (lower-cased) scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            # Drop the key from a copy rather than mutating the caller's dict.
            request_context = {
                key: value
                for key, value in request_context.items()
                if key != "strict"
            }

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` must contain the ``scheme``, ``host`` and ``port``
        keys; they are read when a new pool has to be created.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset": drop the inherited value, if any.
                    try:
                        del base_pool_kwargs[key]
                    except KeyError:
                        pass
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method passed on to the pool's ``urlopen``.
        :param url: Absolute URL to request.
        :param redirect:
            When true, a redirect location reported by the response is
            followed by calling this method again with the new location.
        :param \\**kw:
            Forwarded to the pool's ``urlopen``. ``assert_same_host`` and
            ``redirect`` are always overridden, and ``headers`` defaults to
            ``self.headers``.
        :raises MaxRetryError:
            Re-raised from ``Retry.increment`` when the retry object has
            ``raise_on_redirect`` set.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here, not by the pool, so that they may
        # cross hosts.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Pop from a copy so the mapping being iterated is not modified.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted but not fatal: hand back the
            # redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)
class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        resp1 = proxy.request("GET", "https://google.com/")
        resp2 = proxy.request("GET", "https://httpbin.org/")

        print(len(proxy.pools))
        # 1

        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # A connection pool object is also accepted in place of a URL string;
        # in that case the URL is rebuilt from its scheme, host and port.
        target = proxy_url
        if isinstance(target, HTTPConnectionPool):
            target = f"{target.scheme}://{target.host}:{target.port}"
        parsed = parse_url(target)

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        # Fill in the scheme's default port when the URL does not name one.
        if not parsed.port:
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get the pool to use for ``host``.

        For the ``https`` scheme the pool is looked up by the destination
        host, port and scheme; for any other scheme the pool is looked up by
        the proxy's own host, port and scheme.
        """
        if scheme != "https":
            proxy = self.proxy
            return super().connection_from_host(
                proxy.host, proxy.port, proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
            )

        return super().connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Build the headers needed by proxies: specifically, the Accept and
        Host headers. Values supplied in ``headers`` take precedence over
        these defaults.
        """
        netloc = parse_url(url).netloc
        merged = {"Accept": "*/*", "Host": netloc} if netloc else {"Accept": "*/*"}

        # User-provided headers are applied last so that they win.
        if headers:
            merged.update(headers)
        return merged

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        parsed = parse_url(url)
        tunnelled = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed.scheme
        )
        if not tunnelled:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super().urlopen(method, url, redirect=redirect, **kw)
def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    Any extra keyword arguments are forwarded unchanged to the
    :class:`ProxyManager` constructor.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager