Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/poolmanager.py: 36%

234 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:40 +0000

1from __future__ import annotations 

2 

3import functools 

4import logging 

5import typing 

6import warnings 

7from types import TracebackType 

8from urllib.parse import urljoin 

9 

10from ._collections import HTTPHeaderDict, RecentlyUsedContainer 

11from ._request_methods import RequestMethods 

12from .connection import ProxyConfig 

13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme 

14from .exceptions import ( 

15 LocationValueError, 

16 MaxRetryError, 

17 ProxySchemeUnknown, 

18 URLSchemeUnknown, 

19) 

20from .response import BaseHTTPResponse 

21from .util.connection import _TYPE_SOCKET_OPTIONS 

22from .util.proxy import connection_requires_http_tunnel 

23from .util.retry import Retry 

24from .util.timeout import Timeout 

25from .util.url import Url, parse_url 

26 

27if typing.TYPE_CHECKING: 

28 import ssl 

29 from typing import Literal 

30 

31__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] 

32 

33 

# Logger for this module, named after the module itself.
log = logging.getLogger(__name__)

# Keyword arguments that are dropped from the pool keyword arguments when a
# pool for the plain "http" scheme is created.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ca_cert_data",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# Fallback used whenever no ``blocksize`` is supplied. ``blocksize`` is a
# parameter that http.client.HTTPConnection and
# http.client.HTTPSConnection gained in Python 3.7.
_DEFAULT_BLOCKSIZE = 16 * 1024

# Type variable used to annotate methods that return ``self``.
_SelfT = typing.TypeVar("_SelfT")

55 

56 

class PoolKey(typing.NamedTuple):
    """
    Immutable record used as the lookup key for a connection pool.

    It holds one ``key_``-prefixed field for each known keyword argument
    that could be provided to the pool manager, its pools, or the
    underlying connections.

    Custom key schemes should include, at a minimum, every field that is
    declared here.
    """

    # --- destination ---
    key_scheme: str
    key_host: str
    key_port: int | None

    # --- general pool / connection settings ---
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None

    # --- TLS settings ---
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ca_cert_data: str | bytes | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None

    # --- pool size and default headers ---
    key_maxsize: int | None
    key_headers: frozenset[tuple[str, str]] | None

    # --- proxy settings (context keys "_proxy", "_proxy_headers", ...) ---
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None

    # --- socket-level options ---
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None

    # --- certificate / hostname verification ---
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None

    # --- http.client block size ---
    key_blocksize: int | None

94 

95 

def _default_key_normalizer(
    key_class: type[PoolKey], request_context: dict[str, typing.Any]
) -> PoolKey:
    """
    Build a hashable pool key from a request context dictionary.

    RFC 3986 declares both the scheme and the host case-insensitive, so
    the two are lower-cased before the key is assembled. Register a
    different callable in ``key_fn_by_scheme`` to change this behaviour.

    :param key_class:
        The namedtuple class the key is built with. Its fields carry the
        ``key_`` prefix and must cover every entry of the context.
    :type key_class: namedtuple
    :param request_context:
        A dictionary-like object holding the context for a request. It
        must contain ``scheme`` and ``host``; it is not modified.
    :type request_context: dict

    :return: A namedtuple that can be used as a connection pool key.
    :rtype: PoolKey
    """
    # Everything below mutates the mapping, so operate on a copy.
    ctx = request_context.copy()

    for name in ("scheme", "host"):
        ctx[name] = ctx[name].lower()

    # Dictionary-valued entries are not hashable; freeze their items.
    for name in ("headers", "_proxy_headers", "_socks_options"):
        mapping = ctx.get(name)
        if mapping is not None:
            ctx[name] = frozenset(mapping.items())

    # ``socket_options`` may arrive as a list; a tuple is hashable.
    if ctx.get("socket_options") is not None:
        ctx["socket_options"] = tuple(ctx["socket_options"])

    # Namedtuple fields cannot start with '_', so every context key is
    # re-registered under its ``key_``-prefixed name.
    for name in tuple(ctx):
        ctx["key_" + name] = ctx.pop(name)

    # Fields the context did not mention default to ``None``.
    for field in key_class._fields:
        ctx.setdefault(field, None)

    # A missing (or ``None``) block size falls back to the module default.
    if ctx.get("key_blocksize") is None:
        ctx["key_blocksize"] = _DEFAULT_BLOCKSIZE

    return key_class(**ctx)

149 

150 

#: Maps a URL scheme to the callable that creates a pool key for it.
#: Swap entries to alter the way pool keys are constructed. Each
#: PoolManager takes a copy of this dictionary when it is created, so the
#: behaviour can be configured globally here or individually on the instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class that serves it.
pool_classes_by_scheme = {
    "http": HTTPConnectionPool,
    "https": HTTPSConnectionPool,
}

161 

162 

class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # A plain PoolManager talks to destinations directly; ProxyManager
    # overrides both attributes on the instance.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        super().__init__(headers)
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self: _SelfT) -> _SelfT:
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used; otherwise ``self.connection_pool_kw`` is.
        Neither mapping is modified. This method is used to actually create
        the connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        :param scheme: Key into ``self.pool_classes_by_scheme``.
        :param host: Host passed positionally to the pool class.
        :param port: Port passed positionally to the pool class.
        :param request_context: Optional keyword arguments for the pool.
        :raises KeyError: If ``scheme`` has no registered pool class.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]

        # Always work on a private copy. The entries removed below
        # ("scheme", "host", "port") are still needed by a caller that
        # wants to reuse its context dictionary for another lookup.
        if request_context is None:
            pool_kwargs = self.connection_pool_kw.copy()
        else:
            pool_kwargs = request_context.copy()

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if pool_kwargs.get("blocksize") is None:
            pool_kwargs["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            pool_kwargs.pop(key, None)

        # TLS-only settings are not forwarded to a plain HTTP pool.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                pool_kwargs.pop(kw, None)

        return pool_cls(host, port, **pool_kwargs)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: If ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        # A missing scheme is treated as plain HTTP.
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        A deprecated ``strict`` entry is removed from ``request_context``
        (with a :class:`DeprecationWarning`) before the pool key is built.

        :raises URLSchemeUnknown: If no key function is registered for the
            scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` must contain ``scheme``, ``host`` and ``port``;
        it is only consulted when a new pool has to be created.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged on top of ``self.connection_pool_kw``
        first. Note that if a new pool does not need to be created for the
        request, the provided ``pool_kwargs`` are not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        base_pool_kwargs = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset"; a key that was never set is fine.
                    base_pool_kwargs.pop(key, None)
                else:
                    base_pool_kwargs[key] = value
        return base_pool_kwargs

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.

        Always ``False`` when no proxy is configured.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        :param method: HTTP method; switched to ``GET`` after a 303 response.
        :param url: Absolute URL to request.
        :param redirect: Whether redirect responses are followed.
        :param \\**kw: Passed on to the pool's ``urlopen``; ``headers``
            defaults to ``self.headers``.
        :raises MaxRetryError: When the redirect budget is exhausted and
            ``retries.raise_on_redirect`` is set.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here, across pools, so the pool itself must
        # neither follow them nor insist on staying on one host.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Filter a copy so the caller's header mapping stays untouched.
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted but errors are not wanted: hand back
            # the redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)

491 

492 

class ProxyManager(PoolManager):
    """
    Behaves just like :class:`PoolManager`, but sends all requests through
    the defined proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        # Plain HTTP destinations all share the pool that connects to
        # the proxy itself.
        resp1 = proxy.request("GET", "http://google.com/")
        resp2 = proxy.request("GET", "http://httpbin.org/")

        print(len(proxy.pools))
        # 1

        # Every HTTPS destination gets a pool of its own.
        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        # An existing connection pool is also accepted in place of a URL
        # string; its scheme, host and port are turned back into a URL.
        if isinstance(proxy_url, HTTPConnectionPool):
            str_proxy_url = f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
        else:
            str_proxy_url = proxy_url
        proxy = parse_url(str_proxy_url)

        if proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(proxy.scheme)

        # Fill in the scheme's default port when the URL names none.
        if not proxy.port:
            port = port_by_scheme.get(proxy.scheme, 80)
            proxy = proxy._replace(port=port)

        self.proxy = proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # The proxy settings travel with the pool keyword arguments so that
        # every pool created by this manager receives them.
        connection_pool_kw["_proxy"] = self.proxy
        connection_pool_kw["_proxy_headers"] = self.proxy_headers
        connection_pool_kw["_proxy_config"] = self.proxy_config

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get the connection pool to use for the given destination.

        For the ``"https"`` scheme the pool is looked up by the destination
        ``host``, ``port`` and ``scheme``. For every other scheme the
        destination is ignored and the pool for the proxy's own host, port
        and scheme is returned.
        """
        if scheme == "https":
            return super().connection_from_host(
                host, port, scheme, pool_kwargs=pool_kwargs
            )

        return super().connection_from_host(
            self.proxy.host, self.proxy.port, self.proxy.scheme, pool_kwargs=pool_kwargs  # type: ignore[union-attr]
        )

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Sets headers needed by proxies: specifically, the Accept and Host
        headers. Only sets headers not provided by the user.

        Returns a new dictionary; ``headers`` itself is not modified.
        """
        headers_ = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            headers_["Host"] = netloc

        # Entries supplied by the user overwrite the defaults set above.
        if headers:
            headers_.update(headers)
        return headers_

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        u = parse_url(url)
        if not connection_requires_http_tunnel(self.proxy, self.proxy_config, u.scheme):
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            headers = kw.get("headers", self.headers)
            kw["headers"] = self._set_proxy_headers(url, headers)

        return super().urlopen(method, url, redirect=redirect, **kw)

635 

636 

def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        The URL of the proxy; handed to :class:`ProxyManager` as
        ``proxy_url``.

    :param \\**kw:
        Further keyword arguments, passed to :class:`ProxyManager` unchanged.

    :return: The newly created manager.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager