Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/poolmanager.py: 36%

231 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:32 +0000

1from __future__ import annotations 

2 

3import functools 

4import logging 

5import typing 

6import warnings 

7from types import TracebackType 

8from urllib.parse import urljoin 

9 

10from ._collections import RecentlyUsedContainer 

11from ._request_methods import RequestMethods 

12from .connection import ProxyConfig 

13from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme 

14from .exceptions import ( 

15 LocationValueError, 

16 MaxRetryError, 

17 ProxySchemeUnknown, 

18 URLSchemeUnknown, 

19) 

20from .response import BaseHTTPResponse 

21from .util.connection import _TYPE_SOCKET_OPTIONS 

22from .util.proxy import connection_requires_http_tunnel 

23from .util.retry import Retry 

24from .util.timeout import Timeout 

25from .util.url import Url, parse_url 

26 

27if typing.TYPE_CHECKING: 

28 import ssl 

29 

30 from typing_extensions import Literal 

31 

32__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"] 

33 

34 

# Module-level logger; redirects followed by PoolManager.urlopen are reported here.
log = logging.getLogger(__name__)

# Pool keyword arguments that only make sense for TLS connections.
# PoolManager._new_pool removes every one of them before building a pool
# for the plain "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ssl_minimum_version",
    "ssl_maximum_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# Fallback for the `blocksize` argument (added to http.client.HTTPConnection
# and http.client.HTTPSConnection in Python 3.7) when the caller supplies none.
_DEFAULT_BLOCKSIZE = 16384

# Lets ``__enter__`` be annotated as returning the concrete type of ``self``.
_SelfT = typing.TypeVar("_SelfT")

55 

56 

class PoolKey(typing.NamedTuple):
    """
    Immutable key identifying one connection pool inside a pool manager.

    It carries every known keyword argument that may be handed to the pool
    manager, to its pools, or to the underlying connections. Each field is
    the keyword's name prefixed with ``key_``.

    Custom key schemes should include at least the fields declared here.
    """

    # Destination of the pool.
    key_scheme: str
    key_host: str
    key_port: int | None

    # General pool / connection behaviour.
    key_timeout: Timeout | float | int | None
    key_retries: Retry | bool | int | None
    key_block: bool | None
    key_source_address: tuple[str, int] | None

    # TLS configuration.
    key_key_file: str | None
    key_key_password: str | None
    key_cert_file: str | None
    key_cert_reqs: str | None
    key_ca_certs: str | None
    key_ssl_version: int | str | None
    key_ssl_minimum_version: ssl.TLSVersion | None
    key_ssl_maximum_version: ssl.TLSVersion | None
    key_ca_cert_dir: str | None
    key_ssl_context: ssl.SSLContext | None

    # Pool sizing and default headers.
    key_maxsize: int | None
    key_headers: frozenset[tuple[str, str]] | None

    # Proxy settings (originating keywords start with an underscore).
    key__proxy: Url | None
    key__proxy_headers: frozenset[tuple[str, str]] | None
    key__proxy_config: ProxyConfig | None

    # Socket-level options.
    key_socket_options: _TYPE_SOCKET_OPTIONS | None
    key__socks_options: frozenset[tuple[str, str]] | None

    # Certificate verification overrides.
    key_assert_hostname: bool | str | None
    key_assert_fingerprint: str | None
    key_server_hostname: str | None

    # Read block size handed to the connection.
    key_blocksize: int | None

93 

94 

95def _default_key_normalizer( 

96 key_class: type[PoolKey], request_context: dict[str, typing.Any] 

97) -> PoolKey: 

98 """ 

99 Create a pool key out of a request context dictionary. 

100 

101 According to RFC 3986, both the scheme and host are case-insensitive. 

102 Therefore, this function normalizes both before constructing the pool 

103 key for an HTTPS request. If you wish to change this behaviour, provide 

104 alternate callables to ``key_fn_by_scheme``. 

105 

106 :param key_class: 

107 The class to use when constructing the key. This should be a namedtuple 

108 with the ``scheme`` and ``host`` keys at a minimum. 

109 :type key_class: namedtuple 

110 :param request_context: 

111 A dictionary-like object that contain the context for a request. 

112 :type request_context: dict 

113 

114 :return: A namedtuple that can be used as a connection pool key. 

115 :rtype: PoolKey 

116 """ 

117 # Since we mutate the dictionary, make a copy first 

118 context = request_context.copy() 

119 context["scheme"] = context["scheme"].lower() 

120 context["host"] = context["host"].lower() 

121 

122 # These are both dictionaries and need to be transformed into frozensets 

123 for key in ("headers", "_proxy_headers", "_socks_options"): 

124 if key in context and context[key] is not None: 

125 context[key] = frozenset(context[key].items()) 

126 

127 # The socket_options key may be a list and needs to be transformed into a 

128 # tuple. 

129 socket_opts = context.get("socket_options") 

130 if socket_opts is not None: 

131 context["socket_options"] = tuple(socket_opts) 

132 

133 # Map the kwargs to the names in the namedtuple - this is necessary since 

134 # namedtuples can't have fields starting with '_'. 

135 for key in list(context.keys()): 

136 context["key_" + key] = context.pop(key) 

137 

138 # Default to ``None`` for keys missing from the context 

139 for field in key_class._fields: 

140 if field not in context: 

141 context[field] = None 

142 

143 # Default key_blocksize to _DEFAULT_BLOCKSIZE if missing from the context 

144 if context.get("key_blocksize") is None: 

145 context["key_blocksize"] = _DEFAULT_BLOCKSIZE 

146 

147 return key_class(**context) 

148 

149 

#: Maps a URL scheme to the callable that builds the pool key for it.
#: Replace entries here to change how pool keys are constructed. Every
#: PoolManager takes its own copy of this dictionary, so it can be
#: customised globally here or per instance.
key_fn_by_scheme = {
    scheme: functools.partial(_default_key_normalizer, PoolKey)
    for scheme in ("http", "https")
}

#: Maps a URL scheme to the connection pool class instantiated for it.
pool_classes_by_scheme = {
    "http": HTTPConnectionPool,
    "https": HTTPSConnectionPool,
}

160 

161 

class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example:

    .. code-block:: python

        import urllib3

        http = urllib3.PoolManager(num_pools=2)

        resp1 = http.request("GET", "https://google.com/")
        resp2 = http.request("GET", "https://google.com/mail")
        resp3 = http.request("GET", "https://yahoo.com/")

        print(len(http.pools))
        # 2

    """

    # Proxy settings; they stay ``None`` unless a subclass assigns them.
    proxy: Url | None = None
    proxy_config: ProxyConfig | None = None

    # Lower-cased names of the headers that describe a request body. They are
    # dropped together with the body when a 303 response turns the request
    # into a body-less GET.
    _BODY_HEADERS_DROPPED_ON_303 = frozenset(
        (
            "content-encoding",
            "content-language",
            "content-location",
            "content-type",
            "content-length",
            "digest",
            "last-modified",
        )
    )

    def __init__(
        self,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        """
        Set up an empty pool cache holding at most ``num_pools`` pools.

        ``connection_pool_kw`` is stored as-is and later used as the base
        keyword arguments for every pool this manager creates.
        """
        super().__init__(headers)
        self.connection_pool_kw = connection_pool_kw

        self.pools: RecentlyUsedContainer[PoolKey, HTTPConnectionPool]
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self: _SelfT) -> _SelfT:
        """Return the manager itself for use in a ``with`` statement."""
        return self

    def __exit__(
        self,
        exc_type: type[BaseException] | None,
        exc_val: BaseException | None,
        exc_tb: TracebackType | None,
    ) -> Literal[False]:
        """Clear all pools on leaving the ``with`` block."""
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(
        self,
        scheme: str,
        host: str,
        port: int,
        request_context: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used. This method is used to actually create the
        connection pools handed out by :meth:`connection_from_url` and
        companion methods. It is intended to be overridden for customization.

        Note that a provided ``request_context`` is modified in place: the
        ``scheme``, ``host`` and ``port`` entries are removed, and for the
        ``http`` scheme so are all entries named in ``SSL_KEYWORDS``.
        """
        pool_cls: type[HTTPConnectionPool] = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Default blocksize to _DEFAULT_BLOCKSIZE if missing or explicitly
        # set to 'None' in the request_context.
        if request_context.get("blocksize") is None:
            request_context["blocksize"] = _DEFAULT_BLOCKSIZE

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        for key in ("scheme", "host", "port"):
            request_context.pop(key, None)

        # Plain HTTP pools do not accept TLS keyword arguments.
        if scheme == "http":
            for kw in SSL_KEYWORDS:
                request_context.pop(kw, None)

        return pool_cls(host, port, **request_context)

    def clear(self) -> None:
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """

        if not host:
            raise LocationValueError("No host specified.")

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = scheme or "http"
        if not port:
            # Unknown schemes fall back to port 80 here; they are rejected
            # later when no key function is registered for them.
            port = port_by_scheme.get(request_context["scheme"].lower(), 80)
        request_context["port"] = port
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(
        self, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown: if no key function is registered for the scheme.
        """
        if "strict" in request_context:
            warnings.warn(
                "The 'strict' parameter is no longer needed on Python 3+. "
                "This will raise an error in urllib3 v2.1.0.",
                DeprecationWarning,
            )
            request_context.pop("strict")

        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)
        pool_key = pool_key_constructor(request_context)

        return self.connection_from_pool_key(pool_key, request_context=request_context)

    def connection_from_pool_key(
        self, pool_key: PoolKey, request_context: dict[str, typing.Any]
    ) -> HTTPConnectionPool:
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.
        """
        # Lookup and insertion happen under the container's lock.
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            scheme = request_context["scheme"]
            host = request_context["host"]
            port = request_context["port"]
            pool = self._new_pool(scheme, host, port, request_context=request_context)
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(
        self, url: str, pool_kwargs: dict[str, typing.Any] | None = None
    ) -> HTTPConnectionPool:
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is used instead. Note that if a new pool does not
        need to be created for the request, the provided ``pool_kwargs`` are
        not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(
        self, override: dict[str, typing.Any] | None
    ) -> dict[str, typing.Any]:
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        merged = self.connection_pool_kw.copy()
        if override:
            for key, value in override.items():
                if value is None:
                    # ``None`` means "unset"; the key may not exist at all.
                    merged.pop(key, None)
                else:
                    merged[key] = value
        return merged

    def _proxy_requires_url_absolute_form(self, parsed_url: Url) -> bool:
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _drop_request_body(self, kw: dict[str, typing.Any]) -> None:
        """
        Remove the request body and its describing headers from ``kw`` in place.

        Used when a 303 response turns the request into a GET, so the
        original payload is not replayed against the redirect target. The
        headers object in ``kw`` is replaced by a filtered copy, never
        mutated, because it may be the manager's shared default headers.
        """
        kw["body"] = None

        headers = kw.get("headers")
        if headers is None:
            return

        kept = headers.copy()
        for header in headers:
            if header.lower() in self._BODY_HEADERS_DROPPED_ON_303:
                kept.pop(header, None)
        kw["headers"] = kept

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        When a redirect is followed after a ``303`` response, the request is
        re-issued as a ``GET`` without the original body and without the
        headers that described that body.

        :raises MaxRetryError: if the redirect limit is exhausted and the
            retry configuration asks for an error to be raised.
        """
        u = parse_url(url)

        if u.scheme is None:
            warnings.warn(
                "URLs without a scheme (ie 'https://') are deprecated and will raise an error "
                "in a future version of urllib3. To avoid this DeprecationWarning ensure all URLs "
                "start with 'https://' or 'http://'. Read more in this issue: "
                "https://github.com/urllib3/urllib3/issues/2920",
                category=DeprecationWarning,
                stacklevel=2,
            )

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are handled here, not by the pool, because the target
        # may live on another host and therefore in another pool.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body so nothing sensitive is sent to the new location.
            self._drop_request_body(kw)

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            new_headers = kw["headers"].copy()
            for header in kw["headers"]:
                if header.lower() in retries.remove_headers_on_redirect:
                    new_headers.pop(header, None)
            kw["headers"] = new_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted but errors are not wanted: hand back
            # the redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)

487 

488 

class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that routes every request through one proxy.
    HTTPS URLs are reached with the CONNECT method.

    :param proxy_url:
        The URL of the proxy to be used.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    :param proxy_assert_hostname:
        The hostname of the certificate to verify against.

    :param proxy_assert_fingerprint:
        The fingerprint of the certificate to verify against.

    Example:

    .. code-block:: python

        import urllib3

        proxy = urllib3.ProxyManager("https://localhost:3128/")

        resp1 = proxy.request("GET", "https://google.com/")
        resp2 = proxy.request("GET", "https://httpbin.org/")

        print(len(proxy.pools))
        # 1

        resp3 = proxy.request("GET", "https://httpbin.org/")
        resp4 = proxy.request("GET", "https://twitter.com/")

        print(len(proxy.pools))
        # 3

    """

    def __init__(
        self,
        proxy_url: str,
        num_pools: int = 10,
        headers: typing.Mapping[str, str] | None = None,
        proxy_headers: typing.Mapping[str, str] | None = None,
        proxy_ssl_context: ssl.SSLContext | None = None,
        use_forwarding_for_https: bool = False,
        proxy_assert_hostname: None | str | Literal[False] = None,
        proxy_assert_fingerprint: str | None = None,
        **connection_pool_kw: typing.Any,
    ) -> None:
        """
        Parse and validate the proxy URL, then initialise the pool manager.

        :raises ProxySchemeUnknown:
            if the proxy scheme is neither ``http`` nor ``https``.
        """
        # A connection pool is also accepted in place of a URL string; its
        # scheme, host and port are turned back into a URL.
        str_proxy_url = (
            f"{proxy_url.scheme}://{proxy_url.host}:{proxy_url.port}"
            if isinstance(proxy_url, HTTPConnectionPool)
            else proxy_url
        )
        parsed = parse_url(str_proxy_url)

        if parsed.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed.scheme)

        # Fill in the scheme's default port when the URL carries none.
        if not parsed.port:
            parsed = parsed._replace(port=port_by_scheme.get(parsed.scheme, 80))

        self.proxy = parsed
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(
            proxy_ssl_context,
            use_forwarding_for_https,
            proxy_assert_hostname,
            proxy_assert_fingerprint,
        )

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super().__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(
        self,
        host: str | None,
        port: int | None = None,
        scheme: str | None = "http",
        pool_kwargs: dict[str, typing.Any] | None = None,
    ) -> HTTPConnectionPool:
        """
        Return the pool to use for the given destination.

        ``https`` destinations get a pool keyed on the destination itself;
        every other scheme is served by the pool of the proxy.
        """
        if scheme != "https":
            # Anything that is not HTTPS is sent to the proxy's own address.
            host, port, scheme = self.proxy.host, self.proxy.port, self.proxy.scheme  # type: ignore[union-attr]

        return super().connection_from_host(host, port, scheme, pool_kwargs=pool_kwargs)

    def _set_proxy_headers(
        self, url: str, headers: typing.Mapping[str, str] | None = None
    ) -> typing.Mapping[str, str]:
        """
        Build the headers a proxy needs, namely ``Accept`` and ``Host``.

        Values supplied in ``headers`` take precedence over these defaults.
        """
        merged = {"Accept": "*/*"}

        destination = parse_url(url).netloc
        if destination:
            merged["Host"] = destination

        # Applied last so the caller's headers override the defaults.
        merged.update(headers or {})
        return merged

    def urlopen(  # type: ignore[override]
        self, method: str, url: str, redirect: bool = True, **kw: typing.Any
    ) -> BaseHTTPResponse:
        """Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."""
        target = parse_url(url)
        tunnelled = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, target.scheme
        )
        if not tunnelled:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. Without CONNECT, at least
            # 'Host' has to be set here.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super().urlopen(method, url, redirect=redirect, **kw)

631 

632 

def proxy_from_url(url: str, **kw: typing.Any) -> ProxyManager:
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    All keyword arguments are passed through to the :class:`ProxyManager`
    constructor unchanged.
    """
    manager = ProxyManager(proxy_url=url, **kw)
    return manager