Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/poolmanager.py: 26%

194 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:51 +0000

1from __future__ import absolute_import 

2 

3import collections 

4import functools 

5import logging 

6 

7from ._collections import HTTPHeaderDict, RecentlyUsedContainer 

8from .connectionpool import HTTPConnectionPool, HTTPSConnectionPool, port_by_scheme 

9from .exceptions import ( 

10 LocationValueError, 

11 MaxRetryError, 

12 ProxySchemeUnknown, 

13 ProxySchemeUnsupported, 

14 URLSchemeUnknown, 

15) 

16from .packages import six 

17from .packages.six.moves.urllib.parse import urljoin 

18from .request import RequestMethods 

19from .util.proxy import connection_requires_http_tunnel 

20from .util.retry import Retry 

21from .util.url import parse_url 

22 

# Public API of this module.
__all__ = ["PoolManager", "ProxyManager", "proxy_from_url"]


# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Keyword arguments that configure TLS. ``PoolManager._new_pool`` drops these
# from the request context before constructing a pool for the "http" scheme.
SSL_KEYWORDS = (
    "key_file",
    "cert_file",
    "cert_reqs",
    "ca_certs",
    "ssl_version",
    "ca_cert_dir",
    "ssl_context",
    "key_password",
    "server_hostname",
)

# All known keyword arguments that could be provided to the pool manager, its
# pools, or the underlying connections. This is used to construct a pool key.
# Every name carries a "key_" prefix because namedtuple fields cannot start
# with an underscore (see ``_default_key_normalizer``). The order of the
# entries is the field order of ``PoolKey``.
_key_fields = (
    "key_scheme",  # str
    "key_host",  # str
    "key_port",  # int
    "key_timeout",  # int or float or Timeout
    "key_retries",  # int or Retry
    "key_strict",  # bool
    "key_block",  # bool
    "key_source_address",  # str
    "key_key_file",  # str
    "key_key_password",  # str
    "key_cert_file",  # str
    "key_cert_reqs",  # str
    "key_ca_certs",  # str
    "key_ssl_version",  # str
    "key_ca_cert_dir",  # str
    "key_ssl_context",  # instance of ssl.SSLContext or urllib3.util.ssl_.SSLContext
    "key_maxsize",  # int
    "key_headers",  # dict
    "key__proxy",  # parsed proxy url
    "key__proxy_headers",  # dict
    "key__proxy_config",  # class
    "key_socket_options",  # list of (level (int), optname (int), value (int or str)) tuples
    "key__socks_options",  # dict
    "key_assert_hostname",  # bool or string
    "key_assert_fingerprint",  # str
    "key_server_hostname",  # str
)

#: The namedtuple class used to construct keys for the connection pool.
#: All custom key schemes should include the fields in this key at a minimum.
PoolKey = collections.namedtuple("PoolKey", _key_fields)

# Proxy settings passed to pools as the ``_proxy_config`` keyword: the SSL
# context given for the proxy and the ``use_forwarding_for_https`` flag.
_proxy_config_fields = ("ssl_context", "use_forwarding_for_https")
ProxyConfig = collections.namedtuple("ProxyConfig", _proxy_config_fields)

77 

78 

79def _default_key_normalizer(key_class, request_context): 

80 """ 

81 Create a pool key out of a request context dictionary. 

82 

83 According to RFC 3986, both the scheme and host are case-insensitive. 

84 Therefore, this function normalizes both before constructing the pool 

85 key for an HTTPS request. If you wish to change this behaviour, provide 

86 alternate callables to ``key_fn_by_scheme``. 

87 

88 :param key_class: 

89 The class to use when constructing the key. This should be a namedtuple 

90 with the ``scheme`` and ``host`` keys at a minimum. 

91 :type key_class: namedtuple 

92 :param request_context: 

93 A dictionary-like object that contain the context for a request. 

94 :type request_context: dict 

95 

96 :return: A namedtuple that can be used as a connection pool key. 

97 :rtype: PoolKey 

98 """ 

99 # Since we mutate the dictionary, make a copy first 

100 context = request_context.copy() 

101 context["scheme"] = context["scheme"].lower() 

102 context["host"] = context["host"].lower() 

103 

104 # These are both dictionaries and need to be transformed into frozensets 

105 for key in ("headers", "_proxy_headers", "_socks_options"): 

106 if key in context and context[key] is not None: 

107 context[key] = frozenset(context[key].items()) 

108 

109 # The socket_options key may be a list and needs to be transformed into a 

110 # tuple. 

111 socket_opts = context.get("socket_options") 

112 if socket_opts is not None: 

113 context["socket_options"] = tuple(socket_opts) 

114 

115 # Map the kwargs to the names in the namedtuple - this is necessary since 

116 # namedtuples can't have fields starting with '_'. 

117 for key in list(context.keys()): 

118 context["key_" + key] = context.pop(key) 

119 

120 # Default to ``None`` for keys missing from the context 

121 for field in key_class._fields: 

122 if field not in context: 

123 context[field] = None 

124 

125 return key_class(**context) 

126 

127 

#: A dictionary that maps a scheme to a callable that creates a pool key.
#: This can be used to alter the way pool keys are constructed, if desired.
#: Each PoolManager makes a copy of this dictionary so they can be configured
#: globally here, or individually on the instance.
key_fn_by_scheme = {
    "http": functools.partial(_default_key_normalizer, PoolKey),
    "https": functools.partial(_default_key_normalizer, PoolKey),
}

# Maps a scheme to the pool class instantiated for it. ``PoolManager.__init__``
# stores a reference to this dictionary on the instance (it is not copied).
pool_classes_by_scheme = {"http": HTTPConnectionPool, "https": HTTPSConnectionPool}

138 

139 

class PoolManager(RequestMethods):
    """
    Allows for arbitrary requests while transparently keeping track of
    necessary connection pools for you.

    :param num_pools:
        Number of connection pools to cache before discarding the least
        recently used pool.

    :param headers:
        Headers to include with all requests, unless other headers are given
        explicitly.

    :param \\**connection_pool_kw:
        Additional parameters are used to create fresh
        :class:`urllib3.connectionpool.ConnectionPool` instances.

    Example::

        >>> manager = PoolManager(num_pools=2)
        >>> r = manager.request('GET', 'http://google.com/')
        >>> r = manager.request('GET', 'http://google.com/mail')
        >>> r = manager.request('GET', 'http://yahoo.com/')
        >>> len(manager.pools)
        2

    """

    # A plain PoolManager talks to destinations directly; ProxyManager
    # overrides both attributes on the instance.
    proxy = None
    proxy_config = None

    def __init__(self, num_pools=10, headers=None, **connection_pool_kw):
        RequestMethods.__init__(self, headers)
        self.connection_pool_kw = connection_pool_kw
        self.pools = RecentlyUsedContainer(num_pools)

        # Locally set the pool classes and keys so other PoolManagers can
        # override them. Note that only ``key_fn_by_scheme`` is copied;
        # ``pool_classes_by_scheme`` is a reference to the module-level dict.
        self.pool_classes_by_scheme = pool_classes_by_scheme
        self.key_fn_by_scheme = key_fn_by_scheme.copy()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.clear()
        # Return False to re-raise any potential exceptions
        return False

    def _new_pool(self, scheme, host, port, request_context=None):
        """
        Create a new :class:`urllib3.connectionpool.ConnectionPool` based on host, port, scheme, and
        any additional pool keyword arguments.

        If ``request_context`` is provided, it is provided as keyword arguments
        to the pool class used (the ``scheme``, ``host`` and ``port`` entries
        are removed from it in place first). Otherwise a copy of
        ``self.connection_pool_kw`` is used. This method is used to actually
        create the connection pools handed out by :meth:`connection_from_url`
        and companion methods. It is intended to be overridden for
        customization.

        :raises KeyError: if ``scheme`` has no entry in
            ``self.pool_classes_by_scheme``.
        """
        pool_cls = self.pool_classes_by_scheme[scheme]
        if request_context is None:
            request_context = self.connection_pool_kw.copy()

        # Although the context has everything necessary to create the pool,
        # this function has historically only used the scheme, host, and port
        # in the positional args. When an API change is acceptable these can
        # be removed.
        unwanted = ["scheme", "host", "port"]
        if scheme == "http":
            # Plain HTTP pools do not accept the TLS keyword arguments.
            unwanted.extend(SSL_KEYWORDS)
        for name in unwanted:
            request_context.pop(name, None)

        return pool_cls(host, port, **request_context)

    def clear(self):
        """
        Empty our store of pools and direct them all to close.

        This will not affect in-flight connections, but they will not be
        re-used after completion.
        """
        self.pools.clear()

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the host, port, and scheme.

        If ``port`` isn't given, it will be derived from the ``scheme`` using
        ``urllib3.connectionpool.port_by_scheme``. If ``pool_kwargs`` is
        provided, it is merged with the instance's ``connection_pool_kw``
        variable and used to create the new connection pool, if one is
        needed.

        :raises LocationValueError: if ``host`` is empty or ``None``.
        """
        if not host:
            raise LocationValueError("No host specified.")

        # A falsy scheme (e.g. ``None`` from a scheme-less URL) means "http".
        effective_scheme = scheme or "http"

        request_context = self._merge_pool_kwargs(pool_kwargs)
        request_context["scheme"] = effective_scheme
        request_context["port"] = port or port_by_scheme.get(
            effective_scheme.lower(), 80
        )
        request_context["host"] = host

        return self.connection_from_context(request_context)

    def connection_from_context(self, request_context):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the request context.

        ``request_context`` must at least contain the ``scheme`` key and its
        value must be a key in ``key_fn_by_scheme`` instance variable.

        :raises URLSchemeUnknown: if no key function is registered for the
            (lower-cased) scheme.
        """
        scheme = request_context["scheme"].lower()
        pool_key_constructor = self.key_fn_by_scheme.get(scheme)
        if not pool_key_constructor:
            raise URLSchemeUnknown(scheme)

        return self.connection_from_pool_key(
            pool_key_constructor(request_context), request_context=request_context
        )

    def connection_from_pool_key(self, pool_key, request_context=None):
        """
        Get a :class:`urllib3.connectionpool.ConnectionPool` based on the provided pool key.

        ``pool_key`` should be a namedtuple that only contains immutable
        objects. At a minimum it must have the ``scheme``, ``host``, and
        ``port`` fields.

        ``request_context`` is only consulted when no pool is cached under
        ``pool_key``; in that case it must be a mapping providing ``scheme``,
        ``host`` and ``port`` (the ``None`` default is not handled).
        """
        with self.pools.lock:
            # If the scheme, host, or port doesn't match existing open
            # connections, open a new ConnectionPool.
            pool = self.pools.get(pool_key)
            if pool:
                return pool

            # Make a fresh ConnectionPool of the desired type
            pool = self._new_pool(
                request_context["scheme"],
                request_context["host"],
                request_context["port"],
                request_context=request_context,
            )
            self.pools[pool_key] = pool

        return pool

    def connection_from_url(self, url, pool_kwargs=None):
        """
        Similar to :func:`urllib3.connectionpool.connection_from_url`.

        If ``pool_kwargs`` is not provided and a new pool needs to be
        constructed, ``self.connection_pool_kw`` is used to initialize
        the :class:`urllib3.connectionpool.ConnectionPool`. If ``pool_kwargs``
        is provided, it is merged over ``self.connection_pool_kw``. Note that
        if a new pool does not need to be created for the request, the
        provided ``pool_kwargs`` are not used.
        """
        u = parse_url(url)
        return self.connection_from_host(
            u.host, port=u.port, scheme=u.scheme, pool_kwargs=pool_kwargs
        )

    def _merge_pool_kwargs(self, override):
        """
        Merge a dictionary of override values for self.connection_pool_kw.

        This does not modify self.connection_pool_kw and returns a new dict.
        Any keys in the override dictionary with a value of ``None`` are
        removed from the merged dictionary.
        """
        merged = self.connection_pool_kw.copy()
        for key, value in (override or {}).items():
            if value is None:
                # ``None`` means "drop the inherited setting", if there is one.
                merged.pop(key, None)
            else:
                merged[key] = value
        return merged

    def _proxy_requires_url_absolute_form(self, parsed_url):
        """
        Indicates if the proxy requires the complete destination URL in the
        request. Normally this is only needed when not using an HTTP CONNECT
        tunnel.
        """
        if self.proxy is None:
            return False

        return not connection_requires_http_tunnel(
            self.proxy, self.proxy_config, parsed_url.scheme
        )

    def _validate_proxy_scheme_url_selection(self, url_scheme):
        """
        Validates that we are not attempting to do TLS in TLS connections on
        Python2 or with unsupported SSL implementations.

        :raises ProxySchemeUnsupported: on Python 2, for an HTTPS destination
            through an HTTPS proxy when ``use_forwarding_for_https`` is off.
        """
        # Only HTTPS destinations reached through an HTTPS proxy are affected.
        if (
            self.proxy is None
            or url_scheme != "https"
            or self.proxy.scheme != "https"
        ):
            return

        if six.PY2 and not self.proxy_config.use_forwarding_for_https:
            raise ProxySchemeUnsupported(
                "Contacting HTTPS destinations through HTTPS proxies "
                "'via CONNECT tunnels' is not supported in Python 2"
            )

    def urlopen(self, method, url, redirect=True, **kw):
        """
        Same as :meth:`urllib3.HTTPConnectionPool.urlopen`
        with custom cross-host redirect logic and only sends the request-uri
        portion of the ``url``.

        The given ``url`` parameter must be absolute, such that an appropriate
        :class:`urllib3.connectionpool.ConnectionPool` can be chosen for it.

        When a redirect leads to a different host, the headers listed in
        ``Retry.remove_headers_on_redirect`` are removed from a copy of the
        request headers; the mapping supplied by the caller is left untouched.
        """
        u = parse_url(url)
        self._validate_proxy_scheme_url_selection(u.scheme)

        conn = self.connection_from_host(u.host, port=u.port, scheme=u.scheme)

        # Redirects are followed here (possibly across hosts), so the pool
        # itself must neither follow them nor reject other hosts.
        kw["assert_same_host"] = False
        kw["redirect"] = False

        if "headers" not in kw:
            kw["headers"] = self.headers.copy()

        if self._proxy_requires_url_absolute_form(u):
            response = conn.urlopen(method, url, **kw)
        else:
            response = conn.urlopen(method, u.request_uri, **kw)

        redirect_location = redirect and response.get_redirect_location()
        if not redirect_location:
            return response

        # Support relative URLs for redirecting.
        redirect_location = urljoin(url, redirect_location)

        if response.status == 303:
            # Change the method according to RFC 9110, Section 15.4.4.
            method = "GET"
            # And lose the body not to transfer anything sensitive.
            kw["body"] = None
            kw["headers"] = HTTPHeaderDict(kw["headers"])._prepare_for_method_change()

        retries = kw.get("retries")
        if not isinstance(retries, Retry):
            retries = Retry.from_int(retries, redirect=redirect)

        # Strip headers marked as unsafe to forward to the redirected location.
        # Check remove_headers_on_redirect to avoid a potential network call within
        # conn.is_same_host() which may use socket.gethostbyname() in the future.
        if retries.remove_headers_on_redirect and not conn.is_same_host(
            redirect_location
        ):
            # Strip from a copy so that the caller's headers mapping is not
            # mutated as a side effect of following the redirect.
            original_headers = kw["headers"]
            forwarded_headers = original_headers.copy()
            for header in six.iterkeys(original_headers):
                if header.lower() in retries.remove_headers_on_redirect:
                    forwarded_headers.pop(header, None)
            kw["headers"] = forwarded_headers

        try:
            retries = retries.increment(method, url, response=response, _pool=conn)
        except MaxRetryError:
            if retries.raise_on_redirect:
                response.drain_conn()
                raise
            # Redirect budget exhausted but the caller asked not to raise:
            # hand back the redirect response itself.
            return response

        kw["retries"] = retries
        kw["redirect"] = redirect

        log.info("Redirecting %s -> %s", url, redirect_location)

        response.drain_conn()
        return self.urlopen(method, redirect_location, **kw)

422 

423 

class ProxyManager(PoolManager):
    """
    A :class:`PoolManager` that routes every request through the configured
    proxy, using the CONNECT method for HTTPS URLs.

    :param proxy_url:
        The URL of the proxy to be used. An ``HTTPConnectionPool`` instance
        is also accepted; its scheme, host and port are used to build the URL.

    :param proxy_headers:
        A dictionary containing headers that will be sent to the proxy. In case
        of HTTP they are being sent with each request, while in the
        HTTPS/CONNECT case they are sent only once. Could be used for proxy
        authentication.

    :param proxy_ssl_context:
        The proxy SSL context is used to establish the TLS connection to the
        proxy when using HTTPS proxies.

    :param use_forwarding_for_https:
        (Defaults to False) If set to True will forward requests to the HTTPS
        proxy to be made on behalf of the client instead of creating a TLS
        tunnel via the CONNECT method. **Enabling this flag means that request
        and response headers and content will be visible from the HTTPS proxy**
        whereas tunneling keeps request and response headers and content
        private. IP address, target hostname, SNI, and port are always visible
        to an HTTPS proxy even when this flag is disabled.

    Example:
        >>> proxy = urllib3.ProxyManager('http://localhost:3128/')
        >>> r1 = proxy.request('GET', 'http://google.com/')
        >>> r2 = proxy.request('GET', 'http://httpbin.org/')
        >>> len(proxy.pools)
        1
        >>> r3 = proxy.request('GET', 'https://httpbin.org/')
        >>> r4 = proxy.request('GET', 'https://twitter.com/')
        >>> len(proxy.pools)
        3

    """

    def __init__(
        self,
        proxy_url,
        num_pools=10,
        headers=None,
        proxy_headers=None,
        proxy_ssl_context=None,
        use_forwarding_for_https=False,
        **connection_pool_kw
    ):
        # A connection pool may stand in for the proxy URL; rebuild the URL
        # from the pool's scheme, host and port.
        if isinstance(proxy_url, HTTPConnectionPool):
            pool = proxy_url
            proxy_url = "%s://%s:%i" % (pool.scheme, pool.host, pool.port)

        parsed_proxy = parse_url(proxy_url)
        if parsed_proxy.scheme not in ("http", "https"):
            raise ProxySchemeUnknown(parsed_proxy.scheme)

        # Fill in the scheme's default port when the URL did not carry one.
        if not parsed_proxy.port:
            parsed_proxy = parsed_proxy._replace(
                port=port_by_scheme.get(parsed_proxy.scheme, 80)
            )

        self.proxy = parsed_proxy
        self.proxy_headers = proxy_headers or {}
        self.proxy_ssl_context = proxy_ssl_context
        self.proxy_config = ProxyConfig(proxy_ssl_context, use_forwarding_for_https)

        # Every pool created by this manager receives the proxy settings.
        connection_pool_kw.update(
            _proxy=self.proxy,
            _proxy_headers=self.proxy_headers,
            _proxy_config=self.proxy_config,
        )

        super(ProxyManager, self).__init__(num_pools, headers, **connection_pool_kw)

    def connection_from_host(self, host, port=None, scheme="http", pool_kwargs=None):
        """
        Get a connection pool for the destination.

        For the ``"https"`` scheme the pool is looked up by the destination's
        own host, port and scheme; for any other scheme the pool for the
        proxy's host, port and scheme is returned instead.
        """
        if scheme != "https":
            host, port, scheme = self.proxy.host, self.proxy.port, self.proxy.scheme

        return super(ProxyManager, self).connection_from_host(
            host, port, scheme, pool_kwargs=pool_kwargs
        )

    def _set_proxy_headers(self, url, headers=None):
        """
        Build the headers needed by proxies: specifically, the Accept and Host
        headers. Values supplied in ``headers`` take precedence over these
        defaults. A new dictionary is returned; ``headers`` is not modified.
        """
        proxied_headers = {"Accept": "*/*"}

        netloc = parse_url(url).netloc
        if netloc:
            proxied_headers["Host"] = netloc

        if headers:
            proxied_headers.update(headers)
        return proxied_headers

    def urlopen(self, method, url, redirect=True, **kw):
        "Same as HTTP(S)ConnectionPool.urlopen, ``url`` must be absolute."
        destination = parse_url(url)
        uses_tunnel = connection_requires_http_tunnel(
            self.proxy, self.proxy_config, destination.scheme
        )
        if not uses_tunnel:
            # For connections using HTTP CONNECT, httplib sets the necessary
            # headers on the CONNECT to the proxy. If we're not using CONNECT,
            # we'll definitely need to set 'Host' at the very least.
            kw["headers"] = self._set_proxy_headers(
                url, kw.get("headers", self.headers)
            )

        return super(ProxyManager, self).urlopen(method, url, redirect=redirect, **kw)

537 

538 

def proxy_from_url(url, **kw):
    """
    Create a :class:`ProxyManager` for the proxy at ``url``.

    :param url:
        Passed to :class:`ProxyManager` as ``proxy_url``.
    :param \\**kw:
        Forwarded unchanged to the :class:`ProxyManager` constructor.
    :return: The new :class:`ProxyManager` instance.
    """
    return ProxyManager(proxy_url=url, **kw)