Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/connection.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

389 statements  

1from __future__ import annotations 

2 

3import datetime 

4import http.client 

5import logging 

6import os 

7import re 

8import socket 

9import sys 

10import threading 

11import typing 

12import warnings 

13from http.client import HTTPConnection as _HTTPConnection 

14from http.client import HTTPException as HTTPException # noqa: F401 

15from http.client import ResponseNotReady 

16from socket import timeout as SocketTimeout 

17 

18if typing.TYPE_CHECKING: 

19 from .response import HTTPResponse 

20 from .util.ssl_ import _TYPE_PEER_CERT_RET_DICT 

21 from .util.ssltransport import SSLTransport 

22 

23from ._collections import HTTPHeaderDict 

24from .http2 import probe as http2_probe 

25from .util.response import assert_header_parsing 

26from .util.timeout import _DEFAULT_TIMEOUT, _TYPE_TIMEOUT, Timeout 

27from .util.util import to_str 

28from .util.wait import wait_for_read 

29 

# Detect whether this interpreter has SSL support. When the ``ssl`` module is
# unavailable we install a stand-in ``BaseSSLError`` so that
# ``except BaseSSLError`` clauses elsewhere in the package stay syntactically
# valid — they simply never match, since nothing raises the stand-in.
try:  # Compiled with SSL?
    import ssl

    BaseSSLError = ssl.SSLError
except (ImportError, AttributeError):
    ssl = None  # type: ignore[assignment]

    class BaseSSLError(BaseException):  # type: ignore[no-redef]
        # Placeholder exception type; never raised, only caught.
        pass

39 

40 

41from ._base_connection import _TYPE_BODY 

42from ._base_connection import ProxyConfig as ProxyConfig 

43from ._base_connection import _ResponseOptions as _ResponseOptions 

44from ._version import __version__ 

45from .exceptions import ( 

46 ConnectTimeoutError, 

47 HeaderParsingError, 

48 NameResolutionError, 

49 NewConnectionError, 

50 ProxyError, 

51 SystemTimeWarning, 

52) 

53from .util import SKIP_HEADER, SKIPPABLE_HEADERS, connection, ssl_ 

54from .util.request import body_to_chunks 

55from .util.ssl_ import assert_fingerprint as _assert_fingerprint 

56from .util.ssl_ import ( 

57 create_urllib3_context, 

58 is_ipaddress, 

59 resolve_cert_reqs, 

60 resolve_ssl_version, 

61 ssl_wrap_socket, 

62) 

63from .util.ssl_match_hostname import CertificateError, match_hostname 

64from .util.url import Url 

65 

66# Not a no-op, we're adding this to the namespace so it can be imported. 

67ConnectionError = ConnectionError 

68BrokenPipeError = BrokenPipeError 

69 

70 

71log = logging.getLogger(__name__) 

72 

73port_by_scheme = {"http": 80, "https": 443} 

74 

75# When it comes time to update this value as a part of regular maintenance 

76# (ie test_recent_date is failing) update it to ~6 months before the current date. 

77RECENT_DATE = datetime.date(2023, 6, 1) 

78 

79_CONTAINS_CONTROL_CHAR_RE = re.compile(r"[^-!#$%&'*+.^_`|~0-9a-zA-Z]") 

80 

81 

class HTTPConnection(_HTTPConnection):
    """
    Based on :class:`http.client.HTTPConnection` but provides an extra constructor
    backwards-compatibility layer between older and newer Pythons.

    Additional keyword parameters are used to configure attributes of the connection.
    Accepted parameters include:

    - ``source_address``: Set the source address for the current connection.
    - ``socket_options``: Set specific options on the underlying socket. If not specified, then
      defaults are loaded from ``HTTPConnection.default_socket_options`` which includes disabling
      Nagle's algorithm (sets TCP_NODELAY to 1) unless the connection is behind a proxy.

      For example, if you wish to enable TCP Keep Alive in addition to the defaults,
      you might pass:

      .. code-block:: python

         HTTPConnection.default_socket_options + [
             (socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1),
         ]

      Or you may want to disable the defaults by passing an empty list (e.g., ``[]``).
    """

    default_port: typing.ClassVar[int] = port_by_scheme["http"]  # type: ignore[misc]

    #: Disable Nagle's algorithm by default.
    #: ``[(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)]``
    default_socket_options: typing.ClassVar[connection._TYPE_SOCKET_OPTIONS] = [
        (socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
    ]

    #: Whether this connection verifies the host's certificate.
    is_verified: bool = False

    #: Whether this proxy connection verified the proxy host's certificate.
    # If no proxy is currently connected to the value will be ``None``.
    proxy_is_verified: bool | None = None

    # Chunk size (bytes) used when streaming request bodies.
    blocksize: int
    # Optional (host, port) pair to bind the outgoing socket to.
    source_address: tuple[str, int] | None
    # Socket options applied in _new_conn(); ``None`` applies no options.
    socket_options: connection._TYPE_SOCKET_OPTIONS | None

    # Set True once a proxy connection (tunneling or forwarding) is made.
    _has_connected_to_proxy: bool
    # Captured by request() and consumed exactly once by getresponse().
    _response_options: _ResponseOptions | None
    _tunnel_host: str | None
    _tunnel_port: int | None
    _tunnel_scheme: str | None

    def __init__(
        self,
        host: str,
        port: int | None = None,
        *,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        source_address: tuple[str, int] | None = None,
        blocksize: int = 16384,
        socket_options: None | (
            connection._TYPE_SOCKET_OPTIONS
        ) = default_socket_options,
        proxy: Url | None = None,
        proxy_config: ProxyConfig | None = None,
    ) -> None:
        super().__init__(
            host=host,
            port=port,
            # Translate urllib3's sentinel default into what http.client expects.
            timeout=Timeout.resolve_default_timeout(timeout),
            source_address=source_address,
            blocksize=blocksize,
        )
        self.socket_options = socket_options
        self.proxy = proxy
        self.proxy_config = proxy_config

        self._has_connected_to_proxy = False
        self._response_options = None
        self._tunnel_host: str | None = None
        self._tunnel_port: int | None = None
        self._tunnel_scheme: str | None = None

    @property
    def host(self) -> str:
        """
        Getter method to remove any trailing dots that indicate the hostname is an FQDN.

        In general, SSL certificates don't include the trailing dot indicating a
        fully-qualified domain name, and thus, they don't validate properly when
        checked against a domain name that includes the dot. In addition, some
        servers may not expect to receive the trailing dot when provided.

        However, the hostname with trailing dot is critical to DNS resolution; doing a
        lookup with the trailing dot will properly only resolve the appropriate FQDN,
        whereas a lookup without a trailing dot will search the system's search domain
        list. Thus, it's important to keep the original host around for use only in
        those cases where it's appropriate (i.e., when doing DNS lookup to establish the
        actual TCP connection across which we're going to send HTTP requests).
        """
        return self._dns_host.rstrip(".")

    @host.setter
    def host(self, value: str) -> None:
        """
        Setter for the `host` property.

        We assume that only urllib3 uses the _dns_host attribute; httplib itself
        only uses `host`, and it seems reasonable that other libraries follow suit.
        """
        self._dns_host = value

    def _new_conn(self) -> socket.socket:
        """Establish a socket connection and set nodelay settings on it.

        :return: New socket connection.
        :raises NameResolutionError: if DNS resolution of the host fails.
        :raises ConnectTimeoutError: if the connect attempt times out.
        :raises NewConnectionError: for any other OS-level connection failure.
        """
        try:
            # Note: uses the un-stripped ``_dns_host`` (may carry a trailing
            # dot) so DNS resolves the exact FQDN — see the ``host`` property.
            sock = connection.create_connection(
                (self._dns_host, self.port),
                self.timeout,
                source_address=self.source_address,
                socket_options=self.socket_options,
            )
        except socket.gaierror as e:
            raise NameResolutionError(self.host, self, e) from e
        except SocketTimeout as e:
            raise ConnectTimeoutError(
                self,
                f"Connection to {self.host} timed out. (connect timeout={self.timeout})",
            ) from e

        except OSError as e:
            raise NewConnectionError(
                self, f"Failed to establish a new connection: {e}"
            ) from e

        # Mirror http.client's audit event so auditing hooks see urllib3
        # connections too.
        sys.audit("http.client.connect", self, self.host, self.port)

        return sock

    def set_tunnel(
        self,
        host: str,
        port: int | None = None,
        headers: typing.Mapping[str, str] | None = None,
        scheme: str = "http",
    ) -> None:
        """Set up host and port for an HTTP CONNECT tunnel, remembering the
        proxy scheme so connect() knows whether TLS-in-TLS is required.

        :raises ValueError: if ``scheme`` is neither ``"http"`` nor ``"https"``.
        """
        if scheme not in ("http", "https"):
            raise ValueError(
                f"Invalid proxy scheme for tunneling: {scheme!r}, must be either 'http' or 'https'"
            )
        super().set_tunnel(host, port=port, headers=headers)
        self._tunnel_scheme = scheme

    if sys.version_info < (3, 11, 4):
        # Backport: older CPython versions lack fixes present in the 3.11.4+
        # implementation of ``_tunnel``, so we ship our own copy for them.

        def _tunnel(self) -> None:
            _MAXLINE = http.client._MAXLINE  # type: ignore[attr-defined]
            connect = b"CONNECT %s:%d HTTP/1.0\r\n" % (  # type: ignore[str-format]
                self._tunnel_host.encode("ascii"),  # type: ignore[union-attr]
                self._tunnel_port,
            )
            headers = [connect]
            for header, value in self._tunnel_headers.items():  # type: ignore[attr-defined]
                headers.append(f"{header}: {value}\r\n".encode("latin-1"))
            headers.append(b"\r\n")
            # Making a single send() call instead of one per line encourages
            # the host OS to use a more optimal packet size instead of
            # potentially emitting a series of small packets.
            self.send(b"".join(headers))
            del headers

            response = self.response_class(self.sock, method=self._method)  # type: ignore[attr-defined]
            try:
                (version, code, message) = response._read_status()  # type: ignore[attr-defined]

                if code != http.HTTPStatus.OK:
                    self.close()
                    raise OSError(f"Tunnel connection failed: {code} {message.strip()}")
                # Drain (and discard) the proxy's response headers.
                while True:
                    line = response.fp.readline(_MAXLINE + 1)
                    if len(line) > _MAXLINE:
                        raise http.client.LineTooLong("header line")
                    if not line:
                        # for sites which EOF without sending a trailer
                        break
                    if line in (b"\r\n", b"\n", b""):
                        break

                    if self.debuglevel > 0:
                        print("header:", line.decode())
            finally:
                response.close()

    def connect(self) -> None:
        """Open a TCP connection, tunneling through the proxy if configured."""
        self.sock = self._new_conn()
        if self._tunnel_host:
            # If we're tunneling it means we're connected to our proxy.
            self._has_connected_to_proxy = True

            # TODO: Fix tunnel so it doesn't depend on self.sock state.
            self._tunnel()

        # If there's a proxy to be connected to we are fully connected.
        # This is set twice (once above and here) due to forwarding proxies
        # not using tunnelling.
        self._has_connected_to_proxy = bool(self.proxy)

        # Plain-HTTP proxies can never be certificate-verified.
        if self._has_connected_to_proxy:
            self.proxy_is_verified = False

    @property
    def is_closed(self) -> bool:
        # ``sock`` is reset to None by close().
        return self.sock is None

    @property
    def is_connected(self) -> bool:
        if self.sock is None:
            return False
        # If the socket is readable with no pending request, the peer has
        # either sent unexpected data or closed — treat as not connected.
        return not wait_for_read(self.sock, timeout=0.0)

    @property
    def has_connected_to_proxy(self) -> bool:
        return self._has_connected_to_proxy

    @property
    def proxy_is_forwarding(self) -> bool:
        """
        Return True if a forwarding proxy is configured, else return False
        """
        return bool(self.proxy) and self._tunnel_host is None

    @property
    def proxy_is_tunneling(self) -> bool:
        """
        Return True if a tunneling proxy is configured, else return False
        """
        return self._tunnel_host is not None

    def close(self) -> None:
        """Close the connection and reset all per-connection state."""
        try:
            super().close()
        finally:
            # Reset all stateful properties so connection
            # can be re-used without leaking prior configs.
            self.sock = None
            self.is_verified = False
            self.proxy_is_verified = None
            self._has_connected_to_proxy = False
            self._response_options = None
            self._tunnel_host = None
            self._tunnel_port = None
            self._tunnel_scheme = None

    def putrequest(
        self,
        method: str,
        url: str,
        skip_host: bool = False,
        skip_accept_encoding: bool = False,
    ) -> None:
        """"""
        # Empty docstring because the indentation of CPython's implementation
        # is broken but we don't want this method in our documentation.
        # Reject methods containing non-token characters (request smuggling
        # defense) before delegating to http.client.
        match = _CONTAINS_CONTROL_CHAR_RE.search(method)
        if match:
            raise ValueError(
                f"Method cannot contain non-token characters {method!r} (found at least {match.group()!r})"
            )

        return super().putrequest(
            method, url, skip_host=skip_host, skip_accept_encoding=skip_accept_encoding
        )

    def putheader(self, header: str, *values: str) -> None:  # type: ignore[override]
        """"""
        # SKIP_HEADER is a sentinel callers use to suppress a header urllib3
        # would otherwise emit; it's only honored for SKIPPABLE_HEADERS.
        if not any(isinstance(v, str) and v == SKIP_HEADER for v in values):
            super().putheader(header, *values)
        elif to_str(header.lower()) not in SKIPPABLE_HEADERS:
            skippable_headers = "', '".join(
                [str.title(header) for header in sorted(SKIPPABLE_HEADERS)]
            )
            raise ValueError(
                f"urllib3.util.SKIP_HEADER only supports '{skippable_headers}'"
            )

    # `request` method's signature intentionally violates LSP.
    # urllib3's API is different from `http.client.HTTPConnection` and the subclassing is only incidental.
    def request(  # type: ignore[override]
        self,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
        *,
        chunked: bool = False,
        preload_content: bool = True,
        decode_content: bool = True,
        enforce_content_length: bool = True,
    ) -> None:
        """Send a complete HTTP request (headers and body) to the server.

        Framing (Content-Length vs. Transfer-Encoding: chunked) is chosen
        from ``chunked``, any framing headers the caller supplied, and the
        body type. The response-handling flags are stashed for the matching
        ``getresponse()`` call.
        """
        # Update the inner socket's timeout value to send the request.
        # This only triggers if the connection is re-used.
        if self.sock is not None:
            self.sock.settimeout(self.timeout)

        # Store these values to be fed into the HTTPResponse
        # object later. TODO: Remove this in favor of a real
        # HTTP lifecycle mechanism.

        # We have to store these before we call .request()
        # because sometimes we can still salvage a response
        # off the wire even if we aren't able to completely
        # send the request body.
        self._response_options = _ResponseOptions(
            request_method=method,
            request_url=url,
            preload_content=preload_content,
            decode_content=decode_content,
            enforce_content_length=enforce_content_length,
        )

        if headers is None:
            headers = {}
        header_keys = frozenset(to_str(k.lower()) for k in headers)
        skip_accept_encoding = "accept-encoding" in header_keys
        skip_host = "host" in header_keys
        self.putrequest(
            method, url, skip_accept_encoding=skip_accept_encoding, skip_host=skip_host
        )

        # Transform the body into an iterable of sendall()-able chunks
        # and detect if an explicit Content-Length is doable.
        chunks_and_cl = body_to_chunks(body, method=method, blocksize=self.blocksize)
        chunks = chunks_and_cl.chunks
        content_length = chunks_and_cl.content_length

        # When chunked is explicitly set to 'True' we respect that.
        if chunked:
            if "transfer-encoding" not in header_keys:
                self.putheader("Transfer-Encoding", "chunked")
        else:
            # Detect whether a framing mechanism is already in use. If so
            # we respect that value, otherwise we pick chunked vs content-length
            # depending on the type of 'body'.
            if "content-length" in header_keys:
                chunked = False
            elif "transfer-encoding" in header_keys:
                chunked = True

            # Otherwise we go off the recommendation of 'body_to_chunks()'.
            else:
                chunked = False
                if content_length is None:
                    if chunks is not None:
                        chunked = True
                        self.putheader("Transfer-Encoding", "chunked")
                else:
                    self.putheader("Content-Length", str(content_length))

        # Now that framing headers are out of the way we send all the other headers.
        if "user-agent" not in header_keys:
            self.putheader("User-Agent", _get_default_user_agent())
        for header, value in headers.items():
            self.putheader(header, value)
        self.endheaders()

        # If we're given a body we start sending that in chunks.
        if chunks is not None:
            for chunk in chunks:
                # Sending empty chunks isn't allowed for TE: chunked
                # as it indicates the end of the body.
                if not chunk:
                    continue
                if isinstance(chunk, str):
                    chunk = chunk.encode("utf-8")
                if chunked:
                    # Chunked framing: hex length, CRLF, payload, CRLF.
                    self.send(b"%x\r\n%b\r\n" % (len(chunk), chunk))
                else:
                    self.send(chunk)

        # Regardless of whether we have a body or not, if we're in
        # chunked mode we want to send an explicit empty chunk.
        if chunked:
            self.send(b"0\r\n\r\n")

    def request_chunked(
        self,
        method: str,
        url: str,
        body: _TYPE_BODY | None = None,
        headers: typing.Mapping[str, str] | None = None,
    ) -> None:
        """
        Alternative to the common request method, which sends the
        body with chunked encoding and not as one block
        """
        warnings.warn(
            "HTTPConnection.request_chunked() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPConnection.request(..., chunked=True).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        self.request(method, url, body=body, headers=headers, chunked=True)

    def getresponse(  # type: ignore[override]
        self,
    ) -> HTTPResponse:
        """
        Get the response from the server.

        If the HTTPConnection is in the correct state, returns an instance of
        HTTPResponse or of whatever object is returned by the response_class
        variable.

        If a request has not been sent or if a previous response has not been
        handled, ResponseNotReady is raised. If the HTTP response indicates
        that the connection should be closed, then it will be closed before
        the response is returned. When the connection is closed, the
        underlying socket is closed.
        """
        # Raise the same error as http.client.HTTPConnection
        if self._response_options is None:
            raise ResponseNotReady()

        # Reset this attribute for being used again.
        resp_options = self._response_options
        self._response_options = None

        # Since the connection's timeout value may have been updated
        # we need to set the timeout on the socket.
        self.sock.settimeout(self.timeout)

        # This is needed here to avoid circular import errors
        from .response import HTTPResponse

        # Save a reference to the shutdown function before ownership is passed
        # to httplib_response
        # TODO should we implement it everywhere?
        _shutdown = getattr(self.sock, "shutdown", None)

        # Get the response from http.client.HTTPConnection
        httplib_response = super().getresponse()

        try:
            assert_header_parsing(httplib_response.msg)
        except (HeaderParsingError, TypeError) as hpe:
            # Header-parse failures are logged, not raised — the response is
            # still usable.
            log.warning(
                "Failed to parse headers (url=%s): %s",
                _url_from_connection(self, resp_options.request_url),
                hpe,
                exc_info=True,
            )

        headers = HTTPHeaderDict(httplib_response.msg.items())

        response = HTTPResponse(
            body=httplib_response,
            headers=headers,
            status=httplib_response.status,
            version=httplib_response.version,
            version_string=getattr(self, "_http_vsn_str", "HTTP/?"),
            reason=httplib_response.reason,
            preload_content=resp_options.preload_content,
            decode_content=resp_options.decode_content,
            original_response=httplib_response,
            enforce_content_length=resp_options.enforce_content_length,
            request_method=resp_options.request_method,
            request_url=resp_options.request_url,
            sock_shutdown=_shutdown,
        )
        return response

546 

547 

class HTTPSConnection(HTTPConnection):
    """
    Many of the parameters to this constructor are passed to the underlying SSL
    socket by means of :py:func:`urllib3.util.ssl_wrap_socket`.
    """

    default_port = port_by_scheme["https"]  # type: ignore[misc]

    # TLS configuration; defaults here are overridden per-instance in
    # __init__ / set_cert().
    cert_reqs: int | str | None = None
    ca_certs: str | None = None
    ca_cert_dir: str | None = None
    ca_cert_data: None | str | bytes = None
    ssl_version: int | str | None = None
    ssl_minimum_version: int | None = None
    ssl_maximum_version: int | None = None
    assert_fingerprint: str | None = None
    # Test/diagnostic hook invoked around connect(); not part of public API.
    _connect_callback: typing.Callable[..., None] | None = None

    def __init__(
        self,
        host: str,
        port: int | None = None,
        *,
        timeout: _TYPE_TIMEOUT = _DEFAULT_TIMEOUT,
        source_address: tuple[str, int] | None = None,
        blocksize: int = 16384,
        socket_options: None | (
            connection._TYPE_SOCKET_OPTIONS
        ) = HTTPConnection.default_socket_options,
        proxy: Url | None = None,
        proxy_config: ProxyConfig | None = None,
        cert_reqs: int | str | None = None,
        assert_hostname: None | str | typing.Literal[False] = None,
        assert_fingerprint: str | None = None,
        server_hostname: str | None = None,
        ssl_context: ssl.SSLContext | None = None,
        ca_certs: str | None = None,
        ca_cert_dir: str | None = None,
        ca_cert_data: None | str | bytes = None,
        ssl_minimum_version: int | None = None,
        ssl_maximum_version: int | None = None,
        ssl_version: int | str | None = None,  # Deprecated
        cert_file: str | None = None,
        key_file: str | None = None,
        key_password: str | None = None,
    ) -> None:
        super().__init__(
            host,
            port=port,
            timeout=timeout,
            source_address=source_address,
            blocksize=blocksize,
            socket_options=socket_options,
            proxy=proxy,
            proxy_config=proxy_config,
        )

        self.key_file = key_file
        self.cert_file = cert_file
        self.key_password = key_password
        self.ssl_context = ssl_context
        self.server_hostname = server_hostname
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ssl_version = ssl_version
        self.ssl_minimum_version = ssl_minimum_version
        self.ssl_maximum_version = ssl_maximum_version
        # Expand "~" so user-relative CA paths work.
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

        # cert_reqs depends on ssl_context so calculate last.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)
        self.cert_reqs = cert_reqs
        self._connect_callback = None

    def set_cert(
        self,
        key_file: str | None = None,
        cert_file: str | None = None,
        cert_reqs: int | str | None = None,
        key_password: str | None = None,
        ca_certs: str | None = None,
        assert_hostname: None | str | typing.Literal[False] = None,
        assert_fingerprint: str | None = None,
        ca_cert_dir: str | None = None,
        ca_cert_data: None | str | bytes = None,
    ) -> None:
        """
        This method should only be called once, before the connection is used.
        """
        warnings.warn(
            "HTTPSConnection.set_cert() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead provide the parameters to the "
            "HTTPSConnection constructor.",
            category=DeprecationWarning,
            stacklevel=2,
        )

        # If cert_reqs is not provided we'll assume CERT_REQUIRED unless we also
        # have an SSLContext object in which case we'll use its verify_mode.
        if cert_reqs is None:
            if self.ssl_context is not None:
                cert_reqs = self.ssl_context.verify_mode
            else:
                cert_reqs = resolve_cert_reqs(None)

        self.key_file = key_file
        self.cert_file = cert_file
        self.cert_reqs = cert_reqs
        self.key_password = key_password
        self.assert_hostname = assert_hostname
        self.assert_fingerprint = assert_fingerprint
        self.ca_certs = ca_certs and os.path.expanduser(ca_certs)
        self.ca_cert_dir = ca_cert_dir and os.path.expanduser(ca_cert_dir)
        self.ca_cert_data = ca_cert_data

    def connect(self) -> None:
        """Open a TCP connection (tunneling if needed) and perform the TLS
        handshake, recording verification state and HTTP/2 probe results."""
        # Today we don't need to be doing this step before the /actual/ socket
        # connection, however in the future we'll need to decide whether to
        # create a new socket or re-use an existing "shared" socket as a part
        # of the HTTP/2 handshake dance.
        if self._tunnel_host is not None and self._tunnel_port is not None:
            probe_http2_host = self._tunnel_host
            probe_http2_port = self._tunnel_port
        else:
            probe_http2_host = self.host
            probe_http2_port = self.port

        # Check if the target origin supports HTTP/2.
        # If the value comes back as 'None' it means that the current thread
        # is probing for HTTP/2 support. Otherwise, we're waiting for another
        # probe to complete, or we get a value right away.
        target_supports_http2: bool | None
        if "h2" in ssl_.ALPN_PROTOCOLS:
            target_supports_http2 = http2_probe.acquire_and_get(
                host=probe_http2_host, port=probe_http2_port
            )
        else:
            # If HTTP/2 isn't going to be offered it doesn't matter if
            # the target supports HTTP/2. Don't want to make a probe.
            target_supports_http2 = False

        if self._connect_callback is not None:
            self._connect_callback(
                "before connect",
                thread_id=threading.get_ident(),
                target_supports_http2=target_supports_http2,
            )

        try:
            sock: socket.socket | ssl.SSLSocket
            self.sock = sock = self._new_conn()
            server_hostname: str = self.host
            tls_in_tls = False

            # Do we need to establish a tunnel?
            if self.proxy_is_tunneling:
                # We're tunneling to an HTTPS origin so need to do TLS-in-TLS.
                if self._tunnel_scheme == "https":
                    # _connect_tls_proxy will verify and assign proxy_is_verified
                    self.sock = sock = self._connect_tls_proxy(self.host, sock)
                    tls_in_tls = True
                elif self._tunnel_scheme == "http":
                    self.proxy_is_verified = False

                # If we're tunneling it means we're connected to our proxy.
                self._has_connected_to_proxy = True

                self._tunnel()
                # Override the host with the one we're requesting data from.
                server_hostname = typing.cast(str, self._tunnel_host)

            if self.server_hostname is not None:
                server_hostname = self.server_hostname

            # A clock far in the past would make every certificate look
            # not-yet-valid; warn so the failure mode is diagnosable.
            is_time_off = datetime.date.today() < RECENT_DATE
            if is_time_off:
                warnings.warn(
                    (
                        f"System time is way off (before {RECENT_DATE}). This will probably "
                        "lead to SSL verification errors"
                    ),
                    SystemTimeWarning,
                )

            # Remove trailing '.' from fqdn hostnames to allow certificate validation
            server_hostname_rm_dot = server_hostname.rstrip(".")

            sock_and_verified = _ssl_wrap_socket_and_match_hostname(
                sock=sock,
                cert_reqs=self.cert_reqs,
                ssl_version=self.ssl_version,
                ssl_minimum_version=self.ssl_minimum_version,
                ssl_maximum_version=self.ssl_maximum_version,
                ca_certs=self.ca_certs,
                ca_cert_dir=self.ca_cert_dir,
                ca_cert_data=self.ca_cert_data,
                cert_file=self.cert_file,
                key_file=self.key_file,
                key_password=self.key_password,
                server_hostname=server_hostname_rm_dot,
                ssl_context=self.ssl_context,
                tls_in_tls=tls_in_tls,
                assert_hostname=self.assert_hostname,
                assert_fingerprint=self.assert_fingerprint,
            )
            self.sock = sock_and_verified.socket

        # If an error occurs during connection/handshake we may need to release
        # our lock so another connection can probe the origin.
        except BaseException:
            if self._connect_callback is not None:
                self._connect_callback(
                    "after connect failure",
                    thread_id=threading.get_ident(),
                    target_supports_http2=target_supports_http2,
                )

            if target_supports_http2 is None:
                http2_probe.set_and_release(
                    host=probe_http2_host, port=probe_http2_port, supports_http2=None
                )
            raise

        # If this connection doesn't know if the origin supports HTTP/2
        # we report back to the HTTP/2 probe our result.
        if target_supports_http2 is None:
            supports_http2 = sock_and_verified.socket.selected_alpn_protocol() == "h2"
            http2_probe.set_and_release(
                host=probe_http2_host,
                port=probe_http2_port,
                supports_http2=supports_http2,
            )

        # Forwarding proxies can never have a verified target since
        # the proxy is the one doing the verification. Should instead
        # use a CONNECT tunnel in order to verify the target.
        # See: https://github.com/urllib3/urllib3/issues/3267.
        if self.proxy_is_forwarding:
            self.is_verified = False
        else:
            self.is_verified = sock_and_verified.is_verified

        # If there's a proxy to be connected to we are fully connected.
        # This is set twice (once above and here) due to forwarding proxies
        # not using tunnelling.
        self._has_connected_to_proxy = bool(self.proxy)

        # Set `self.proxy_is_verified` unless it's already set while
        # establishing a tunnel.
        if self._has_connected_to_proxy and self.proxy_is_verified is None:
            self.proxy_is_verified = sock_and_verified.is_verified

    def _connect_tls_proxy(self, hostname: str, sock: socket.socket) -> ssl.SSLSocket:
        """
        Establish a TLS connection to the proxy using the provided SSL context.
        """
        # `_connect_tls_proxy` is called when self._tunnel_host is truthy.
        proxy_config = typing.cast(ProxyConfig, self.proxy_config)
        ssl_context = proxy_config.ssl_context
        sock_and_verified = _ssl_wrap_socket_and_match_hostname(
            sock,
            cert_reqs=self.cert_reqs,
            ssl_version=self.ssl_version,
            ssl_minimum_version=self.ssl_minimum_version,
            ssl_maximum_version=self.ssl_maximum_version,
            ca_certs=self.ca_certs,
            ca_cert_dir=self.ca_cert_dir,
            ca_cert_data=self.ca_cert_data,
            server_hostname=hostname,
            ssl_context=ssl_context,
            assert_hostname=proxy_config.assert_hostname,
            assert_fingerprint=proxy_config.assert_fingerprint,
            # Features that aren't implemented for proxies yet:
            cert_file=None,
            key_file=None,
            key_password=None,
            tls_in_tls=False,
        )
        self.proxy_is_verified = sock_and_verified.is_verified
        return sock_and_verified.socket  # type: ignore[return-value]

834 

835 

class _WrappedAndVerifiedSocket(typing.NamedTuple):
    """
    Wrapped socket and whether the connection is
    verified after the TLS handshake
    """

    # The TLS-wrapped socket (an SSLTransport when doing TLS-in-TLS).
    socket: ssl.SSLSocket | SSLTransport
    # True when the peer certificate was verified (or pinned by fingerprint).
    is_verified: bool

844 

845 

def _ssl_wrap_socket_and_match_hostname(
    sock: socket.socket,
    *,
    cert_reqs: None | str | int,
    ssl_version: None | str | int,
    ssl_minimum_version: int | None,
    ssl_maximum_version: int | None,
    cert_file: str | None,
    key_file: str | None,
    key_password: str | None,
    ca_certs: str | None,
    ca_cert_dir: str | None,
    ca_cert_data: None | str | bytes,
    assert_hostname: None | str | typing.Literal[False],
    assert_fingerprint: str | None,
    server_hostname: str | None,
    ssl_context: ssl.SSLContext | None,
    tls_in_tls: bool = False,
) -> _WrappedAndVerifiedSocket:
    """Logic for constructing an SSLContext from all TLS parameters, passing
    that down into ssl_wrap_socket, and then doing certificate verification
    either via hostname or fingerprint. This function exists to guarantee
    that both proxies and targets have the same behavior when connecting via TLS.

    :param sock: Already-connected plain TCP socket to wrap.
    :param ssl_context: If provided, used as-is (aside from ``verify_mode`` /
        ``check_hostname`` adjustments below); otherwise a context is built
        from the individual TLS parameters.
    :param assert_hostname: Alternate hostname to verify against, or ``False``
        to disable hostname verification entirely.
    :param tls_in_tls: Whether this wrap happens inside an existing TLS
        tunnel (HTTPS-in-HTTPS proxying).
    :returns: A ``_WrappedAndVerifiedSocket`` with the wrapped socket and
        whether the connection counts as verified.
    :raises BaseException: Re-raises anything from fingerprint/hostname
        verification after closing the wrapped socket.
    """
    # Track whether we built the context ourselves; this affects both
    # default-cert loading and commonName matching behavior below.
    default_ssl_context = False
    if ssl_context is None:
        default_ssl_context = True
        context = create_urllib3_context(
            ssl_version=resolve_ssl_version(ssl_version),
            ssl_minimum_version=ssl_minimum_version,
            ssl_maximum_version=ssl_maximum_version,
            cert_reqs=resolve_cert_reqs(cert_reqs),
        )
    else:
        context = ssl_context

    # Applied even to caller-supplied contexts so cert_reqs always wins.
    context.verify_mode = resolve_cert_reqs(cert_reqs)

    # In some cases, we want to verify hostnames ourselves
    if (
        # `ssl` can't verify fingerprints or alternate hostnames
        assert_fingerprint
        or assert_hostname
        # assert_hostname can be set to False to disable hostname checking
        or assert_hostname is False
        # We still support OpenSSL 1.0.2, which prevents us from verifying
        # hostnames easily: https://github.com/pyca/pyopenssl/pull/933
        or ssl_.IS_PYOPENSSL
        or not ssl_.HAS_NEVER_CHECK_COMMON_NAME
    ):
        context.check_hostname = False

    # Try to load OS default certs if none are given. We need to do the hasattr() check
    # for custom pyOpenSSL SSLContext objects because they don't support
    # load_default_certs().
    if (
        not ca_certs
        and not ca_cert_dir
        and not ca_cert_data
        and default_ssl_context
        and hasattr(context, "load_default_certs")
    ):
        context.load_default_certs()

    # Ensure that IPv6 addresses are in the proper format and don't have a
    # scope ID. Python's SSL module fails to recognize scoped IPv6 addresses
    # and interprets them as DNS hostnames.
    if server_hostname is not None:
        normalized = server_hostname.strip("[]")
        if "%" in normalized:
            normalized = normalized[: normalized.rfind("%")]
        if is_ipaddress(normalized):
            server_hostname = normalized

    ssl_sock = ssl_wrap_socket(
        sock=sock,
        keyfile=key_file,
        certfile=cert_file,
        key_password=key_password,
        ca_certs=ca_certs,
        ca_cert_dir=ca_cert_dir,
        ca_cert_data=ca_cert_data,
        server_hostname=server_hostname,
        ssl_context=context,
        tls_in_tls=tls_in_tls,
    )

    try:
        # Fingerprint pinning takes precedence over hostname matching.
        if assert_fingerprint:
            _assert_fingerprint(
                ssl_sock.getpeercert(binary_form=True), assert_fingerprint
            )
        # Manual hostname matching: only when certs are being verified,
        # `ssl` itself isn't already checking the hostname, and the caller
        # hasn't explicitly disabled the check with assert_hostname=False.
        elif (
            context.verify_mode != ssl.CERT_NONE
            and not context.check_hostname
            and assert_hostname is not False
        ):
            cert: _TYPE_PEER_CERT_RET_DICT = ssl_sock.getpeercert()  # type: ignore[assignment]

            # Need to signal to our match_hostname whether to use 'commonName' or not.
            # If we're using our own constructed SSLContext we explicitly set 'False'
            # because PyPy hard-codes 'True' from SSLContext.hostname_checks_common_name.
            if default_ssl_context:
                hostname_checks_common_name = False
            else:
                hostname_checks_common_name = (
                    getattr(context, "hostname_checks_common_name", False) or False
                )

            _match_hostname(
                cert,
                assert_hostname or server_hostname,  # type: ignore[arg-type]
                hostname_checks_common_name,
            )

        return _WrappedAndVerifiedSocket(
            socket=ssl_sock,
            # Verified means certs were required, or we pinned a fingerprint.
            is_verified=context.verify_mode == ssl.CERT_REQUIRED
            or bool(assert_fingerprint),
        )
    except BaseException:
        # Never leak a half-verified TLS socket to the caller.
        ssl_sock.close()
        raise

969 

970 

def _match_hostname(
    cert: _TYPE_PEER_CERT_RET_DICT | None,
    asserted_hostname: str,
    hostname_checks_common_name: bool = False,
) -> None:
    """Verify *cert* against *asserted_hostname*, logging and re-raising
    ``CertificateError`` with the peer certificate attached on mismatch.
    """
    # Mirror ssl.match_hostname(): only IP addresses get the bracket
    # normalization; DNS names are matched verbatim so DNS SANs still work.
    candidate = asserted_hostname.strip("[]")
    if is_ipaddress(candidate):
        asserted_hostname = candidate

    try:
        match_hostname(cert, asserted_hostname, hostname_checks_common_name)
    except CertificateError as e:
        log.warning(
            "Certificate did not match expected hostname: %s. Certificate: %s",
            asserted_hostname,
            cert,
        )
        # Attach the certificate so callers catching the exception can
        # inspect what the peer actually presented.
        e._peer_cert = cert  # type: ignore[attr-defined]
        raise

995 

996 

def _wrap_proxy_error(err: Exception, proxy_scheme: str | None) -> ProxyError:
    """Wrap *err* in a :class:`ProxyError`, appending a hint when the
    failure looks like an HTTPS request sent to an HTTP-only proxy.
    """
    # Normalize the message (lowercase, non-letter runs collapsed to single
    # spaces) so phrase matching is robust to punctuation differences.
    normalized = " ".join(re.split("[^a-z]", str(err).lower()))
    # These OpenSSL phrases strongly suggest the proxy spoke plain HTTP
    # where we expected a TLS handshake.
    looks_like_http_proxy = any(
        phrase in normalized
        for phrase in (
            "wrong version number",
            "unknown protocol",
            "record layer failure",
        )
    )

    hint = ""
    if looks_like_http_proxy and proxy_scheme == "https":
        hint = (
            ". Your proxy appears to only use HTTP and not HTTPS, "
            "try changing your proxy URL to be HTTP. See: "
            "https://urllib3.readthedocs.io/en/latest/advanced-usage.html"
            "#https-proxy-error-http-proxy"
        )

    wrapped = ProxyError(f"Unable to connect to proxy{hint}", err)
    # Preserve the original exception as the explicit cause for tracebacks.
    wrapped.__cause__ = err
    return wrapped

1020 

1021 

def _get_default_user_agent() -> str:
    """Return the default ``User-Agent`` header value sent by urllib3."""
    return "python-urllib3/" + __version__

1024 

1025 

class DummyConnection:
    """Placeholder connection class used to detect a failed ConnectionCls
    import (e.g. when Python was built without SSL support).
    """

1028 

1029 

# If Python was compiled without SSL support (the ``ssl`` import at the top
# of this module failed), replace HTTPSConnection with the dummy class so
# attempts to use HTTPS are detectable rather than silently broken.
if not ssl:
    HTTPSConnection = DummyConnection  # type: ignore[misc, assignment] # noqa: F811


# Backwards-compatible alias: HTTPS connections verify certificates by
# default, so the historical "verified" name points at the same class.
VerifiedHTTPSConnection = HTTPSConnection

1035 

1036 

def _url_from_connection(
    conn: HTTPConnection | HTTPSConnection, path: str | None = None
) -> str:
    """Returns the URL from a given connection. This is mainly used for testing and logging."""
    # HTTPSConnection subclasses HTTPConnection, so check it first.
    if isinstance(conn, HTTPSConnection):
        scheme = "https"
    else:
        scheme = "http"

    return Url(scheme=scheme, host=conn.host, port=conn.port, path=path).url