Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

655 statements  

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import socket 

9import sys 

10import typing 

11import warnings 

12import zlib 

13from contextlib import contextmanager 

14from http.client import HTTPMessage as _HttplibHTTPMessage 

15from http.client import HTTPResponse as _HttplibHTTPResponse 

16from socket import timeout as SocketTimeout 

17 

18if typing.TYPE_CHECKING: 

19 from ._base_connection import BaseHTTPConnection 

20 

21try: 

22 try: 

23 import brotlicffi as brotli # type: ignore[import-not-found] 

24 except ImportError: 

25 import brotli # type: ignore[import-not-found] 

26except ImportError: 

27 brotli = None 

28 

29from . import util 

30from ._base_connection import _TYPE_BODY 

31from ._collections import HTTPHeaderDict 

32from .connection import BaseSSLError, HTTPConnection, HTTPException 

33from .exceptions import ( 

34 BodyNotHttplibCompatible, 

35 DecodeError, 

36 HTTPError, 

37 IncompleteRead, 

38 InvalidChunkLength, 

39 InvalidHeader, 

40 ProtocolError, 

41 ReadTimeoutError, 

42 ResponseNotChunked, 

43 SSLError, 

44) 

45from .util.response import is_fp_closed, is_response_to_head 

46from .util.retry import Retry 

47 

48if typing.TYPE_CHECKING: 

49 from .connectionpool import HTTPConnectionPool 

50 

51log = logging.getLogger(__name__) 

52 

53 

class ContentDecoder:
    """Abstract interface for streaming Content-Encoding decoders.

    Implementations receive the body incrementally via :meth:`decompress`
    and return any final buffered output from :meth:`flush`.
    """

    def decompress(self, data: bytes) -> bytes:
        # Decode one chunk of encoded body data; may return b"" while
        # more input is needed.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Called at end of stream to drain any remaining decoded bytes.
        raise NotImplementedError()

60 

61 

class DeflateDecoder(ContentDecoder):
    """Decode ``deflate`` bodies, accepting zlib-wrapped or raw streams.

    Some servers send raw DEFLATE data without the RFC 1950 zlib wrapper.
    The first chunk is tried with a standard zlib decompressor; if that
    fails, we switch to a raw (negative wbits) decompressor and replay
    the bytes buffered so far.
    """

    def __init__(self) -> None:
        self._first_try = True  # still deciding: zlib-wrapped vs. raw
        self._data = b""  # input buffered until the format is known
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Format not yet determined: keep a copy of the input so it can
        # be replayed through a raw decompressor if zlib-wrapped fails.
        self._data += data
        try:
            out = self._obj.decompress(data)
            if out:
                # Producing output proves the zlib guess was right.
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return out
        except zlib.error:
            # Not zlib-wrapped; fall back to raw DEFLATE and replay.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        # Drain whatever the underlying decompressor still holds.
        return self._obj.flush()

92 

93 

class GzipDecoderState:
    """State values for :class:`GzipDecoder`'s multi-member handling."""

    # Decoding the first gzip member of the stream.
    FIRST_MEMBER = 0
    # Decoding a subsequent concatenated member; trailing garbage after a
    # complete member is tolerated in this state.
    OTHER_MEMBERS = 1
    # A decode error occurred; all further input is ignored.
    SWALLOW_DATA = 2

98 

99 

class GzipDecoder(ContentDecoder):
    """Decode ``gzip`` bodies, including concatenated multi-member streams."""

    def __init__(self) -> None:
        # 16 + MAX_WBITS instructs zlib to expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                prior = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if prior == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover input means another gzip member follows; decode it
            # with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

128 

129 

# Only define BrotliDecoder when some brotli implementation was importable.
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decode ``br`` (Brotli) encoded bodies."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the per-chunk decode callable directly on the instance:
            # brotlicffi exposes ``decompress``, the Brotli package exposes
            # ``process``.  This instance attribute shadows the class-level
            # ContentDecoder.decompress.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only brotlicffi's decompressor has an explicit flush; the
            # Brotli package emits everything from process().
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

147 

148 

# Prefer the stdlib Zstandard bindings (Python 3.14+); otherwise fall back
# to the third-party 'zstandard' package, and disable zstd support entirely
# if neither is usable.
try:
    # Python 3.14+
    from compression import zstd  # type: ignore[import-not-found] # noqa: F401

    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decode ``zstd`` bodies using the stdlib ``compression.zstd``."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A body may consist of several concatenated zstd frames; start
            # a fresh decompressor for each subsequent frame.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # Reject truncated streams rather than silently returning a
            # partial body.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

except ImportError:
    try:
        # Python 3.13 and earlier require the 'zstandard' module.
        import zstandard as zstd

        # The package 'zstandard' added the 'eof' property starting
        # in v0.18.0 which we require to ensure a complete and
        # valid zstd stream was fed into the ZstdDecoder.
        # See: https://github.com/urllib3/urllib3/pull/2624
        _zstd_version = tuple(
            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
        )
        if _zstd_version < (0, 18):  # Defensive:
            raise ImportError("zstandard module doesn't have eof")
    except (AttributeError, ImportError, ValueError):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
            """Decode ``zstd`` bodies using the 'zstandard' package."""

            def __init__(self) -> None:
                self._obj = zstd.ZstdDecompressor().decompressobj()

            def decompress(self, data: bytes) -> bytes:
                if not data:
                    return b""
                data_parts = [self._obj.decompress(data)]
                # Handle multiple concatenated frames, as in the stdlib
                # variant above.
                while self._obj.eof and self._obj.unused_data:
                    unused_data = self._obj.unused_data
                    self._obj = zstd.ZstdDecompressor().decompressobj()
                    data_parts.append(self._obj.decompress(unused_data))
                return b"".join(data_parts)

            def flush(self) -> bytes:
                ret = self._obj.flush()  # note: this is a no-op
                if not self._obj.eof:
                    raise DecodeError("Zstandard data is incomplete")
                return ret  # type: ignore[no-any-return]

212 

213 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in header (application) order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def decompress(self, data: bytes) -> bytes:
        # Encodings are undone in reverse of the order they were applied.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

    def flush(self) -> bytes:
        # The first-listed coding was applied first, so its decoder runs
        # last and is the one holding any final output.
        return self._decoders[0].flush()

233 

234 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a :class:`ContentDecoder` for a Content-Encoding value.

    A comma-separated list of codings yields a :class:`MultiDecoder`;
    any unrecognized single token falls back to :class:`DeflateDecoder`.
    """
    # Multiple codings were applied in sequence.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()

251 

252 

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

    * self.buffer, which contains the full data
    * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append *data* to the tail of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Pop and return up to *n* bytes from the head of the buffer."""
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            needed = n - fetched
            if len(chunk) > needed:
                # Split the chunk: emit the prefix, push the unread tail
                # back onto the head of the queue.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break

            # Consume the whole chunk.
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)

            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Pop and return the entire buffer contents as one bytes object."""
        queue = self.buffer
        if not queue:
            assert self._size == 0
            return b""
        if len(queue) == 1:
            # Fast path: hand back the sole chunk without copying.
            result = queue.pop()
        else:
            out = io.BytesIO()
            out.writelines(queue.popleft() for _ in range(len(queue)))
            result = out.getvalue()
        self._size = 0
        return result

324 

325 

class BaseHTTPResponse(io.IOBase):
    """Shared interface and decoding machinery for urllib3 responses.

    Handles headers, redirect detection, content decoding, and the
    :mod:`io` / :mod:`http.client` compatibility surface.  Subclasses
    implement the body transport (:meth:`read`, :meth:`stream`, etc.).
    """

    # Content-Encoding tokens this class can decode.  Optional codecs are
    # appended only if their backing module imported at module load time.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    # Status codes for which get_redirect_location() may return a target.
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() translates into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Reuse an HTTPHeaderDict as-is; wrap anything else.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded bytes have been produced; used to reject a later
        # read(decode_content=False) on the same response.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        # Declared here; subclasses are responsible for assigning it.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Abstract: subclasses return the (possibly cached) full body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Abstract: the URL the response was fetched from.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        # Abstract: the connection this response is being read from, if any.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        # The Retry instance used for the request that produced this response.
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Abstract: yield the body in chunks of up to ``amt`` bytes.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Abstract: read up to ``amt`` bytes of (optionally decoded) body.
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        # Abstract: read using at most one underlying read call.
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Abstract: iterate over transfer-encoding chunks.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Abstract: return the connection to its pool.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Abstract: consume remaining body so the connection can be reused.
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings listed: only build a decoder chain when at
                # least one listed coding is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and undecoded reads would corrupt the stream.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into ``b``; return the count read."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

567 

568 

569class HTTPResponse(BaseHTTPResponse): 

570 """ 

571 HTTP Response container. 

572 

573 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

574 loaded and decoded on-demand when the ``data`` property is accessed. This 

575 class is also compatible with the Python standard library's :mod:`io` 

576 module, and can hence be treated as a readable object in the context of that 

577 framework. 

578 

579 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

580 

581 :param preload_content: 

582 If True, the response's body will be preloaded during construction. 

583 

584 :param decode_content: 

585 If True, will attempt to decode the body based on the 

586 'content-encoding' header. 

587 

588 :param original_response: 

589 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

590 object, it's convenient to include the original for debug purposes. It's 

591 otherwise unused. 

592 

593 :param retries: 

594 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

595 was used during the request. 

596 

597 :param enforce_content_length: 

598 Enforce content length checking. Body returned by server must match 

599 value of Content-Length header, if present. Otherwise, raise error. 

600 """ 

601 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        # Raw (wire) bytes consumed so far; reported by tell().
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ...while a file-like body is streamed through self._fp.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

665 

666 def release_conn(self) -> None: 

667 if not self._pool or not self._connection: 

668 return None 

669 

670 self._pool._put_conn(self._connection) 

671 self._connection = None 

672 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        # Best-effort: a failure while draining only means the connection
        # cannot be reused, so the error is deliberately swallowed.
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

683 

    @property
    def data(self) -> bytes:
        """Full response body, reading and caching it on first access."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Read the remaining stream and cache it so repeated access works.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

694 

    @property
    def connection(self) -> HTTPConnection | None:
        # The connection this response is being read from, or None once it
        # has been released back to the pool.
        return self._connection

698 

    def isclosed(self) -> bool:
        """Return True if the underlying file-like body object is closed."""
        return is_fp_closed(self._fp)

701 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Incremented by _raw_read() for every raw chunk received.
        return self._fp_bytes_read

709 

710 def _init_length(self, request_method: str | None) -> int | None: 

711 """ 

712 Set initial length value for Response content if available. 

713 """ 

714 length: int | None 

715 content_length: str | None = self.headers.get("content-length") 

716 

717 if content_length is not None: 

718 if self.chunked: 

719 # This Response will fail with an IncompleteRead if it can't be 

720 # received as chunked. This method falls back to attempt reading 

721 # the response before raising an exception. 

722 log.warning( 

723 "Received response with both Content-Length and " 

724 "Transfer-Encoding set. This is expressly forbidden " 

725 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

726 "attempting to process response as Transfer-Encoding: " 

727 "chunked." 

728 ) 

729 return None 

730 

731 try: 

732 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

733 # be sent in a single Content-Length header 

734 # (e.g. Content-Length: 42, 42). This line ensures the values 

735 # are all valid ints and that as long as the `set` length is 1, 

736 # all values are the same. Otherwise, the header is invalid. 

737 lengths = {int(val) for val in content_length.split(",")} 

738 if len(lengths) > 1: 

739 raise InvalidHeader( 

740 "Content-Length contained multiple " 

741 "unmatching values (%s)" % content_length 

742 ) 

743 length = lengths.pop() 

744 except ValueError: 

745 length = None 

746 else: 

747 if length < 0: 

748 length = None 

749 

750 else: # if content_length is None 

751 length = None 

752 

753 # Convert status to int for comparison 

754 # In some cases, httplib returns a status of "_UNKNOWN" 

755 try: 

756 status = int(self.status) 

757 except ValueError: 

758 status = 0 

759 

760 # Check for responses that shouldn't include a body 

761 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

762 length = 0 

763 

764 return length 

765 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # Otherwise the SSL error is really a read timeout.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                # NOTE(review): expected == -partial appears to signal a
                # zero-byte read of a sized body — confirm against
                # http.client's IncompleteRead construction.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

831 

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked slow path when a >2 GiB read is plausible
        # AND the runtime is one of the known-affected configurations.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 may return less than requested, so a single capped
                # call is sufficient.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

888 

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates ``_fp_bytes_read`` / ``length_remaining`` bookkeeping
        and enforces Content-Length when the stream ends early.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

940 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a sized read entirely from previously decoded bytes
            # when the buffer already holds enough.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full read, or when a sized read hits EOF.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until enough decoded bytes exist to
            # satisfy ``amt``, or the raw stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

1014 

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                # Once decoded bytes have been handed out, raw reads would be
                # inconsistent with what the caller already received.
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            # Only flush the decoder once the raw stream is exhausted.
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            # The raw chunk decoded to nothing (e.g. mid-header); read more.
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

1068 

1069 def stream( 

1070 self, amt: int | None = 2**16, decode_content: bool | None = None 

1071 ) -> typing.Generator[bytes]: 

1072 """ 

1073 A generator wrapper for the read() method. A call will block until 

1074 ``amt`` bytes have been read from the connection or until the 

1075 connection is closed. 

1076 

1077 :param amt: 

1078 How much of the content to read. The generator will return up to 

1079 much data per iteration, but may return less. This is particularly 

1080 likely when using compressed data. However, the empty string will 

1081 never be returned. 

1082 

1083 :param decode_content: 

1084 If True, will attempt to decode the body based on the 

1085 'content-encoding' header. 

1086 """ 

1087 if self.chunked and self.supports_chunked_reads(): 

1088 yield from self.read_chunked(amt, decode_content=decode_content) 

1089 else: 

1090 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

1091 data = self.read(amt=amt, decode_content=decode_content) 

1092 

1093 if data: 

1094 yield data 

1095 

1096 # Overrides from io.IOBase 

1097 def readable(self) -> bool: 

1098 return True 

1099 

1100 def shutdown(self) -> None: 

1101 if not self._sock_shutdown: 

1102 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set") 

1103 if self._connection is None: 

1104 raise RuntimeError( 

1105 "Cannot shutdown as connection has already been released to the pool" 

1106 ) 

1107 self._sock_shutdown(socket.SHUT_RD) 

1108 

1109 def close(self) -> None: 

1110 self._sock_shutdown = None 

1111 

1112 if not self.closed and self._fp: 

1113 self._fp.close() 

1114 

1115 if self._connection: 

1116 self._connection.close() 

1117 

1118 if not self.auto_close: 

1119 io.IOBase.close(self) 

1120 

1121 @property 

1122 def closed(self) -> bool: 

1123 if not self.auto_close: 

1124 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] 

1125 elif self._fp is None: 

1126 return True 

1127 elif hasattr(self._fp, "isclosed"): 

1128 return self._fp.isclosed() 

1129 elif hasattr(self._fp, "closed"): 

1130 return self._fp.closed 

1131 else: 

1132 return True 

1133 

1134 def fileno(self) -> int: 

1135 if self._fp is None: 

1136 raise OSError("HTTPResponse has no file to get a fileno from") 

1137 elif hasattr(self._fp, "fileno"): 

1138 return self._fp.fileno() 

1139 else: 

1140 raise OSError( 

1141 "The file-like object this HTTPResponse is wrapped " 

1142 "around has no file descriptor" 

1143 ) 

1144 

1145 def flush(self) -> None: 

1146 if ( 

1147 self._fp is not None 

1148 and hasattr(self._fp, "flush") 

1149 and not getattr(self._fp, "closed", False) 

1150 ): 

1151 return self._fp.flush() 

1152 

1153 def supports_chunked_reads(self) -> bool: 

1154 """ 

1155 Checks if the underlying file-like object looks like a 

1156 :class:`http.client.HTTPResponse` object. We do this by testing for 

1157 the fp attribute. If it is present we assume it returns raw chunks as 

1158 processed by read_chunked(). 

1159 """ 

1160 return hasattr(self._fp, "fp") 

1161 

    def _update_chunk_length(self) -> None:
        """
        Read and parse the next chunk-size line into ``self.chunk_left``.

        No-op while the current chunk still has unread bytes.

        :raises InvalidChunkLength: if the size line is not valid hexadecimal.
        :raises ProtocolError: if the stream ended before the next size line.
        """
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        # The size may be followed by ";name=value" chunk extensions; drop them.
        line = line.split(b";", 1)[0]
        try:
            # Chunk sizes are transmitted in hexadecimal.
            self.chunk_left = int(line, 16)
        except ValueError:
            # The connection is unusable after a framing error; close it
            # before raising.
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

1179 

    def _handle_chunk(self, amt: int | None) -> bytes:
        """
        Read up to ``amt`` bytes from the current chunk (the whole chunk when
        ``amt`` is None), consuming the trailing CRLF whenever the chunk is
        finished and updating ``self.chunk_left`` accordingly.
        """
        returned_chunk = None
        if amt is None:
            # Consume the entire remaining chunk plus its CRLF terminator.
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            # Partial read: leave the rest of the chunk (and its CRLF) pending.
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            # Cap the read at the chunk boundary; the caller asked for more
            # than this chunk holds.
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

1201 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked: if the response is not transfer-encoded
            as chunked.
        :raises BodyNotHttplibCompatible: if the body object does not expose
            the ``fp`` attribute required for raw chunk access.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                # A zero-length chunk marks the end of the chunked body.
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1278 

1279 @property 

1280 def url(self) -> str | None: 

1281 """ 

1282 Returns the URL that was the source of this response. 

1283 If the request that generated this response redirected, this method 

1284 will return the final redirect location. 

1285 """ 

1286 return self._request_url 

1287 

1288 @url.setter 

1289 def url(self, url: str) -> None: 

1290 self._request_url = url 

1291 

    def __iter__(self) -> typing.Iterator[bytes]:
        """
        Iterate over the decoded response body, yielding one line at a time.

        Partial lines are buffered across ``stream()`` chunks; a trailing
        fragment without a terminating newline is yielded last, by itself.
        """
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                # Complete the line carried over from previous chunks.
                yield b"".join(buffer) + chunks[0] + b"\n"
                # Interior pieces are whole lines on their own.
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    # Chunk ended mid-line; carry the remainder forward.
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                # No newline yet; keep accumulating.
                buffer.append(chunk)
        if buffer:
            # Body didn't end with a newline; emit the final partial line.
            yield b"".join(buffer)