Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

560 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:20 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fixed: the original had a duplicated "_zstd_version = _zstd_version ="
    # assignment; a single binding is sufficient.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing_extensions import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract streaming decoder for one ``Content-Encoding`` token.

    ``decompress()`` is fed successive body chunks; ``flush()`` drains
    whatever the decoder still buffers once input is exhausted.
    """

    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    """Decoder for 'deflate' bodies.

    Some servers send raw DEFLATE data instead of the zlib-wrapped form,
    so the first decode attempt uses the zlib wrapper and, on failure,
    falls back to a raw stream, replaying every byte seen so far.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Accumulate raw input so the raw-deflate fallback can replay it.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parse failed: assume raw DEFLATE and replay
            # everything received so far through a raw decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if decompressed:
            # Wrapped mode confirmed; the replay buffer is no longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decompressed

    def flush(self) -> bytes:
        return self._obj.flush()


class GzipDecoderState:
    """State markers for :class:`GzipDecoder`."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    """Decoder for 'gzip' bodies, including multi-member streams."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                was_past_first_member = (
                    self._state == GzipDecoderState.OTHER_MEMBERS
                )
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if was_past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover bytes mean another gzip member follows; decode it
            # with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever incremental-decode method the installed
            # library provides directly onto this instance as `decompress`.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one of the two libraries exposes flush(); for the other
            # there is nothing buffered to drain.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A zstd body may consist of several concatenated frames; when a
            # frame ends (eof) with input left over, decode the remainder
            # with a fresh decompressor.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18, enforced at import time above) tells
            # us whether a complete frame was received.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

184 

185 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per comma-separated coding token, kept in header order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # decompress() runs the list in reverse, so index 0 is the final
        # decoding stage and the only one whose flush output reaches callers.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # The header lists codings in application order; undo them in reverse.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

205 

206 

def _get_decoder(mode: str) -> ContentDecoder:
    """Map a Content-Encoding value (or comma-joined list) to a decoder."""
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()
    if brotli is not None and mode == "br":
        return BrotliDecoder()
    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    # Everything else falls through to deflate, the historical default.
    return DeflateDecoder()

221 

222 

class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of bytes chunks.

    read() must hand back exactly the requested number of decoded bytes
    while decoded chunks arrive in arbitrary sizes, so chunks are queued
    whole via put() and only split when a get() boundary lands inside one.

    Peak memory usage is the queued data plus the largest chunk copied out
    by get(); a single huge chunk therefore costs one full copy.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Return up to ``n`` buffered bytes (fewer if the buffer runs out)."""
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            needed = n - fetched
            if len(chunk) > needed:
                # The boundary falls inside this chunk: emit the head and
                # push the tail back for the next call.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)

            if not self.buffer:
                break

        return out.getvalue()

280 

281 

class BaseHTTPResponse(io.IOBase):
    """Abstract base for urllib3 responses.

    Holds the status line, headers, retry state and content-decoding
    machinery shared by concrete response implementations, and declares the
    file-like interface (read/stream/close) that subclasses must provide.
    """

    # Content-Encoding tokens this build can decode; 'br' and 'zstd' are only
    # present when the optional libraries imported successfully above.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions the active decoder may raise; _decode() converts any of
    # these into urllib3's DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Normalize any mapping into an HTTPHeaderDict without copying one
        # that is already the right type.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the full (possibly cached) response body here.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder when every listed
                # coding is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would silently corrupt output, so
            # it is rejected outright.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

502 

503 

504class HTTPResponse(BaseHTTPResponse): 

505 """ 

506 HTTP Response container. 

507 

508 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

509 loaded and decoded on-demand when the ``data`` property is accessed. This 

510 class is also compatible with the Python standard library's :mod:`io` 

511 module, and can hence be treated as a readable object in the context of that 

512 framework. 

513 

514 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

515 

516 :param preload_content: 

517 If True, the response's body will be preloaded during construction. 

518 

519 :param decode_content: 

520 If True, will attempt to decode the body based on the 

521 'content-encoding' header. 

522 

523 :param original_response: 

524 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

525 object, it's convenient to include the original for debug purposes. It's 

526 otherwise unused. 

527 

528 :param retries: 

529 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

530 was used during the request. 

531 

532 :param enforce_content_length: 

533 Enforce content length checking. Body returned by server must match 

534 value of Content-Length header, if present. Otherwise, raise error. 

535 """ 

536 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        # Parameters are documented on the class docstring; status-line and
        # decoding state are initialized by the base class.
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the stream we read from.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

596 

597 def release_conn(self) -> None: 

598 if not self._pool or not self._connection: 

599 return None 

600 

601 self._pool._put_conn(self._connection) 

602 self._connection = None 

603 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort: a failed drain only means this connection won't
            # be reused.
            pass

614 

    @property
    def data(self) -> bytes:
        """The response body, fully read (and cached) on first access."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # cache_content stores the result in _body for later accesses.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

625 

    @property
    def connection(self) -> HTTPConnection | None:
        """The HTTPConnection backing this response, or None once released."""
        return self._connection

629 

    def isclosed(self) -> bool:
        """http.client compatibility: whether the body file object is closed."""
        return is_fp_closed(self._fp)

632 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # _raw_read() increments this counter by the raw byte count read.
        return self._fp_bytes_read

640 

641 def _init_length(self, request_method: str | None) -> int | None: 

642 """ 

643 Set initial length value for Response content if available. 

644 """ 

645 length: int | None 

646 content_length: str | None = self.headers.get("content-length") 

647 

648 if content_length is not None: 

649 if self.chunked: 

650 # This Response will fail with an IncompleteRead if it can't be 

651 # received as chunked. This method falls back to attempt reading 

652 # the response before raising an exception. 

653 log.warning( 

654 "Received response with both Content-Length and " 

655 "Transfer-Encoding set. This is expressly forbidden " 

656 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

657 "attempting to process response as Transfer-Encoding: " 

658 "chunked." 

659 ) 

660 return None 

661 

662 try: 

663 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

664 # be sent in a single Content-Length header 

665 # (e.g. Content-Length: 42, 42). This line ensures the values 

666 # are all valid ints and that as long as the `set` length is 1, 

667 # all values are the same. Otherwise, the header is invalid. 

668 lengths = {int(val) for val in content_length.split(",")} 

669 if len(lengths) > 1: 

670 raise InvalidHeader( 

671 "Content-Length contained multiple " 

672 "unmatching values (%s)" % content_length 

673 ) 

674 length = lengths.pop() 

675 except ValueError: 

676 length = None 

677 else: 

678 if length < 0: 

679 length = None 

680 

681 else: # if content_length is None 

682 length = None 

683 

684 # Convert status to int for comparison 

685 # In some cases, httplib returns a status of "_UNKNOWN" 

686 try: 

687 status = int(self.status) 

688 except ValueError: 

689 status = 0 

690 

691 # Check for responses that shouldn't include a body 

692 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

693 length = 0 

694 

695 return length 

696 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

752 

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Read in <= 2 GiB chunks to dodge the overflow, accumulating
            # into a single buffer.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

800 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Raw (undecoded) bytes are returned; byte accounting for tell() and
        length_remaining happens here.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

842 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve entirely from previously-decoded bytes when possible.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end of stream: either a full read, or a
        # partial read that hit EOF.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until `amt` decoded bytes are buffered or the
            # stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

913 

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            # Chunked transfer-encoding carries its own framing; delegate.
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            # Keep yielding while raw bytes remain or decoded bytes are
            # still buffered.
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

940 

941 # Overrides from io.IOBase 

942 def readable(self) -> bool: 

943 return True 

944 

    def close(self) -> None:
        # Close the body file object and, when held, the connection itself.
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            # With auto_close disabled, io.IOBase's closed flag is the source
            # of truth, so set it explicitly now.
            io.IOBase.close(self)

954 

    @property
    def closed(self) -> bool:
        # With auto_close disabled, mirror io.IOBase's own closed flag;
        # otherwise derive closed-ness from the wrapped file-like object.
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

967 

    def fileno(self) -> int:
        # io.IOBase interface: expose the wrapped object's file descriptor
        # when it has one.
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

978 

979 def flush(self) -> None: 

980 if ( 

981 self._fp is not None 

982 and hasattr(self._fp, "flush") 

983 and not getattr(self._fp, "closed", False) 

984 ): 

985 return self._fp.flush() 

986 

987 def supports_chunked_reads(self) -> bool: 

988 """ 

989 Checks if the underlying file-like object looks like a 

990 :class:`http.client.HTTPResponse` object. We do this by testing for 

991 the fp attribute. If it is present we assume it returns raw chunks as 

992 processed by read_chunked(). 

993 """ 

994 return hasattr(self._fp, "fp") 

995 

996 def _update_chunk_length(self) -> None: 

997 # First, we'll figure out length of a chunk and then 

998 # we'll try to read it from socket. 

999 if self.chunk_left is not None: 

1000 return None 

1001 line = self._fp.fp.readline() # type: ignore[union-attr] 

1002 line = line.split(b";", 1)[0] 

1003 try: 

1004 self.chunk_left = int(line, 16) 

1005 except ValueError: 

1006 # Invalid chunked protocol response, abort. 

1007 self.close() 

1008 raise InvalidChunkLength(self, line) from None 

1009 

1010 def _handle_chunk(self, amt: int | None) -> bytes: 

1011 returned_chunk = None 

1012 if amt is None: 

1013 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1014 returned_chunk = chunk 

1015 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1016 self.chunk_left = None 

1017 elif self.chunk_left is not None and amt < self.chunk_left: 

1018 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1019 self.chunk_left = self.chunk_left - amt 

1020 returned_chunk = value 

1021 elif amt == self.chunk_left: 

1022 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1023 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1024 self.chunk_left = None 

1025 returned_chunk = value 

1026 else: # amt > self.chunk_left 

1027 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1028 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1029 self.chunk_left = None 

1030 return returned_chunk # type: ignore[no-any-return] 

1031 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            if the response lacks a ``Transfer-Encoding: chunked`` header.
        :raises BodyNotHttplibCompatible:
            if the wrapped body exposes no ``fp`` attribute to read raw
            chunks from (see :meth:`supports_chunked_reads`).
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        # All socket reads happen inside _error_catcher so timeouts and
        # protocol errors are translated into urllib3 exceptions.
        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            # A zero-length chunk marks the end of the body.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it. This also skips any
            # trailer lines that precede the final CRLF.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1103 

1104 @property 

1105 def url(self) -> str | None: 

1106 """ 

1107 Returns the URL that was the source of this response. 

1108 If the request that generated this response redirected, this method 

1109 will return the final redirect location. 

1110 """ 

1111 return self._request_url 

1112 

1113 @url.setter 

1114 def url(self, url: str) -> None: 

1115 self._request_url = url 

1116 

1117 def __iter__(self) -> typing.Iterator[bytes]: 

1118 buffer: list[bytes] = [] 

1119 for chunk in self.stream(decode_content=True): 

1120 if b"\n" in chunk: 

1121 chunks = chunk.split(b"\n") 

1122 yield b"".join(buffer) + chunks[0] + b"\n" 

1123 for x in chunks[1:-1]: 

1124 yield x + b"\n" 

1125 if chunks[-1]: 

1126 buffer = [chunks[-1]] 

1127 else: 

1128 buffer = [] 

1129 else: 

1130 buffer.append(chunk) 

1131 if buffer: 

1132 yield b"".join(buffer)