Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

564 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:32 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # NOTE: the original code assigned `_zstd_version` twice on the same
    # line (`_zstd_version = _zstd_version = ...`); the duplicate has been
    # removed — it had no effect.
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing_extensions import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Concrete decoders consume raw transfer bytes through
    :meth:`decompress` and emit any buffered remainder via :meth:`flush`.
    """

    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()

74 

75 

class DeflateDecoder(ContentDecoder):
    """Decoder for 'deflate' content-encoding.

    Some servers send raw deflate streams (RFC 1951) instead of the
    zlib-wrapped format (RFC 1950) that the header implies.  On the first
    data we try the zlib-wrapped format and, if that fails, fall back to a
    raw-deflate decompressor fed with everything received so far.
    """

    def __init__(self) -> None:
        self._first_try = True  # still probing zlib-wrapped vs. raw deflate
        self._data = b""  # accumulates input until the format is known
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already determined; decode directly.
            return self._obj.decompress(data)

        # Keep a copy of all input so we can retry with a raw-deflate
        # decompressor if the zlib-wrapped attempt fails.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                # zlib-wrapped stream confirmed; drop the retry buffer.
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            # Retry as a raw deflate stream: negative wbits disables the
            # zlib header/trailer handling.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()

106 

107 

class GzipDecoderState:
    # State values for GzipDecoder's handling of multi-member gzip streams.
    FIRST_MEMBER = 0  # decoding the first gzip member
    OTHER_MEMBERS = 1  # at least one member fully decoded; more may follow
    SWALLOW_DATA = 2  # an error occurred; all further input is ignored

112 

113 

class GzipDecoder(ContentDecoder):
    """Decoder for 'gzip' content-encoding.

    Handles multi-member gzip streams and tolerates trailing garbage
    after at least one complete member has been decoded.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)

        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # After any error, discard whatever else arrives.
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Trailing garbage after a complete member is accepted,
                    # matching the behavior of other gzip clients.
                    return bytes(output)
                raise

            data = self._obj.unused_data
            if not data:
                return bytes(output)

            # Leftover input means another gzip member follows; start a
            # fresh decompressor for it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for 'br' content-encoding.

        Supports both the 'brotlipy' and 'Brotli' packages, which share an
        import name: brotlipy's Decompressor has ``decompress()`` while
        Brotli's has ``process()``.
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                self.decompress = self._obj.decompress  # type: ignore[method-assign]
            else:
                self.decompress = self._obj.process  # type: ignore[method-assign]

        def flush(self) -> bytes:
            # Only brotlipy's Decompressor exposes flush(); Brotli buffers
            # nothing, so returning empty bytes is correct there.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        """Decoder for 'zstd' content-encoding.

        Handles response bodies made of several concatenated zstandard
        frames by starting a fresh decompressor per frame.
        """

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            parts = [self._obj.decompress(data)]
            # When a frame finished and input remains, another frame follows.
            while self._obj.eof and self._obj.unused_data:
                leftover = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                parts.append(self._obj.decompress(leftover))
            return b"".join(parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                # The stream ended mid-frame: incomplete data.
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

184 

185 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # The first decoder in header order is the outermost layer.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the encodings in the reverse of the order they were applied.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

205 

206 

def _get_decoder(mode: str) -> ContentDecoder:
    # Map a Content-Encoding header value to a decoder instance.
    if "," in mode:
        # Multiple encodings were stacked (e.g. "gzip, br").
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    # Anything else that reached this point is treated as deflate.
    return DeflateDecoder()

221 

222 

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

    * self.buffer, which contains the full data
    * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        # Chunks are stored whole; splitting happens lazily in get().
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        copied = 0
        while copied < n:
            chunk = self.buffer.popleft()
            still_needed = n - copied
            if len(chunk) > still_needed:
                # Split the chunk: emit the head, keep the tail queued.
                out.write(chunk[:still_needed])
                self.buffer.appendleft(chunk[still_needed:])
                self._size -= still_needed
                break
            out.write(chunk)
            copied += len(chunk)
            self._size -= len(chunk)
            if not self.buffer:
                # Fewer than n bytes were available; return what we have.
                break

        return out.getvalue()

280 

281 

class BaseHTTPResponse(io.IOBase):
    """Base class for urllib3 HTTP responses.

    Implements behavior shared by response implementations: header
    storage, redirect detection, transparent content decoding, and the
    compatibility shims for the ``io`` and ``http.client`` APIs.
    Subclasses must implement the body-access methods (``read``,
    ``stream``, ``read_chunked``, ``close``, ...).
    """

    # Content-Encoding values this class can decode; optional codecs are
    # advertised only when their backing package imported successfully.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() translates into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the `retries` property setter below, which may
        # also update the URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses provide the actual body access.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Stacked encodings: only build a decoder when at least one
                # listed coding is supported.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would return inconsistent bytes,
            # so refuse once decoding has already happened.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill `b` with up to len(b) bytes; returns the count actually read.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

502 

503 

504class HTTPResponse(BaseHTTPResponse): 

505 """ 

506 HTTP Response container. 

507 

508 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

509 loaded and decoded on-demand when the ``data`` property is accessed. This 

510 class is also compatible with the Python standard library's :mod:`io` 

511 module, and can hence be treated as a readable object in the context of that 

512 framework. 

513 

514 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

515 

516 :param preload_content: 

517 If True, the response's body will be preloaded during construction. 

518 

519 :param decode_content: 

520 If True, will attempt to decode the body based on the 

521 'content-encoding' header. 

522 

523 :param original_response: 

524 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

525 object, it's convenient to include the original for debug purposes. It's 

526 otherwise unused. 

527 

528 :param retries: 

529 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

530 was used during the request. 

531 

532 :param enforce_content_length: 

533 Enforce content length checking. Body returned by server must match 

534 value of Content-Length header, if present. Otherwise, raise error. 

535 """ 

536 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0  # raw bytes pulled off the wire so far
        self.msg = msg

        # A str/bytes body is stored directly; file-like bodies are
        # streamed through self._fp below instead.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

596 

    def release_conn(self) -> None:
        """Return the underlying connection to its pool, if both exist."""
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

603 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: a failed read still leaves the connection
            # to be cleaned up elsewhere.
            pass

614 

    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Consume and cache the whole body so repeated access works.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

625 

    @property
    def connection(self) -> HTTPConnection | None:
        # The HTTPConnection this response was read from, if still held.
        return self._connection

629 

    def isclosed(self) -> bool:
        # http.client compatibility: True when the underlying fp is closed.
        return is_fp_closed(self._fp)

632 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

640 

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        Returns the expected body length in bytes, or ``None`` when it
        cannot be determined (chunked, invalid, or absent Content-Length).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    # Negative lengths are nonsensical; treat as unknown.
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

696 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

752 

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Chunked workaround path: read in <=256 MiB pieces to stay
            # under the 32-bit limit.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

800 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Updates ``_fp_bytes_read`` and ``length_remaining`` bookkeeping,
        and enforces Content-Length when the stream ends early.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

842 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve the partial read entirely from previously decoded
            # data when the buffer already holds enough bytes.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            # End of stream reached on a partial read.
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw data until enough decoded bytes are
            # buffered or the stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

917 

918 def stream( 

919 self, amt: int | None = 2**16, decode_content: bool | None = None 

920 ) -> typing.Generator[bytes, None, None]: 

921 """ 

922 A generator wrapper for the read() method. A call will block until 

923 ``amt`` bytes have been read from the connection or until the 

924 connection is closed. 

925 

926 :param amt: 

927 How much of the content to read. The generator will return up to 

928 much data per iteration, but may return less. This is particularly 

929 likely when using compressed data. However, the empty string will 

930 never be returned. 

931 

932 :param decode_content: 

933 If True, will attempt to decode the body based on the 

934 'content-encoding' header. 

935 """ 

936 if self.chunked and self.supports_chunked_reads(): 

937 yield from self.read_chunked(amt, decode_content=decode_content) 

938 else: 

939 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

940 data = self.read(amt=amt, decode_content=decode_content) 

941 

942 if data: 

943 yield data 

944 

945 # Overrides from io.IOBase 

    def readable(self) -> bool:
        # io.IOBase override: this stream is always readable.
        return True

948 

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        # When auto_close is disabled, io.IOBase's own closed-flag is what
        # tracks closure, so set it explicitly here.
        if not self.auto_close:
            io.IOBase.close(self)

958 

959 @property 

960 def closed(self) -> bool: 

961 if not self.auto_close: 

962 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] 

963 elif self._fp is None: 

964 return True 

965 elif hasattr(self._fp, "isclosed"): 

966 return self._fp.isclosed() 

967 elif hasattr(self._fp, "closed"): 

968 return self._fp.closed 

969 else: 

970 return True 

971 

972 def fileno(self) -> int: 

973 if self._fp is None: 

974 raise OSError("HTTPResponse has no file to get a fileno from") 

975 elif hasattr(self._fp, "fileno"): 

976 return self._fp.fileno() 

977 else: 

978 raise OSError( 

979 "The file-like object this HTTPResponse is wrapped " 

980 "around has no file descriptor" 

981 ) 

982 

983 def flush(self) -> None: 

984 if ( 

985 self._fp is not None 

986 and hasattr(self._fp, "flush") 

987 and not getattr(self._fp, "closed", False) 

988 ): 

989 return self._fp.flush() 

990 

991 def supports_chunked_reads(self) -> bool: 

992 """ 

993 Checks if the underlying file-like object looks like a 

994 :class:`http.client.HTTPResponse` object. We do this by testing for 

995 the fp attribute. If it is present we assume it returns raw chunks as 

996 processed by read_chunked(). 

997 """ 

998 return hasattr(self._fp, "fp") 

999 

1000 def _update_chunk_length(self) -> None: 

1001 # First, we'll figure out length of a chunk and then 

1002 # we'll try to read it from socket. 

1003 if self.chunk_left is not None: 

1004 return None 

1005 line = self._fp.fp.readline() # type: ignore[union-attr] 

1006 line = line.split(b";", 1)[0] 

1007 try: 

1008 self.chunk_left = int(line, 16) 

1009 except ValueError: 

1010 # Invalid chunked protocol response, abort. 

1011 self.close() 

1012 raise InvalidChunkLength(self, line) from None 

1013 

1014 def _handle_chunk(self, amt: int | None) -> bytes: 

1015 returned_chunk = None 

1016 if amt is None: 

1017 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1018 returned_chunk = chunk 

1019 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1020 self.chunk_left = None 

1021 elif self.chunk_left is not None and amt < self.chunk_left: 

1022 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1023 self.chunk_left = self.chunk_left - amt 

1024 returned_chunk = value 

1025 elif amt == self.chunk_left: 

1026 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1027 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1028 self.chunk_left = None 

1029 returned_chunk = value 

1030 else: # amt > self.chunk_left 

1031 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1032 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1033 self.chunk_left = None 

1034 return returned_chunk # type: ignore[no-any-return] 

1035 

1036 def read_chunked( 

1037 self, amt: int | None = None, decode_content: bool | None = None 

1038 ) -> typing.Generator[bytes, None, None]: 

1039 """ 

1040 Similar to :meth:`HTTPResponse.read`, but with an additional 

1041 parameter: ``decode_content``. 

1042 

1043 :param amt: 

1044 How much of the content to read. If specified, caching is skipped 

1045 because it doesn't make sense to cache partial content as the full 

1046 response. 

1047 

1048 :param decode_content: 

1049 If True, will attempt to decode the body based on the 

1050 'content-encoding' header. 

1051 """ 

1052 self._init_decoder() 

1053 # FIXME: Rewrite this method and make it a class with a better structured logic. 

1054 if not self.chunked: 

1055 raise ResponseNotChunked( 

1056 "Response is not chunked. " 

1057 "Header 'transfer-encoding: chunked' is missing." 

1058 ) 

1059 if not self.supports_chunked_reads(): 

1060 raise BodyNotHttplibCompatible( 

1061 "Body should be http.client.HTTPResponse like. " 

1062 "It should have have an fp attribute which returns raw chunks." 

1063 ) 

1064 

1065 with self._error_catcher(): 

1066 # Don't bother reading the body of a HEAD request. 

1067 if self._original_response and is_response_to_head(self._original_response): 

1068 self._original_response.close() 

1069 return None 

1070 

1071 # If a response is already read and closed 

1072 # then return immediately. 

1073 if self._fp.fp is None: # type: ignore[union-attr] 

1074 return None 

1075 

1076 while True: 

1077 self._update_chunk_length() 

1078 if self.chunk_left == 0: 

1079 break 

1080 chunk = self._handle_chunk(amt) 

1081 decoded = self._decode( 

1082 chunk, decode_content=decode_content, flush_decoder=False 

1083 ) 

1084 if decoded: 

1085 yield decoded 

1086 

1087 if decode_content: 

1088 # On CPython and PyPy, we should never need to flush the 

1089 # decoder. However, on Jython we *might* need to, so 

1090 # lets defensively do it anyway. 

1091 decoded = self._flush_decoder() 

1092 if decoded: # Platform-specific: Jython. 

1093 yield decoded 

1094 

1095 # Chunk content ends with \r\n: discard it. 

1096 while self._fp is not None: 

1097 line = self._fp.fp.readline() 

1098 if not line: 

1099 # Some sites may not end with '\r\n'. 

1100 break 

1101 if line == b"\r\n": 

1102 break 

1103 

1104 # We read everything; close the "file". 

1105 if self._original_response: 

1106 self._original_response.close() 

1107 

1108 @property 

1109 def url(self) -> str | None: 

1110 """ 

1111 Returns the URL that was the source of this response. 

1112 If the request that generated this response redirected, this method 

1113 will return the final redirect location. 

1114 """ 

1115 return self._request_url 

1116 

1117 @url.setter 

1118 def url(self, url: str) -> None: 

1119 self._request_url = url 

1120 

1121 def __iter__(self) -> typing.Iterator[bytes]: 

1122 buffer: list[bytes] = [] 

1123 for chunk in self.stream(decode_content=True): 

1124 if b"\n" in chunk: 

1125 chunks = chunk.split(b"\n") 

1126 yield b"".join(buffer) + chunks[0] + b"\n" 

1127 for x in chunks[1:-1]: 

1128 yield x + b"\n" 

1129 if chunks[-1]: 

1130 buffer = [chunks[-1]] 

1131 else: 

1132 buffer = [] 

1133 else: 

1134 buffer.append(chunk) 

1135 if buffer: 

1136 yield b"".join(buffer)