Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23% (559 statements)

from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

try:
    try:
        import brotlicffi as brotli  # type: ignore[import]
    except ImportError:
        import brotli  # type: ignore[import]
except ImportError:
    brotli = None

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from typing_extensions import Literal

    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
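

# Editor's note: a minimal illustrative sketch, not part of urllib3. It shows
# why DeflateDecoder keeps the "first try" fallback: servers send "deflate"
# bodies either zlib-wrapped (RFC 1950) or raw (RFC 1951). The helper name
# _demo_deflate_decoder is hypothetical and assumes the complete stream is
# passed in a single call.
def _demo_deflate_decoder() -> None:
    payload = b"x" * 64
    # zlib-wrapped deflate succeeds on the first try.
    dec = DeflateDecoder()
    assert dec.decompress(zlib.compress(payload)) + dec.flush() == payload
    # Raw deflate makes the first zlib attempt raise, so the decoder retries
    # transparently with a raw (-MAX_WBITS) decompressobj on the buffered data.
    raw = zlib.compressobj(wbits=-zlib.MAX_WBITS)
    dec = DeflateDecoder()
    assert dec.decompress(raw.compress(payload) + raw.flush()) + dec.flush() == payload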


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
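

# Editor's note: a minimal illustrative sketch, not part of urllib3. It shows
# the two behaviours GzipDecoder layers on top of zlib: concatenated gzip
# members decode as one logical stream, and garbage after at least one full
# member is swallowed rather than raised. The helper name is hypothetical.
def _demo_gzip_decoder() -> None:
    import gzip

    members = gzip.compress(b"hello ") + gzip.compress(b"world")
    dec = GzipDecoder()
    assert dec.decompress(members) + dec.flush() == b"hello world"

    # Trailing garbage after a complete member is tolerated (SWALLOW_DATA).
    dec = GzipDecoder()
    assert dec.decompress(gzip.compress(b"hello ") + b"not-gzip") == b"hello "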


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
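

# Editor's note: a minimal illustrative sketch, not part of urllib3, and only
# defined when a brotli implementation is importable. brotli.compress() exists
# in both the 'Brotli' and 'brotlicffi' packages; the helper name is
# hypothetical.
if brotli is not None:

    def _demo_brotli_decoder() -> None:
        dec = BrotliDecoder()
        wire = brotli.compress(b"sample body")
        assert dec.decompress(wire) + dec.flush() == b"sample body"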


if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            return self._obj.decompress(data)  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            ret = self._obj.flush()
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
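

# Editor's note: a minimal illustrative sketch, not part of urllib3, and only
# defined when 'zstandard' >= 0.18 is importable. flush() raises DecodeError
# for a truncated frame because the decompressobj never reaches .eof; the
# helper name is hypothetical.
if zstd is not None:

    def _demo_zstd_decoder() -> None:
        frame = zstd.ZstdCompressor().compress(b"sample body")
        dec = ZstdDecoder()
        out = dec.decompress(frame) + dec.flush()  # flush() validates .eof
        assert out == b"sample body"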


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
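

# Editor's note: a minimal illustrative sketch, not part of urllib3. A header
# like "Content-Encoding: deflate, gzip" lists codings in the order applied,
# so the MultiDecoder built by _get_decoder must undo them in reverse. The
# helper name is hypothetical.
def _demo_get_decoder() -> None:
    import gzip

    payload = b"hello world"
    wire = gzip.compress(zlib.compress(payload))  # deflate applied first, then gzip
    dec = _get_decoder("deflate, gzip")
    assert isinstance(dec, MultiDecoder)
    assert dec.decompress(wire) + dec.flush() == payload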


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()
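

# Editor's note: a minimal illustrative sketch, not part of urllib3. It shows
# the exact-size contract of get(): a chunk is split when needed and the
# remainder is pushed back onto the left of the deque. The helper name is
# hypothetical.
def _demo_bytes_queue_buffer() -> None:
    buf = BytesQueueBuffer()
    buf.put(b"hello ")
    buf.put(b"world")
    assert len(buf) == 11
    assert buf.get(7) == b"hello w"  # splits b"world", keeps b"orld" queued
    assert buf.get(4) == b"orld"
    assert len(buf) == 0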


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response, working around a known overflow issue: reading more
        bytes than fit in a 32-bit int at a time via SSL can raise an
        overflow error, so the read is chunked whenever `amt` or
        `self.length_remaining` indicate the problem may happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
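

# Editor's note: a minimal illustrative sketch, not part of urllib3. Wrapping
# a plain bytes body shows preload/data/json() and redirect detection without
# a live socket; the header values and JSON payload are invented, and the
# helper name is hypothetical.
def _demo_http_response() -> None:
    r = HTTPResponse(
        body=b'{"ok": true}',
        headers={"content-type": "application/json"},
        status=200,
        version=11,
        reason="OK",
    )
    assert r.data == b'{"ok": true}'
    assert r.json() == {"ok": True}

    redirect = HTTPResponse(status=302, headers={"location": "/next"})
    assert redirect.get_redirect_location() == "/next"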