Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

560 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:45 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fixed: the assignment was previously duplicated as
    # `_zstd_version = _zstd_version = tuple(...)`.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract interface for streaming decoders of encoded response bodies."""

    def decompress(self, data: bytes) -> bytes:
        # Feed a chunk of encoded bytes; return any decoded bytes available.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Signal end of input; return any remaining buffered decoded bytes.
        raise NotImplementedError()

74 

75 

class DeflateDecoder(ContentDecoder):
    """Streaming decoder for ``Content-Encoding: deflate``.

    Some servers send zlib-wrapped deflate, others send raw deflate. We
    first try the zlib-wrapped form; on the first zlib error we restart
    with a raw-deflate decompressor and replay everything seen so far.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already settled: decode directly.
            return self._obj.decompress(data)

        # Still probing the format — remember all input in case we must
        # replay it through a raw-deflate decompressor.
        self._data += data
        try:
            output = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parse failed: switch to raw deflate (negative
            # wbits) and replay the full buffered input.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            buffered, self._data = self._data, None  # type: ignore[assignment]
            return self.decompress(buffered)

        if output:
            # Producing output confirms the zlib-wrapped format; drop the
            # replay buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return output

    def flush(self) -> bytes:
        return self._obj.flush()

106 

107 

class GzipDecoderState:
    """State markers for GzipDecoder's multi-member handling."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # at least one member decoded; trailing garbage tolerated
    SWALLOW_DATA = 2  # an error occurred; all further input is ignored

112 

113 

class GzipDecoder(ContentDecoder):
    """Streaming decoder for ``Content-Encoding: gzip`` (and ``x-gzip``).

    Handles concatenated multi-member gzip streams, and — once at least
    one member has decoded cleanly — tolerates trailing garbage the way
    other gzip clients do.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)

        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise

            data = self._obj.unused_data
            if not data:
                return bytes(output)

            # Leftover bytes mean another gzip member follows: restart with
            # a fresh decompressor for it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind this instance's decompress() straight to the backend's
            # streaming method: one backend exposes decompress(), the
            # other exposes process(). Both accept bytes and return bytes.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Not every brotli backend provides flush(); when absent there
            # is nothing to return at end of stream.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A zstd payload may contain multiple frames: when a frame ends
            # (eof) with input left over, start a fresh decompressor for the
            # next frame and keep going.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18, enforced at import) tells us whether
            # a complete frame was consumed; anything less is an error.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

184 

185 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Since encodings are listed in application order, decoding walks the
    chain in reverse.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(token.strip()) for token in modes.split(",")]

    def decompress(self, data: bytes) -> bytes:
        decoded = data
        for decoder in reversed(self._decoders):
            decoded = decoder.decompress(decoded)
        return decoded

    def flush(self) -> bytes:
        # The first-listed (outermost-decoded-last) decoder owns the flush.
        return self._decoders[0].flush()

205 

206 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for a Content-Encoding header value."""
    # A comma means a chain of encodings was applied.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and zstd is not None:
        return ZstdDecoder()

    # Everything else is treated as deflate (the decoder itself copes with
    # both zlib-wrapped and raw streams).
    return DeflateDecoder()

223 

224 

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        # Chunks are kept whole; splitting only happens on the way out.
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        result = io.BytesIO()
        copied = 0
        while copied < n and self.buffer:
            chunk = self.buffer.popleft()
            needed = n - copied
            if len(chunk) > needed:
                # Chunk is bigger than what's still wanted: take a prefix
                # and push the remainder back for the next get().
                result.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            # Consume the whole chunk.
            result.write(chunk)
            self._size -= len(chunk)
            copied += len(chunk)

        return result.getvalue()

282 

283 

class BaseHTTPResponse(io.IOBase):
    """
    Shared behavior for urllib3 responses: header storage, redirect helpers,
    content decoding, and :mod:`io` / :mod:`http.client` compatibility shims.
    Subclasses implement the actual body-reading primitives (``read``,
    ``stream``, ``close`` …).
    """

    # Content-encodings we can decode transparently; extended below when the
    # optional brotli/zstd backends imported successfully at module load.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types _decode() converts into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once any decoded bytes are produced; guards against mixing
        # decoded and raw reads (see _decode()).
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Note: goes through the property setter, which may update self.url.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() once content-encoding is known.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the full (possibly cached) response body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Subclasses expose the request URL here.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # A chained encoding: only build a (Multi)decoder when at
                # least one token is an encoding we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                # Once decoded bytes were handed out, raw bytes would be
                # inconsistent with what the caller already received.
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill *b* with up to len(b) bytes; return the count written (0 at EOF).
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

504 

505 

506class HTTPResponse(BaseHTTPResponse): 

507 """ 

508 HTTP Response container. 

509 

510 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

511 loaded and decoded on-demand when the ``data`` property is accessed. This 

512 class is also compatible with the Python standard library's :mod:`io` 

513 module, and can hence be treated as a readable object in the context of that 

514 framework. 

515 

516 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

517 

518 :param preload_content: 

519 If True, the response's body will be preloaded during construction. 

520 

521 :param decode_content: 

522 If True, will attempt to decode the body based on the 

523 'content-encoding' header. 

524 

525 :param original_response: 

526 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

527 object, it's convenient to include the original for debug purposes. It's 

528 otherwise unused. 

529 

530 :param retries: 

531 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

532 was used during the request. 

533 

534 :param enforce_content_length: 

535 Enforce content length checking. Body returned by server must match 

536 value of Content-Length header, if present. Otherwise, raise error. 

537 """ 

538 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0  # raw bytes consumed from the wire (see tell())
        self.msg = msg

        # A str/bytes body is stored directly; anything file-like becomes
        # the read source below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

598 

599 def release_conn(self) -> None: 

600 if not self._pool or not self._connection: 

601 return None 

602 

603 self._pool._put_conn(self._connection) 

604 self._connection = None 

605 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: if reading fails the connection is unusable
            # anyway, so there is nothing further to do here.
            pass

616 

    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Read everything and cache it so repeated accesses keep
            # returning the same bytes after the stream is exhausted.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

627 

    @property
    def connection(self) -> HTTPConnection | None:
        # The connection this response still holds, if any.
        return self._connection

631 

    def isclosed(self) -> bool:
        """Report whether the underlying file-like object is closed."""
        return is_fp_closed(self._fp)

634 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Counter is maintained by _raw_read().
        return self._fp_bytes_read

642 

643 def _init_length(self, request_method: str | None) -> int | None: 

644 """ 

645 Set initial length value for Response content if available. 

646 """ 

647 length: int | None 

648 content_length: str | None = self.headers.get("content-length") 

649 

650 if content_length is not None: 

651 if self.chunked: 

652 # This Response will fail with an IncompleteRead if it can't be 

653 # received as chunked. This method falls back to attempt reading 

654 # the response before raising an exception. 

655 log.warning( 

656 "Received response with both Content-Length and " 

657 "Transfer-Encoding set. This is expressly forbidden " 

658 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

659 "attempting to process response as Transfer-Encoding: " 

660 "chunked." 

661 ) 

662 return None 

663 

664 try: 

665 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

666 # be sent in a single Content-Length header 

667 # (e.g. Content-Length: 42, 42). This line ensures the values 

668 # are all valid ints and that as long as the `set` length is 1, 

669 # all values are the same. Otherwise, the header is invalid. 

670 lengths = {int(val) for val in content_length.split(",")} 

671 if len(lengths) > 1: 

672 raise InvalidHeader( 

673 "Content-Length contained multiple " 

674 "unmatching values (%s)" % content_length 

675 ) 

676 length = lengths.pop() 

677 except ValueError: 

678 length = None 

679 else: 

680 if length < 0: 

681 length = None 

682 

683 else: # if content_length is None 

684 length = None 

685 

686 # Convert status to int for comparison 

687 # In some cases, httplib returns a status of "_UNKNOWN" 

688 try: 

689 status = int(self.status) 

690 except ValueError: 

691 status = 0 

692 

693 # Check for responses that shouldn't include a body 

694 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

695 length = 0 

696 

697 return length 

698 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (self.length_remaining and self.length_remaining > c_int_max)
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            # Work around the overflow: read in bounded chunks and assemble.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

798 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track raw wire bytes (reported by tell()) and the remaining
            # Content-Length budget.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

840 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve the request entirely from already-decoded bytes if we can.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full read, or when a sized read hit EOF.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until we can satisfy `amt` decoded bytes
            # or the stream runs dry.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

911 

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            # Keep going while the socket has data or decoded bytes remain
            # buffered from a previous read.
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

938 

939 # Overrides from io.IOBase 

    def readable(self) -> bool:
        # io.IOBase interface: this response is always a readable stream.
        return True

942 

    def close(self) -> None:
        """Close the underlying file object and any held connection."""
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            # With auto_close disabled, io.IOBase tracks closed state; mark it.
            io.IOBase.close(self)

952 

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            # io.IOBase owns the closed flag when auto_close is disabled.
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            # http.client.HTTPResponse-style objects.
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            # Generic file-like objects.
            return self._fp.closed
        else:
            return True

965 

    def fileno(self) -> int:
        """Return the file descriptor of the wrapped object, if it has one."""
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

976 

def flush(self) -> None:
    """Flush the wrapped file object, if it is open and flushable."""
    fp = self._fp
    if fp is None:
        return None
    if getattr(fp, "closed", False):
        # Flushing a closed object would raise; silently skip instead.
        return None
    if hasattr(fp, "flush"):
        return fp.flush()

984 

def supports_chunked_reads(self) -> bool:
    """
    Check whether the wrapped file-like object resembles a
    :class:`http.client.HTTPResponse`.

    The probe is the presence of an ``fp`` attribute; when it exists we
    assume the object hands back raw chunks suitable for
    :meth:`read_chunked`.
    """
    return hasattr(self._fp, "fp")

993 

def _update_chunk_length(self) -> None:
    """Parse the next chunk-size line into ``self.chunk_left``.

    A no-op while a chunk is still in progress. On a malformed size the
    response is closed and :class:`InvalidChunkLength` is raised.
    """
    if self.chunk_left is not None:
        # Still inside the current chunk; nothing to parse yet.
        return None
    raw_line = self._fp.fp.readline()  # type: ignore[union-attr]
    # Discard any chunk extension that follows a ';'.
    size_token = raw_line.split(b";", 1)[0]
    try:
        self.chunk_left = int(size_token, 16)
    except ValueError:
        # Invalid chunked protocol response, abort.
        self.close()
        raise InvalidChunkLength(self, size_token) from None

1007 

def _handle_chunk(self, amt: int | None) -> bytes:
    """Read up to ``amt`` bytes of the current chunk from the socket.

    ``self.chunk_left`` is kept in sync: it is decremented for a partial
    read and reset to ``None`` (after discarding the trailing CRLF) once
    the chunk is exhausted.
    """
    fp = self._fp
    if amt is None:
        # Consume the whole remaining chunk plus its trailing CRLF.
        data = fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
        fp._safe_read(2)  # type: ignore[union-attr]
        self.chunk_left = None
        return data
    if self.chunk_left is not None and amt < self.chunk_left:
        # Partial read: the chunk stays open.
        data = fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left -= amt
        return data
    if amt == self.chunk_left:
        data = fp._safe_read(amt)  # type: ignore[union-attr]
        fp._safe_read(2)  # type: ignore[union-attr]  # Toss the CRLF at the end of the chunk.
        self.chunk_left = None
        return data
    # amt > self.chunk_left: clamp to what the chunk still holds.
    data = fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
    fp._safe_read(2)  # type: ignore[union-attr]  # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return data

1029 

def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes, None, None]:
    """
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``.

    :param amt:
        How much of the content to read. If specified, caching is skipped
        because it doesn't make sense to cache partial content as the full
        response.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response has no 'transfer-encoding: chunked' header.
    :raises BodyNotHttplibCompatible:
        If the wrapped file object exposes no ``fp`` attribute for raw
        chunk access.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        while True:
            self._update_chunk_length()
            # A chunk-size of zero marks the end of the chunked body.
            if self.chunk_left == 0:
                break
            chunk = self._handle_chunk(amt)
            decoded = self._decode(
                chunk, decode_content=decode_content, flush_decoder=False
            )
            if decoded:
                yield decoded

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            decoded = self._flush_decoder()
            if decoded:  # Platform-specific: Jython.
                yield decoded

        # Chunk content ends with \r\n: discard it. Any trailer lines
        # that precede the final blank line are read and thrown away too.
        while self._fp is not None:
            line = self._fp.fp.readline()
            if not line:
                # Some sites may not end with '\r\n'.
                break
            if line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()

1101 

@property
def url(self) -> str | None:
    """
    The URL that was the source of this response.

    After a redirected request this holds the final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str) -> None:
    # Remember the request URL so callers can discover the final location.
    self._request_url = url

1114 

def __iter__(self) -> typing.Iterator[bytes]:
    """Yield the decoded body one line at a time (lines keep their ``\\n``).

    Stream chunks are re-assembled so that a line split across chunks is
    emitted as a single bytes object; a final unterminated line is yielded
    without a newline.
    """
    pending: list[bytes] = []
    for piece in self.stream(decode_content=True):
        if b"\n" not in piece:
            # No line boundary yet; keep accumulating.
            pending.append(piece)
            continue
        head, *rest = piece.split(b"\n")
        # Complete the line carried over from previous chunks.
        yield b"".join(pending) + head + b"\n"
        pending = []
        # rest[-1] is the partial line after the last newline (b"" if none).
        for complete_line in rest[:-1]:
            yield complete_line + b"\n"
        if rest[-1]:
            pending = [rest[-1]]
    if pending:
        yield b"".join(pending)