Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 21%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

625 statements  

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import socket 

8import sys 

9import typing 

10import zlib 

11from contextlib import contextmanager 

12from http.client import HTTPMessage as _HttplibHTTPMessage 

13from http.client import HTTPResponse as _HttplibHTTPResponse 

14from socket import timeout as SocketTimeout 

15 

16if typing.TYPE_CHECKING: 

17 from ._base_connection import BaseHTTPConnection 

18 

19try: 

20 try: 

21 import brotlicffi as brotli # type: ignore[import-not-found] 

22 except ImportError: 

23 import brotli # type: ignore[import-not-found] 

24except ImportError: 

25 brotli = None 

26 

27from . import util 

28from ._base_connection import _TYPE_BODY 

29from ._collections import HTTPHeaderDict 

30from .connection import BaseSSLError, HTTPConnection, HTTPException 

31from .exceptions import ( 

32 BodyNotHttplibCompatible, 

33 DecodeError, 

34 HTTPError, 

35 IncompleteRead, 

36 InvalidChunkLength, 

37 InvalidHeader, 

38 ProtocolError, 

39 ReadTimeoutError, 

40 ResponseNotChunked, 

41 SSLError, 

42) 

43from .util.response import is_fp_closed, is_response_to_head 

44from .util.retry import Retry 

45 

46if typing.TYPE_CHECKING: 

47 from .connectionpool import HTTPConnectionPool 

48 

49log = logging.getLogger(__name__) 

50 

51 

class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Subclasses implement ``decompress`` to translate incremental chunks of
    encoded body data, and ``flush`` to drain any buffered output once the
    stream ends.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode *data* and return whatever decoded output is available."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output; called once at end of stream."""
        raise NotImplementedError()

58 

59 

class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Servers disagree on whether "deflate" means a zlib-wrapped stream
    (RFC 1950) or a raw DEFLATE stream (RFC 1951). The first chunk is tried
    as zlib data; if that fails, the decoder restarts in raw-DEFLATE mode
    and replays the bytes collected so far.
    """

    def __init__(self) -> None:
        self._first_try = True  # still deciding: zlib-wrapped vs. raw deflate
        self._data = b""  # bytes seen so far, replayed if we fall back
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already settled; stream straight through.
            return self._obj.decompress(data)

        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not zlib-wrapped after all: switch to raw DEFLATE and replay
            # everything received so far through the new decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if out:
            # Producing output confirms the zlib guess; drop the replay buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()

90 

91 

class GzipDecoderState:
    # State constants for GzipDecoder: which gzip member of a
    # (possibly multi-member) stream we are reading, or whether any
    # remaining input is being discarded after an error.
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2

96 

97 

class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and ``x-gzip``).

    Handles multi-member gzip streams. Trailing garbage after at least one
    complete member is tolerated, matching the behavior of other gzip
    clients.
    """

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Ignore everything after the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Trailing garbage after a full member is acceptable.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover input means another gzip member follows; restart.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

126 

127 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Decoder for ``Content-Encoding: br``.
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the per-package streaming method directly onto the
            # instance; setattr (rather than plain assignment) sidesteps
            # mypy complaints about overriding a method with an attribute.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only brotlipy's Decompressor exposes flush(); the 'Brotli'
            # package needs no explicit flush.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

145 

146 

# Zstandard support: the standard library gained ``compression.zstd`` in
# Python 3.14; on older interpreters the ``backports.zstd`` package
# provides the same API. HAS_ZSTD records whether either is available.
try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: zstd``; handles multi-frame bodies."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A body may concatenate several zstd frames; each time a frame
            # ends with input left over, start a fresh decompressor on the
            # remainder.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # A truncated final frame is a protocol error, not silent EOF.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

175 

176 

class MultiDecoder(ContentDecoder):
    """
    Decoder for a comma-separated ``Content-Encoding`` chain.

    From RFC 7231: if one or more encodings have been applied to a
    representation, the sender MUST list the content codings in the order
    in which they were applied. Decoding therefore runs the per-coding
    decoders in reverse order.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # The first-listed coding was applied first, so its decoder runs
        # last in the reverse chain and is the one that may hold buffered
        # output.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

196 

197 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for the given content-coding token(s)."""
    # A comma means a chain of codings, e.g. "gzip, br".
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip.
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Anything else is treated as deflate (the decoder itself copes with
    # both zlib-wrapped and raw streams).
    return DeflateDecoder()

214 

215 

class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    To return decoded data in read() while still following the
    BufferedIOBase API, we need a buffer that can hand back an exact
    number of bytes. Fill it with put(); drain it with get() or get_all().

    Maximum memory usage is bounded by the sum of:

    * self.buffer, which holds the full data
    * the largest chunk that get() copies out

    The worst case is a single chunk, where get() makes one full copy of
    the data.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk to the tail of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the front of the buffer.

        :raises RuntimeError: if the buffer is empty.
        :raises ValueError: if *n* is negative.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            needed = n - fetched
            if len(chunk) > needed:
                # Split the chunk: emit the head, keep the tail queued.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)
            if not self.buffer:
                # Fewer than n bytes were available in total.
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire buffer contents as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # Single chunk: return it directly, no copy needed.
            result = chunks.pop()
        else:
            sink = io.BytesIO()
            sink.writelines(chunks.popleft() for _ in range(len(chunks)))
            result = sink.getvalue()
        self._size = 0
        return result

287 

288 

class BaseHTTPResponse(io.IOBase):
    """Abstract base for HTTP response objects.

    Implements header/status bookkeeping, redirect detection, and content
    decoding; subclasses provide the actual read() machinery.
    """

    # Content-codings we can decode; extended when the optional brotli
    # and zstd libraries are importable.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types a failing decoder may raise; wrapped into DecodeError
    # by _decode().
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Assigning through the property setter, which may also update url.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the (possibly cached) full response body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained codings: only build a decoder if at least one
                # listed coding is supported.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

511 

512 

513class HTTPResponse(BaseHTTPResponse): 

514 """ 

515 HTTP Response container. 

516 

517 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

518 loaded and decoded on-demand when the ``data`` property is accessed. This 

519 class is also compatible with the Python standard library's :mod:`io` 

520 module, and can hence be treated as a readable object in the context of that 

521 framework. 

522 

523 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

524 

525 :param preload_content: 

526 If True, the response's body will be preloaded during construction. 

527 

528 :param decode_content: 

529 If True, will attempt to decode the body based on the 

530 'content-encoding' header. 

531 

532 :param original_response: 

533 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

534 object, it's convenient to include the original for debug purposes. It's 

535 otherwise unused. 

536 

537 :param retries: 

538 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

539 was used during the request. 

540 

541 :param enforce_content_length: 

542 Enforce content length checking. Body returned by server must match 

543 value of Content-Length header, if present. Otherwise, raise error. 

544 """ 

545 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Build a response wrapper around *body*.

        *body* may be a str/bytes (stored directly) or a file-like object
        (read on demand). See the class docstring for parameter details.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly rather than read from a fp.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # A file-like body becomes the fp we read from on demand.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

609 

    def release_conn(self) -> None:
        """Return the connection to its pool, if both pool and connection
        are still set; otherwise do nothing."""
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

616 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: a failed read still leaves the connection
            # in a state where it can be discarded.
            pass

627 

628 @property 

629 def data(self) -> bytes: 

630 # For backwards-compat with earlier urllib3 0.4 and earlier. 

631 if self._body: 

632 return self._body # type: ignore[return-value] 

633 

634 if self._fp: 

635 return self.read(cache_content=True) 

636 

637 return None # type: ignore[return-value] 

638 

    @property
    def connection(self) -> HTTPConnection | None:
        # The underlying connection, or None once released back to the pool.
        return self._connection

642 

    def isclosed(self) -> bool:
        """Return True if the underlying file object is closed (or absent)."""
        return is_fp_closed(self._fp)

645 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

653 

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        Returns the expected body length in bytes, or None when it cannot
        be determined (chunked transfer, invalid/missing header).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                # Non-integer Content-Length: treat as unknown.
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

709 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        # Tracks whether the wrapped block finished without raising; if it
        # did not, the finally-block tears the connection down.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

775 

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked slow path when both the size condition and
        # the affected-environment condition hold.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

832 

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also maintains ``_fp_bytes_read`` / ``length_remaining`` and closes
        the file object when the stream is exhausted.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data

884 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Already have enough decoded bytes buffered: serve from there.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush on a full read, or when a bounded read hit end of stream.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until the buffer can satisfy `amt` or the stream
            # runs dry.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

958 

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            # Stop once we produced output, or once EOF forced a flush.
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

1012 

1013 def stream( 

1014 self, amt: int | None = 2**16, decode_content: bool | None = None 

1015 ) -> typing.Generator[bytes]: 

1016 """ 

1017 A generator wrapper for the read() method. A call will block until 

1018 ``amt`` bytes have been read from the connection or until the 

1019 connection is closed. 

1020 

1021 :param amt: 

1022 How much of the content to read. The generator will return up to 

1023 much data per iteration, but may return less. This is particularly 

1024 likely when using compressed data. However, the empty string will 

1025 never be returned. 

1026 

1027 :param decode_content: 

1028 If True, will attempt to decode the body based on the 

1029 'content-encoding' header. 

1030 """ 

1031 if self.chunked and self.supports_chunked_reads(): 

1032 yield from self.read_chunked(amt, decode_content=decode_content) 

1033 else: 

1034 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

1035 data = self.read(amt=amt, decode_content=decode_content) 

1036 

1037 if data: 

1038 yield data 

1039 

1040 # Overrides from io.IOBase 

1041 def readable(self) -> bool: 

1042 return True 

1043 

1044 def shutdown(self) -> None: 

1045 if not self._sock_shutdown: 

1046 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set") 

1047 if self._connection is None: 

1048 raise RuntimeError( 

1049 "Cannot shutdown as connection has already been released to the pool" 

1050 ) 

1051 self._sock_shutdown(socket.SHUT_RD) 

1052 

1053 def close(self) -> None: 

1054 self._sock_shutdown = None 

1055 

1056 if not self.closed and self._fp: 

1057 self._fp.close() 

1058 

1059 if self._connection: 

1060 self._connection.close() 

1061 

1062 if not self.auto_close: 

1063 io.IOBase.close(self) 

1064 

1065 @property 

1066 def closed(self) -> bool: 

1067 if not self.auto_close: 

1068 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] 

1069 elif self._fp is None: 

1070 return True 

1071 elif hasattr(self._fp, "isclosed"): 

1072 return self._fp.isclosed() 

1073 elif hasattr(self._fp, "closed"): 

1074 return self._fp.closed 

1075 else: 

1076 return True 

1077 

1078 def fileno(self) -> int: 

1079 if self._fp is None: 

1080 raise OSError("HTTPResponse has no file to get a fileno from") 

1081 elif hasattr(self._fp, "fileno"): 

1082 return self._fp.fileno() 

1083 else: 

1084 raise OSError( 

1085 "The file-like object this HTTPResponse is wrapped " 

1086 "around has no file descriptor" 

1087 ) 

1088 

1089 def flush(self) -> None: 

1090 if ( 

1091 self._fp is not None 

1092 and hasattr(self._fp, "flush") 

1093 and not getattr(self._fp, "closed", False) 

1094 ): 

1095 return self._fp.flush() 

1096 

1097 def supports_chunked_reads(self) -> bool: 

1098 """ 

1099 Checks if the underlying file-like object looks like a 

1100 :class:`http.client.HTTPResponse` object. We do this by testing for 

1101 the fp attribute. If it is present we assume it returns raw chunks as 

1102 processed by read_chunked(). 

1103 """ 

1104 return hasattr(self._fp, "fp") 

1105 

1106 def _update_chunk_length(self) -> None: 

1107 # First, we'll figure out length of a chunk and then 

1108 # we'll try to read it from socket. 

1109 if self.chunk_left is not None: 

1110 return None 

1111 line = self._fp.fp.readline() # type: ignore[union-attr] 

1112 line = line.split(b";", 1)[0] 

1113 try: 

1114 self.chunk_left = int(line, 16) 

1115 except ValueError: 

1116 self.close() 

1117 if line: 

1118 # Invalid chunked protocol response, abort. 

1119 raise InvalidChunkLength(self, line) from None 

1120 else: 

1121 # Truncated at start of next chunk 

1122 raise ProtocolError("Response ended prematurely") from None 

1123 

1124 def _handle_chunk(self, amt: int | None) -> bytes: 

1125 returned_chunk = None 

1126 if amt is None: 

1127 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1128 returned_chunk = chunk 

1129 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1130 self.chunk_left = None 

1131 elif self.chunk_left is not None and amt < self.chunk_left: 

1132 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1133 self.chunk_left = self.chunk_left - amt 

1134 returned_chunk = value 

1135 elif amt == self.chunk_left: 

1136 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1137 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1138 self.chunk_left = None 

1139 returned_chunk = value 

1140 else: # amt > self.chunk_left 

1141 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1142 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1143 self.chunk_left = None 

1144 return returned_chunk # type: ignore[no-any-return] 

1145 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                # A zero-length chunk marks the end of the chunked body.
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            # NOTE(review): this loop also consumes any trailer lines up to
            # the terminating blank CRLF line.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1222 

1223 @property 

1224 def url(self) -> str | None: 

1225 """ 

1226 Returns the URL that was the source of this response. 

1227 If the request that generated this response redirected, this method 

1228 will return the final redirect location. 

1229 """ 

1230 return self._request_url 

1231 

1232 @url.setter 

1233 def url(self, url: str | None) -> None: 

1234 self._request_url = url 

1235 

1236 def __iter__(self) -> typing.Iterator[bytes]: 

1237 buffer: list[bytes] = [] 

1238 for chunk in self.stream(decode_content=True): 

1239 if b"\n" in chunk: 

1240 chunks = chunk.split(b"\n") 

1241 yield b"".join(buffer) + chunks[0] + b"\n" 

1242 for x in chunks[1:-1]: 

1243 yield x + b"\n" 

1244 if chunks[-1]: 

1245 buffer = [chunks[-1]] 

1246 else: 

1247 buffer = [] 

1248 else: 

1249 buffer.append(chunk) 

1250 if buffer: 

1251 yield b"".join(buffer)