Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 22%

615 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17if typing.TYPE_CHECKING: 

18 from ._base_connection import BaseHTTPConnection 

19 

20try: 

21 try: 

22 import brotlicffi as brotli # type: ignore[import-not-found] 

23 except ImportError: 

24 import brotli # type: ignore[import-not-found] 

25except ImportError: 

26 brotli = None 

27 

28try: 

29 import zstandard as zstd # type: ignore[import-not-found] 

30 

31 # The package 'zstandard' added the 'eof' property starting 

32 # in v0.18.0 which we require to ensure a complete and 

33 # valid zstd stream was fed into the ZstdDecoder. 

34 # See: https://github.com/urllib3/urllib3/pull/2624 

35 _zstd_version = _zstd_version = tuple( 

36 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr] 

37 ) 

38 if _zstd_version < (0, 18): # Defensive: 

39 zstd = None 

40 

41except (AttributeError, ImportError, ValueError): # Defensive: 

42 zstd = None 

43 

44from . import util 

45from ._base_connection import _TYPE_BODY 

46from ._collections import HTTPHeaderDict 

47from .connection import BaseSSLError, HTTPConnection, HTTPException 

48from .exceptions import ( 

49 BodyNotHttplibCompatible, 

50 DecodeError, 

51 HTTPError, 

52 IncompleteRead, 

53 InvalidChunkLength, 

54 InvalidHeader, 

55 ProtocolError, 

56 ReadTimeoutError, 

57 ResponseNotChunked, 

58 SSLError, 

59) 

60from .util.response import is_fp_closed, is_response_to_head 

61from .util.retry import Retry 

62 

63if typing.TYPE_CHECKING: 

64 from typing import Literal 

65 

66 from .connectionpool import HTTPConnectionPool 

67 

68log = logging.getLogger(__name__) 

69 

70 

71class ContentDecoder: 

72 def decompress(self, data: bytes) -> bytes: 

73 raise NotImplementedError() 

74 

75 def flush(self) -> bytes: 

76 raise NotImplementedError() 

77 

78 

79class DeflateDecoder(ContentDecoder): 

80 def __init__(self) -> None: 

81 self._first_try = True 

82 self._data = b"" 

83 self._obj = zlib.decompressobj() 

84 

85 def decompress(self, data: bytes) -> bytes: 

86 if not data: 

87 return data 

88 

89 if not self._first_try: 

90 return self._obj.decompress(data) 

91 

92 self._data += data 

93 try: 

94 decompressed = self._obj.decompress(data) 

95 if decompressed: 

96 self._first_try = False 

97 self._data = None # type: ignore[assignment] 

98 return decompressed 

99 except zlib.error: 

100 self._first_try = False 

101 self._obj = zlib.decompressobj(-zlib.MAX_WBITS) 

102 try: 

103 return self.decompress(self._data) 

104 finally: 

105 self._data = None # type: ignore[assignment] 

106 

107 def flush(self) -> bytes: 

108 return self._obj.flush() 

109 

110 

111class GzipDecoderState: 

112 FIRST_MEMBER = 0 

113 OTHER_MEMBERS = 1 

114 SWALLOW_DATA = 2 

115 

116 

117class GzipDecoder(ContentDecoder): 

118 def __init__(self) -> None: 

119 self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) 

120 self._state = GzipDecoderState.FIRST_MEMBER 

121 

122 def decompress(self, data: bytes) -> bytes: 

123 ret = bytearray() 

124 if self._state == GzipDecoderState.SWALLOW_DATA or not data: 

125 return bytes(ret) 

126 while True: 

127 try: 

128 ret += self._obj.decompress(data) 

129 except zlib.error: 

130 previous_state = self._state 

131 # Ignore data after the first error 

132 self._state = GzipDecoderState.SWALLOW_DATA 

133 if previous_state == GzipDecoderState.OTHER_MEMBERS: 

134 # Allow trailing garbage acceptable in other gzip clients 

135 return bytes(ret) 

136 raise 

137 data = self._obj.unused_data 

138 if not data: 

139 return bytes(ret) 

140 self._state = GzipDecoderState.OTHER_MEMBERS 

141 self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS) 

142 

143 def flush(self) -> bytes: 

144 return self._obj.flush() 

145 

146 

147if brotli is not None: 

148 

149 class BrotliDecoder(ContentDecoder): 

150 # Supports both 'brotlipy' and 'Brotli' packages 

151 # since they share an import name. The top branches 

152 # are for 'brotlipy' and bottom branches for 'Brotli' 

153 def __init__(self) -> None: 

154 self._obj = brotli.Decompressor() 

155 if hasattr(self._obj, "decompress"): 

156 setattr(self, "decompress", self._obj.decompress) 

157 else: 

158 setattr(self, "decompress", self._obj.process) 

159 

160 def flush(self) -> bytes: 

161 if hasattr(self._obj, "flush"): 

162 return self._obj.flush() # type: ignore[no-any-return] 

163 return b"" 

164 

165 

166if zstd is not None: 

167 

168 class ZstdDecoder(ContentDecoder): 

169 def __init__(self) -> None: 

170 self._obj = zstd.ZstdDecompressor().decompressobj() 

171 

172 def decompress(self, data: bytes) -> bytes: 

173 if not data: 

174 return b"" 

175 data_parts = [self._obj.decompress(data)] 

176 while self._obj.eof and self._obj.unused_data: 

177 unused_data = self._obj.unused_data 

178 self._obj = zstd.ZstdDecompressor().decompressobj() 

179 data_parts.append(self._obj.decompress(unused_data)) 

180 return b"".join(data_parts) 

181 

182 def flush(self) -> bytes: 

183 ret = self._obj.flush() # note: this is a no-op 

184 if not self._obj.eof: 

185 raise DecodeError("Zstandard data is incomplete") 

186 return ret # type: ignore[no-any-return] 

187 

188 

189class MultiDecoder(ContentDecoder): 

190 """ 

191 From RFC7231: 

192 If one or more encodings have been applied to a representation, the 

193 sender that applied the encodings MUST generate a Content-Encoding 

194 header field that lists the content codings in the order in which 

195 they were applied. 

196 """ 

197 

198 def __init__(self, modes: str) -> None: 

199 self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")] 

200 

201 def flush(self) -> bytes: 

202 return self._decoders[0].flush() 

203 

204 def decompress(self, data: bytes) -> bytes: 

205 for d in reversed(self._decoders): 

206 data = d.decompress(data) 

207 return data 

208 

209 

210def _get_decoder(mode: str) -> ContentDecoder: 

211 if "," in mode: 

212 return MultiDecoder(mode) 

213 

214 # According to RFC 9110 section 8.4.1.3, recipients should 

215 # consider x-gzip equivalent to gzip 

216 if mode in ("gzip", "x-gzip"): 

217 return GzipDecoder() 

218 

219 if brotli is not None and mode == "br": 

220 return BrotliDecoder() 

221 

222 if zstd is not None and mode == "zstd": 

223 return ZstdDecoder() 

224 

225 return DeflateDecoder() 

226 

227 

228class BytesQueueBuffer: 

229 """Memory-efficient bytes buffer 

230 

231 To return decoded data in read() and still follow the BufferedIOBase API, we need a 

232 buffer to always return the correct amount of bytes. 

233 

234 This buffer should be filled using calls to put() 

235 

236 Our maximum memory usage is determined by the sum of the size of: 

237 

238 * self.buffer, which contains the full data 

239 * the largest chunk that we will copy in get() 

240 

241 The worst case scenario is a single chunk, in which case we'll make a full copy of 

242 the data inside get(). 

243 """ 

244 

245 def __init__(self) -> None: 

246 self.buffer: typing.Deque[bytes] = collections.deque() 

247 self._size: int = 0 

248 

249 def __len__(self) -> int: 

250 return self._size 

251 

252 def put(self, data: bytes) -> None: 

253 self.buffer.append(data) 

254 self._size += len(data) 

255 

256 def get(self, n: int) -> bytes: 

257 if n == 0: 

258 return b"" 

259 elif not self.buffer: 

260 raise RuntimeError("buffer is empty") 

261 elif n < 0: 

262 raise ValueError("n should be > 0") 

263 

264 fetched = 0 

265 ret = io.BytesIO() 

266 while fetched < n: 

267 remaining = n - fetched 

268 chunk = self.buffer.popleft() 

269 chunk_length = len(chunk) 

270 if remaining < chunk_length: 

271 left_chunk, right_chunk = chunk[:remaining], chunk[remaining:] 

272 ret.write(left_chunk) 

273 self.buffer.appendleft(right_chunk) 

274 self._size -= remaining 

275 break 

276 else: 

277 ret.write(chunk) 

278 self._size -= chunk_length 

279 fetched += chunk_length 

280 

281 if not self.buffer: 

282 break 

283 

284 return ret.getvalue() 

285 

286 def get_all(self) -> bytes: 

287 buffer = self.buffer 

288 if not buffer: 

289 assert self._size == 0 

290 return b"" 

291 if len(buffer) == 1: 

292 result = buffer.pop() 

293 else: 

294 ret = io.BytesIO() 

295 ret.writelines(buffer.popleft() for _ in range(len(buffer))) 

296 result = ret.getvalue() 

297 self._size = 0 

298 return result 

299 

300 

301class BaseHTTPResponse(io.IOBase): 

302 CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"] 

303 if brotli is not None: 

304 CONTENT_DECODERS += ["br"] 

305 if zstd is not None: 

306 CONTENT_DECODERS += ["zstd"] 

307 REDIRECT_STATUSES = [301, 302, 303, 307, 308] 

308 

309 DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error) 

310 if brotli is not None: 

311 DECODER_ERROR_CLASSES += (brotli.error,) 

312 

313 if zstd is not None: 

314 DECODER_ERROR_CLASSES += (zstd.ZstdError,) 

315 

316 def __init__( 

317 self, 

318 *, 

319 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, 

320 status: int, 

321 version: int, 

322 reason: str | None, 

323 decode_content: bool, 

324 request_url: str | None, 

325 retries: Retry | None = None, 

326 ) -> None: 

327 if isinstance(headers, HTTPHeaderDict): 

328 self.headers = headers 

329 else: 

330 self.headers = HTTPHeaderDict(headers) # type: ignore[arg-type] 

331 self.status = status 

332 self.version = version 

333 self.reason = reason 

334 self.decode_content = decode_content 

335 self._has_decoded_content = False 

336 self._request_url: str | None = request_url 

337 self.retries = retries 

338 

339 self.chunked = False 

340 tr_enc = self.headers.get("transfer-encoding", "").lower() 

341 # Don't incur the penalty of creating a list and then discarding it 

342 encodings = (enc.strip() for enc in tr_enc.split(",")) 

343 if "chunked" in encodings: 

344 self.chunked = True 

345 

346 self._decoder: ContentDecoder | None = None 

347 self.length_remaining: int | None 

348 

349 def get_redirect_location(self) -> str | None | Literal[False]: 

350 """ 

351 Should we redirect and where to? 

352 

353 :returns: Truthy redirect location string if we got a redirect status 

354 code and valid location. ``None`` if redirect status and no 

355 location. ``False`` if not a redirect status code. 

356 """ 

357 if self.status in self.REDIRECT_STATUSES: 

358 return self.headers.get("location") 

359 return False 

360 

361 @property 

362 def data(self) -> bytes: 

363 raise NotImplementedError() 

364 

365 def json(self) -> typing.Any: 

366 """ 

367 Parses the body of the HTTP response as JSON. 

368 

369 To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder. 

370 

371 This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`. 

372 

373 Read more :ref:`here <json>`. 

374 """ 

375 data = self.data.decode("utf-8") 

376 return _json.loads(data) 

377 

378 @property 

379 def url(self) -> str | None: 

380 raise NotImplementedError() 

381 

382 @url.setter 

383 def url(self, url: str | None) -> None: 

384 raise NotImplementedError() 

385 

386 @property 

387 def connection(self) -> BaseHTTPConnection | None: 

388 raise NotImplementedError() 

389 

390 @property 

391 def retries(self) -> Retry | None: 

392 return self._retries 

393 

394 @retries.setter 

395 def retries(self, retries: Retry | None) -> None: 

396 # Override the request_url if retries has a redirect location. 

397 if retries is not None and retries.history: 

398 self.url = retries.history[-1].redirect_location 

399 self._retries = retries 

400 

401 def stream( 

402 self, amt: int | None = 2**16, decode_content: bool | None = None 

403 ) -> typing.Iterator[bytes]: 

404 raise NotImplementedError() 

405 

406 def read( 

407 self, 

408 amt: int | None = None, 

409 decode_content: bool | None = None, 

410 cache_content: bool = False, 

411 ) -> bytes: 

412 raise NotImplementedError() 

413 

414 def read1( 

415 self, 

416 amt: int | None = None, 

417 decode_content: bool | None = None, 

418 ) -> bytes: 

419 raise NotImplementedError() 

420 

421 def read_chunked( 

422 self, 

423 amt: int | None = None, 

424 decode_content: bool | None = None, 

425 ) -> typing.Iterator[bytes]: 

426 raise NotImplementedError() 

427 

428 def release_conn(self) -> None: 

429 raise NotImplementedError() 

430 

431 def drain_conn(self) -> None: 

432 raise NotImplementedError() 

433 

434 def close(self) -> None: 

435 raise NotImplementedError() 

436 

437 def _init_decoder(self) -> None: 

438 """ 

439 Set-up the _decoder attribute if necessary. 

440 """ 

441 # Note: content-encoding value should be case-insensitive, per RFC 7230 

442 # Section 3.2 

443 content_encoding = self.headers.get("content-encoding", "").lower() 

444 if self._decoder is None: 

445 if content_encoding in self.CONTENT_DECODERS: 

446 self._decoder = _get_decoder(content_encoding) 

447 elif "," in content_encoding: 

448 encodings = [ 

449 e.strip() 

450 for e in content_encoding.split(",") 

451 if e.strip() in self.CONTENT_DECODERS 

452 ] 

453 if encodings: 

454 self._decoder = _get_decoder(content_encoding) 

455 

456 def _decode( 

457 self, data: bytes, decode_content: bool | None, flush_decoder: bool 

458 ) -> bytes: 

459 """ 

460 Decode the data passed in and potentially flush the decoder. 

461 """ 

462 if not decode_content: 

463 if self._has_decoded_content: 

464 raise RuntimeError( 

465 "Calling read(decode_content=False) is not supported after " 

466 "read(decode_content=True) was called." 

467 ) 

468 return data 

469 

470 try: 

471 if self._decoder: 

472 data = self._decoder.decompress(data) 

473 self._has_decoded_content = True 

474 except self.DECODER_ERROR_CLASSES as e: 

475 content_encoding = self.headers.get("content-encoding", "").lower() 

476 raise DecodeError( 

477 "Received response with content-encoding: %s, but " 

478 "failed to decode it." % content_encoding, 

479 e, 

480 ) from e 

481 if flush_decoder: 

482 data += self._flush_decoder() 

483 

484 return data 

485 

486 def _flush_decoder(self) -> bytes: 

487 """ 

488 Flushes the decoder. Should only be called if the decoder is actually 

489 being used. 

490 """ 

491 if self._decoder: 

492 return self._decoder.decompress(b"") + self._decoder.flush() 

493 return b"" 

494 

495 # Compatibility methods for `io` module 

496 def readinto(self, b: bytearray) -> int: 

497 temp = self.read(len(b)) 

498 if len(temp) == 0: 

499 return 0 

500 else: 

501 b[: len(temp)] = temp 

502 return len(temp) 

503 

504 # Compatibility methods for http.client.HTTPResponse 

505 def getheaders(self) -> HTTPHeaderDict: 

506 warnings.warn( 

507 "HTTPResponse.getheaders() is deprecated and will be removed " 

508 "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.", 

509 category=DeprecationWarning, 

510 stacklevel=2, 

511 ) 

512 return self.headers 

513 

514 def getheader(self, name: str, default: str | None = None) -> str | None: 

515 warnings.warn( 

516 "HTTPResponse.getheader() is deprecated and will be removed " 

517 "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).", 

518 category=DeprecationWarning, 

519 stacklevel=2, 

520 ) 

521 return self.headers.get(name, default) 

522 

523 # Compatibility method for http.cookiejar 

524 def info(self) -> HTTPHeaderDict: 

525 return self.headers 

526 

527 def geturl(self) -> str | None: 

528 return self.url 

529 

530 

531class HTTPResponse(BaseHTTPResponse): 

532 """ 

533 HTTP Response container. 

534 

535 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

536 loaded and decoded on-demand when the ``data`` property is accessed. This 

537 class is also compatible with the Python standard library's :mod:`io` 

538 module, and can hence be treated as a readable object in the context of that 

539 framework. 

540 

541 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

542 

543 :param preload_content: 

544 If True, the response's body will be preloaded during construction. 

545 

546 :param decode_content: 

547 If True, will attempt to decode the body based on the 

548 'content-encoding' header. 

549 

550 :param original_response: 

551 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

552 object, it's convenient to include the original for debug purposes. It's 

553 otherwise unused. 

554 

555 :param retries: 

556 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

557 was used during the request. 

558 

559 :param enforce_content_length: 

560 Enforce content length checking. Body returned by server must match 

561 value of Content-Length header, if present. Otherwise, raise error. 

562 """ 

563 

564 def __init__( 

565 self, 

566 body: _TYPE_BODY = "", 

567 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None, 

568 status: int = 0, 

569 version: int = 0, 

570 reason: str | None = None, 

571 preload_content: bool = True, 

572 decode_content: bool = True, 

573 original_response: _HttplibHTTPResponse | None = None, 

574 pool: HTTPConnectionPool | None = None, 

575 connection: HTTPConnection | None = None, 

576 msg: _HttplibHTTPMessage | None = None, 

577 retries: Retry | None = None, 

578 enforce_content_length: bool = True, 

579 request_method: str | None = None, 

580 request_url: str | None = None, 

581 auto_close: bool = True, 

582 ) -> None: 

583 super().__init__( 

584 headers=headers, 

585 status=status, 

586 version=version, 

587 reason=reason, 

588 decode_content=decode_content, 

589 request_url=request_url, 

590 retries=retries, 

591 ) 

592 

593 self.enforce_content_length = enforce_content_length 

594 self.auto_close = auto_close 

595 

596 self._body = None 

597 self._fp: _HttplibHTTPResponse | None = None 

598 self._original_response = original_response 

599 self._fp_bytes_read = 0 

600 self.msg = msg 

601 

602 if body and isinstance(body, (str, bytes)): 

603 self._body = body 

604 

605 self._pool = pool 

606 self._connection = connection 

607 

608 if hasattr(body, "read"): 

609 self._fp = body # type: ignore[assignment] 

610 

611 # Are we using the chunked-style of transfer encoding? 

612 self.chunk_left: int | None = None 

613 

614 # Determine length of response 

615 self.length_remaining = self._init_length(request_method) 

616 

617 # Used to return the correct amount of bytes for partial read()s 

618 self._decoded_buffer = BytesQueueBuffer() 

619 

620 # If requested, preload the body. 

621 if preload_content and not self._body: 

622 self._body = self.read(decode_content=decode_content) 

623 

624 def release_conn(self) -> None: 

625 if not self._pool or not self._connection: 

626 return None 

627 

628 self._pool._put_conn(self._connection) 

629 self._connection = None 

630 

631 def drain_conn(self) -> None: 

632 """ 

633 Read and discard any remaining HTTP response data in the response connection. 

634 

635 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool. 

636 """ 

637 try: 

638 self.read() 

639 except (HTTPError, OSError, BaseSSLError, HTTPException): 

640 pass 

641 

642 @property 

643 def data(self) -> bytes: 

644 # For backwards-compat with earlier urllib3 0.4 and earlier. 

645 if self._body: 

646 return self._body # type: ignore[return-value] 

647 

648 if self._fp: 

649 return self.read(cache_content=True) 

650 

651 return None # type: ignore[return-value] 

652 

653 @property 

654 def connection(self) -> HTTPConnection | None: 

655 return self._connection 

656 

657 def isclosed(self) -> bool: 

658 return is_fp_closed(self._fp) 

659 

660 def tell(self) -> int: 

661 """ 

662 Obtain the number of bytes pulled over the wire so far. May differ from 

663 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read`` 

664 if bytes are encoded on the wire (e.g, compressed). 

665 """ 

666 return self._fp_bytes_read 

667 

668 def _init_length(self, request_method: str | None) -> int | None: 

669 """ 

670 Set initial length value for Response content if available. 

671 """ 

672 length: int | None 

673 content_length: str | None = self.headers.get("content-length") 

674 

675 if content_length is not None: 

676 if self.chunked: 

677 # This Response will fail with an IncompleteRead if it can't be 

678 # received as chunked. This method falls back to attempt reading 

679 # the response before raising an exception. 

680 log.warning( 

681 "Received response with both Content-Length and " 

682 "Transfer-Encoding set. This is expressly forbidden " 

683 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

684 "attempting to process response as Transfer-Encoding: " 

685 "chunked." 

686 ) 

687 return None 

688 

689 try: 

690 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

691 # be sent in a single Content-Length header 

692 # (e.g. Content-Length: 42, 42). This line ensures the values 

693 # are all valid ints and that as long as the `set` length is 1, 

694 # all values are the same. Otherwise, the header is invalid. 

695 lengths = {int(val) for val in content_length.split(",")} 

696 if len(lengths) > 1: 

697 raise InvalidHeader( 

698 "Content-Length contained multiple " 

699 "unmatching values (%s)" % content_length 

700 ) 

701 length = lengths.pop() 

702 except ValueError: 

703 length = None 

704 else: 

705 if length < 0: 

706 length = None 

707 

708 else: # if content_length is None 

709 length = None 

710 

711 # Convert status to int for comparison 

712 # In some cases, httplib returns a status of "_UNKNOWN" 

713 try: 

714 status = int(self.status) 

715 except ValueError: 

716 status = 0 

717 

718 # Check for responses that shouldn't include a body 

719 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

720 length = 0 

721 

722 return length 

723 

724 @contextmanager 

725 def _error_catcher(self) -> typing.Generator[None, None, None]: 

726 """ 

727 Catch low-level python exceptions, instead re-raising urllib3 

728 variants, so that low-level exceptions are not leaked in the 

729 high-level api. 

730 

731 On exit, release the connection back to the pool. 

732 """ 

733 clean_exit = False 

734 

735 try: 

736 try: 

737 yield 

738 

739 except SocketTimeout as e: 

740 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but 

741 # there is yet no clean way to get at it from this context. 

742 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type] 

743 

744 except BaseSSLError as e: 

745 # FIXME: Is there a better way to differentiate between SSLErrors? 

746 if "read operation timed out" not in str(e): 

747 # SSL errors related to framing/MAC get wrapped and reraised here 

748 raise SSLError(e) from e 

749 

750 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type] 

751 

752 except IncompleteRead as e: 

753 if ( 

754 e.expected is not None 

755 and e.partial is not None 

756 and e.expected == -e.partial 

757 ): 

758 arg = "Response may not contain content." 

759 else: 

760 arg = f"Connection broken: {e!r}" 

761 raise ProtocolError(arg, e) from e 

762 

763 except (HTTPException, OSError) as e: 

764 raise ProtocolError(f"Connection broken: {e!r}", e) from e 

765 

766 # If no exception is thrown, we should avoid cleaning up 

767 # unnecessarily. 

768 clean_exit = True 

769 finally: 

770 # If we didn't terminate cleanly, we need to throw away our 

771 # connection. 

772 if not clean_exit: 

773 # The response may not be closed but we're not going to use it 

774 # anymore so close it now to ensure that the connection is 

775 # released back to the pool. 

776 if self._original_response: 

777 self._original_response.close() 

778 

779 # Closing the response may not actually be sufficient to close 

780 # everything, so if we have a hold of the connection close that 

781 # too. 

782 if self._connection: 

783 self._connection.close() 

784 

785 # If we hold the original response but it's closed now, we should 

786 # return the connection back to the pool. 

787 if self._original_response and self._original_response.isclosed(): 

788 self.release_conn() 

789 

790 def _fp_read( 

791 self, 

792 amt: int | None = None, 

793 *, 

794 read1: bool = False, 

795 ) -> bytes: 

796 """ 

797 Read a response with the thought that reading the number of bytes 

798 larger than can fit in a 32-bit int at a time via SSL in some 

799 known cases leads to an overflow error that has to be prevented 

800 if `amt` or `self.length_remaining` indicate that a problem may 

801 happen. 

802 

803 The known cases: 

804 * 3.8 <= CPython < 3.9.7 because of a bug 

805 https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900. 

806 * urllib3 injected with pyOpenSSL-backed SSL-support. 

807 * CPython < 3.10 only when `amt` does not fit 32-bit int. 

808 """ 

809 assert self._fp 

810 c_int_max = 2**31 - 1 

811 if ( 

812 (amt and amt > c_int_max) 

813 or ( 

814 amt is None 

815 and self.length_remaining 

816 and self.length_remaining > c_int_max 

817 ) 

818 ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)): 

819 if read1: 

820 return self._fp.read1(c_int_max) 

821 buffer = io.BytesIO() 

822 # Besides `max_chunk_amt` being a maximum chunk size, it 

823 # affects memory overhead of reading a response by this 

824 # method in CPython. 

825 # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum 

826 # chunk size that does not lead to an overflow error, but 

827 # 256 MiB is a compromise. 

828 max_chunk_amt = 2**28 

829 while amt is None or amt != 0: 

830 if amt is not None: 

831 chunk_amt = min(amt, max_chunk_amt) 

832 amt -= chunk_amt 

833 else: 

834 chunk_amt = max_chunk_amt 

835 data = self._fp.read(chunk_amt) 

836 if not data: 

837 break 

838 buffer.write(data) 

839 del data # to reduce peak memory usage by `max_chunk_amt`. 

840 return buffer.getvalue() 

841 elif read1: 

842 return self._fp.read1(amt) if amt is not None else self._fp.read1() 

843 else: 

844 # StringIO doesn't like amt=None 

845 return self._fp.read(amt) if amt is not None else self._fp.read() 

846 

847 def _raw_read( 

848 self, 

849 amt: int | None = None, 

850 *, 

851 read1: bool = False, 

852 ) -> bytes: 

853 """ 

854 Reads `amt` of bytes from the socket. 

855 """ 

856 if self._fp is None: 

857 return None # type: ignore[return-value] 

858 

859 fp_closed = getattr(self._fp, "closed", False) 

860 

861 with self._error_catcher(): 

862 data = self._fp_read(amt, read1=read1) if not fp_closed else b"" 

863 if amt is not None and amt != 0 and not data: 

864 # Platform-specific: Buggy versions of Python. 

865 # Close the connection when no data is returned 

866 # 

867 # This is redundant to what httplib/http.client _should_ 

868 # already do. However, versions of python released before 

869 # December 15, 2012 (http://bugs.python.org/issue16298) do 

870 # not properly close the connection in all cases. There is 

871 # no harm in redundantly calling close. 

872 self._fp.close() 

873 if ( 

874 self.enforce_content_length 

875 and self.length_remaining is not None 

876 and self.length_remaining != 0 

877 ): 

878 # This is an edge case that httplib failed to cover due 

879 # to concerns of backward compatibility. We're 

880 # addressing it here to make sure IncompleteRead is 

881 # raised during streaming, so all calls with incorrect 

882 # Content-Length are caught. 

883 raise IncompleteRead(self._fp_bytes_read, self.length_remaining) 

884 elif read1 and ( 

885 (amt != 0 and not data) or self.length_remaining == len(data) 

886 ): 

887 # All data has been read, but `self._fp.read1` in 

888 # CPython 3.12 and older doesn't always close 

889 # `http.client.HTTPResponse`, so we close it here. 

890 # See https://github.com/python/cpython/issues/113199 

891 self._fp.close() 

892 

893 if data: 

894 self._fp_bytes_read += len(data) 

895 if self.length_remaining is not None: 

896 self.length_remaining -= len(data) 

897 return data 

898 

899 def read( 

900 self, 

901 amt: int | None = None, 

902 decode_content: bool | None = None, 

903 cache_content: bool = False, 

904 ) -> bytes: 

905 """ 

906 Similar to :meth:`http.client.HTTPResponse.read`, but with two additional 

907 parameters: ``decode_content`` and ``cache_content``. 

908 

909 :param amt: 

910 How much of the content to read. If specified, caching is skipped 

911 because it doesn't make sense to cache partial content as the full 

912 response. 

913 

914 :param decode_content: 

915 If True, will attempt to decode the body based on the 

916 'content-encoding' header. 

917 

918 :param cache_content: 

919 If True, will save the returned data such that the same result is 

920 returned despite of the state of the underlying file object. This 

921 is useful if you want the ``.data`` property to continue working 

922 after having ``.read()`` the file object. (Overridden if ``amt`` is 

923 set.) 

924 """ 

925 self._init_decoder() 

926 if decode_content is None: 

927 decode_content = self.decode_content 

928 

929 if amt is not None: 

930 cache_content = False 

931 

932 if len(self._decoded_buffer) >= amt: 

933 return self._decoded_buffer.get(amt) 

934 

935 data = self._raw_read(amt) 

936 

937 flush_decoder = amt is None or (amt != 0 and not data) 

938 

939 if not data and len(self._decoded_buffer) == 0: 

940 return data 

941 

942 if amt is None: 

943 data = self._decode(data, decode_content, flush_decoder) 

944 if cache_content: 

945 self._body = data 

946 else: 

947 # do not waste memory on buffer when not decoding 

948 if not decode_content: 

949 if self._has_decoded_content: 

950 raise RuntimeError( 

951 "Calling read(decode_content=False) is not supported after " 

952 "read(decode_content=True) was called." 

953 ) 

954 return data 

955 

956 decoded_data = self._decode(data, decode_content, flush_decoder) 

957 self._decoded_buffer.put(decoded_data) 

958 

959 while len(self._decoded_buffer) < amt and data: 

960 # TODO make sure to initially read enough data to get past the headers 

961 # For example, the GZ file header takes 10 bytes, we don't want to read 

962 # it one byte at a time 

963 data = self._raw_read(amt) 

964 decoded_data = self._decode(data, decode_content, flush_decoder) 

965 self._decoded_buffer.put(decoded_data) 

966 data = self._decoded_buffer.get(amt) 

967 

968 return data 

969 

970 def read1( 

971 self, 

972 amt: int | None = None, 

973 decode_content: bool | None = None, 

974 ) -> bytes: 

975 """ 

976 Similar to ``http.client.HTTPResponse.read1`` and documented 

977 in :meth:`io.BufferedReader.read1`, but with an additional parameter: 

978 ``decode_content``. 

979 

980 :param amt: 

981 How much of the content to read. 

982 

983 :param decode_content: 

984 If True, will attempt to decode the body based on the 

985 'content-encoding' header. 

986 """ 

987 if decode_content is None: 

988 decode_content = self.decode_content 

989 # try and respond without going to the network 

990 if self._has_decoded_content: 

991 if not decode_content: 

992 raise RuntimeError( 

993 "Calling read1(decode_content=False) is not supported after " 

994 "read1(decode_content=True) was called." 

995 ) 

996 if len(self._decoded_buffer) > 0: 

997 if amt is None: 

998 return self._decoded_buffer.get_all() 

999 return self._decoded_buffer.get(amt) 

1000 if amt == 0: 

1001 return b"" 

1002 

1003 # FIXME, this method's type doesn't say returning None is possible 

1004 data = self._raw_read(amt, read1=True) 

1005 if not decode_content or data is None: 

1006 return data 

1007 

1008 self._init_decoder() 

1009 while True: 

1010 flush_decoder = not data 

1011 decoded_data = self._decode(data, decode_content, flush_decoder) 

1012 self._decoded_buffer.put(decoded_data) 

1013 if decoded_data or flush_decoder: 

1014 break 

1015 data = self._raw_read(8192, read1=True) 

1016 

1017 if amt is None: 

1018 return self._decoded_buffer.get_all() 

1019 return self._decoded_buffer.get(amt) 

1020 

1021 def stream( 

1022 self, amt: int | None = 2**16, decode_content: bool | None = None 

1023 ) -> typing.Generator[bytes, None, None]: 

1024 """ 

1025 A generator wrapper for the read() method. A call will block until 

1026 ``amt`` bytes have been read from the connection or until the 

1027 connection is closed. 

1028 

1029 :param amt: 

1030 How much of the content to read. The generator will return up to 

1031 much data per iteration, but may return less. This is particularly 

1032 likely when using compressed data. However, the empty string will 

1033 never be returned. 

1034 

1035 :param decode_content: 

1036 If True, will attempt to decode the body based on the 

1037 'content-encoding' header. 

1038 """ 

1039 if self.chunked and self.supports_chunked_reads(): 

1040 yield from self.read_chunked(amt, decode_content=decode_content) 

1041 else: 

1042 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

1043 data = self.read(amt=amt, decode_content=decode_content) 

1044 

1045 if data: 

1046 yield data 

1047 

1048 # Overrides from io.IOBase 

1049 def readable(self) -> bool: 

1050 return True 

1051 

1052 def close(self) -> None: 

1053 if not self.closed and self._fp: 

1054 self._fp.close() 

1055 

1056 if self._connection: 

1057 self._connection.close() 

1058 

1059 if not self.auto_close: 

1060 io.IOBase.close(self) 

1061 

1062 @property 

1063 def closed(self) -> bool: 

1064 if not self.auto_close: 

1065 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] 

1066 elif self._fp is None: 

1067 return True 

1068 elif hasattr(self._fp, "isclosed"): 

1069 return self._fp.isclosed() 

1070 elif hasattr(self._fp, "closed"): 

1071 return self._fp.closed 

1072 else: 

1073 return True 

1074 

1075 def fileno(self) -> int: 

1076 if self._fp is None: 

1077 raise OSError("HTTPResponse has no file to get a fileno from") 

1078 elif hasattr(self._fp, "fileno"): 

1079 return self._fp.fileno() 

1080 else: 

1081 raise OSError( 

1082 "The file-like object this HTTPResponse is wrapped " 

1083 "around has no file descriptor" 

1084 ) 

1085 

1086 def flush(self) -> None: 

1087 if ( 

1088 self._fp is not None 

1089 and hasattr(self._fp, "flush") 

1090 and not getattr(self._fp, "closed", False) 

1091 ): 

1092 return self._fp.flush() 

1093 

1094 def supports_chunked_reads(self) -> bool: 

1095 """ 

1096 Checks if the underlying file-like object looks like a 

1097 :class:`http.client.HTTPResponse` object. We do this by testing for 

1098 the fp attribute. If it is present we assume it returns raw chunks as 

1099 processed by read_chunked(). 

1100 """ 

1101 return hasattr(self._fp, "fp") 

1102 

1103 def _update_chunk_length(self) -> None: 

1104 # First, we'll figure out length of a chunk and then 

1105 # we'll try to read it from socket. 

1106 if self.chunk_left is not None: 

1107 return None 

1108 line = self._fp.fp.readline() # type: ignore[union-attr] 

1109 line = line.split(b";", 1)[0] 

1110 try: 

1111 self.chunk_left = int(line, 16) 

1112 except ValueError: 

1113 self.close() 

1114 if line: 

1115 # Invalid chunked protocol response, abort. 

1116 raise InvalidChunkLength(self, line) from None 

1117 else: 

1118 # Truncated at start of next chunk 

1119 raise ProtocolError("Response ended prematurely") from None 

1120 

1121 def _handle_chunk(self, amt: int | None) -> bytes: 

1122 returned_chunk = None 

1123 if amt is None: 

1124 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1125 returned_chunk = chunk 

1126 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1127 self.chunk_left = None 

1128 elif self.chunk_left is not None and amt < self.chunk_left: 

1129 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1130 self.chunk_left = self.chunk_left - amt 

1131 returned_chunk = value 

1132 elif amt == self.chunk_left: 

1133 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1134 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1135 self.chunk_left = None 

1136 returned_chunk = value 

1137 else: # amt > self.chunk_left 

1138 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1139 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1140 self.chunk_left = None 

1141 return returned_chunk # type: ignore[no-any-return] 

1142 

1143 def read_chunked( 

1144 self, amt: int | None = None, decode_content: bool | None = None 

1145 ) -> typing.Generator[bytes, None, None]: 

1146 """ 

1147 Similar to :meth:`HTTPResponse.read`, but with an additional 

1148 parameter: ``decode_content``. 

1149 

1150 :param amt: 

1151 How much of the content to read. If specified, caching is skipped 

1152 because it doesn't make sense to cache partial content as the full 

1153 response. 

1154 

1155 :param decode_content: 

1156 If True, will attempt to decode the body based on the 

1157 'content-encoding' header. 

1158 """ 

1159 self._init_decoder() 

1160 # FIXME: Rewrite this method and make it a class with a better structured logic. 

1161 if not self.chunked: 

1162 raise ResponseNotChunked( 

1163 "Response is not chunked. " 

1164 "Header 'transfer-encoding: chunked' is missing." 

1165 ) 

1166 if not self.supports_chunked_reads(): 

1167 raise BodyNotHttplibCompatible( 

1168 "Body should be http.client.HTTPResponse like. " 

1169 "It should have have an fp attribute which returns raw chunks." 

1170 ) 

1171 

1172 with self._error_catcher(): 

1173 # Don't bother reading the body of a HEAD request. 

1174 if self._original_response and is_response_to_head(self._original_response): 

1175 self._original_response.close() 

1176 return None 

1177 

1178 # If a response is already read and closed 

1179 # then return immediately. 

1180 if self._fp.fp is None: # type: ignore[union-attr] 

1181 return None 

1182 

1183 while True: 

1184 self._update_chunk_length() 

1185 if self.chunk_left == 0: 

1186 break 

1187 chunk = self._handle_chunk(amt) 

1188 decoded = self._decode( 

1189 chunk, decode_content=decode_content, flush_decoder=False 

1190 ) 

1191 if decoded: 

1192 yield decoded 

1193 

1194 if decode_content: 

1195 # On CPython and PyPy, we should never need to flush the 

1196 # decoder. However, on Jython we *might* need to, so 

1197 # lets defensively do it anyway. 

1198 decoded = self._flush_decoder() 

1199 if decoded: # Platform-specific: Jython. 

1200 yield decoded 

1201 

1202 # Chunk content ends with \r\n: discard it. 

1203 while self._fp is not None: 

1204 line = self._fp.fp.readline() 

1205 if not line: 

1206 # Some sites may not end with '\r\n'. 

1207 break 

1208 if line == b"\r\n": 

1209 break 

1210 

1211 # We read everything; close the "file". 

1212 if self._original_response: 

1213 self._original_response.close() 

1214 

1215 @property 

1216 def url(self) -> str | None: 

1217 """ 

1218 Returns the URL that was the source of this response. 

1219 If the request that generated this response redirected, this method 

1220 will return the final redirect location. 

1221 """ 

1222 return self._request_url 

1223 

1224 @url.setter 

1225 def url(self, url: str) -> None: 

1226 self._request_url = url 

1227 

1228 def __iter__(self) -> typing.Iterator[bytes]: 

1229 buffer: list[bytes] = [] 

1230 for chunk in self.stream(decode_content=True): 

1231 if b"\n" in chunk: 

1232 chunks = chunk.split(b"\n") 

1233 yield b"".join(buffer) + chunks[0] + b"\n" 

1234 for x in chunks[1:-1]: 

1235 yield x + b"\n" 

1236 if chunks[-1]: 

1237 buffer = [chunks[-1]] 

1238 else: 

1239 buffer = [] 

1240 else: 

1241 buffer.append(chunk) 

1242 if buffer: 

1243 yield b"".join(buffer)