Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

634 statements  

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import socket 

9import sys 

10import typing 

11import warnings 

12import zlib 

13from contextlib import contextmanager 

14from http.client import HTTPMessage as _HttplibHTTPMessage 

15from http.client import HTTPResponse as _HttplibHTTPResponse 

16from socket import timeout as SocketTimeout 

17 

18if typing.TYPE_CHECKING: 

19 from ._base_connection import BaseHTTPConnection 

20 

# Optional Brotli support: try the 'brotlicffi' package first and fall back to
# whatever is importable as 'brotli'. If neither import succeeds, the name
# 'brotli' is bound to None so later code can test `brotli is not None`.
try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

# Optional Zstandard support: HAS_ZSTD ends up True only when 'zstandard'
# imports cleanly and its reported version is at least 0.18.
try:
    import zstandard as zstd
except (AttributeError, ImportError, ValueError):  # Defensive:
    HAS_ZSTD = False
else:
    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # Only the leading "<major>.<minor>" of the version string is parsed.
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

45 

46from . import util 

47from ._base_connection import _TYPE_BODY 

48from ._collections import HTTPHeaderDict 

49from .connection import BaseSSLError, HTTPConnection, HTTPException 

50from .exceptions import ( 

51 BodyNotHttplibCompatible, 

52 DecodeError, 

53 HTTPError, 

54 IncompleteRead, 

55 InvalidChunkLength, 

56 InvalidHeader, 

57 ProtocolError, 

58 ReadTimeoutError, 

59 ResponseNotChunked, 

60 SSLError, 

61) 

62from .util.response import is_fp_closed, is_response_to_head 

63from .util.retry import Retry 

64 

65if typing.TYPE_CHECKING: 

66 from .connectionpool import HTTPConnectionPool 

67 

68log = logging.getLogger(__name__) 

69 

70 

class ContentDecoder:
    """Interface shared by the content-encoding decoders in this module.

    Both methods are placeholders that raise ``NotImplementedError``;
    concrete decoders override them.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return the resulting bytes."""
        raise NotImplementedError

    def flush(self) -> bytes:
        """Return whatever output the decoder is still holding back."""
        raise NotImplementedError

77 

78 

class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies that copes with two wire formats.

    Input is first fed to a default ``zlib.decompressobj()``. If that raises
    ``zlib.error`` before any output was produced, everything received so far
    is replayed through a raw-deflate decompressor (negative ``wbits``).
    """

    def __init__(self) -> None:
        # True until the format question is settled (first output or first error).
        self._first_try = True
        # Copy of the input seen while still undecided, kept for a replay.
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data``, switching to raw deflate on an initial zlib error."""
        if not data:
            return data

        if not self._first_try:
            # Format already settled: plain pass-through.
            return self._obj.decompress(data)

        self._data += data
        try:
            output = self._obj.decompress(data)
        except zlib.error:
            # Start over with a headerless (raw) deflate stream and replay
            # every byte received so far through it.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if output:
            # The first format worked; the replay copy is no longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return output

    def flush(self) -> bytes:
        """Flush the active zlib decompressor."""
        return self._obj.flush()

109 

110 

class GzipDecoderState:
    """Integer states used by ``GzipDecoder`` to track its progress."""

    # FIRST_MEMBER (0): still decoding the first gzip member.
    # OTHER_MEMBERS (1): input remained after a member ended.
    # SWALLOW_DATA (2): a zlib error occurred; further input is ignored.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)

115 

116 

class GzipDecoder(ContentDecoder):
    """Decoder for gzip bodies, including several concatenated members."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data``, continuing into further members when input remains.

        A ``zlib.error`` in the first member is re-raised. One raised after a
        member has already completed is treated as trailing garbage: the output
        so far is returned. Either way, all later input is ignored.
        """
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        output = bytearray()
        pending = data
        while pending:
            try:
                output += self._obj.decompress(pending)
            except zlib.error:
                failed_in = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if failed_in != GzipDecoderState.OTHER_MEMBERS:
                    raise
                # Allow trailing garbage acceptable in other gzip clients
                break
            pending = self._obj.unused_data
            if pending:
                # Bytes left over after the member ended: decode them with a
                # fresh decompressor as the next member.
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        return bytes(output)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor."""
        return self._obj.flush()

145 

146 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder wrapping ``brotli.Decompressor``.

        Supports both the 'brotlipy' and 'Brotli' packages, which share an
        import name but expose different method names.
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # 'brotlipy' objects offer decompress(); 'Brotli' objects offer
            # process(). Bind whichever exists as this instance's decompress.
            method_name = (
                "decompress" if hasattr(self._obj, "decompress") else "process"
            )
            setattr(self, "decompress", getattr(self._obj, method_name))

        def flush(self) -> bytes:
            """Flush the decompressor if it has a flush(); otherwise return b""."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]

164 

165 

if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for Zstandard bodies, including back-to-back frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            """Decode ``data``, restarting on leftover input after a frame ends."""
            if not data:
                return b""

            pieces = []
            pending = data
            while True:
                pieces.append(self._obj.decompress(pending))
                if not (self._obj.eof and self._obj.unused_data):
                    break
                # The frame ended with input left over: hand that remainder
                # to a new decompressor object.
                pending = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
            return b"".join(pieces)

        def flush(self) -> bytes:
            """Return the flush output, or raise ``DecodeError`` if the stream is unfinished."""
            leftover = self._obj.flush()  # note: this is a no-op
            if self._obj.eof:
                return leftover
            raise DecodeError("Zstandard data is incomplete")

187 

188 

class MultiDecoder(ContentDecoder):
    """
    Decoder for a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore runs the per-coding decoders in the reverse of the
    listed order.

    :raises DecodeError: if ``modes`` lists more than ``max_decode_links``
        codings.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header. The header value comes from the remote peer and
    # every link gets its own decompressor, so an unbounded chain would let a
    # server dictate how much work and memory each response costs.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        """Flush the decoder for the first listed coding (the last one run)."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        """Pass ``data`` through every decoder, last-listed coding first."""
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data

208 

209 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a new decoder object for the content coding ``mode``.

    A value containing a comma is handed to ``MultiDecoder``. Any value that
    is not recognised (or whose optional library is unavailable) falls through
    to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    decoder: ContentDecoder
    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        decoder = GzipDecoder()
    elif mode == "br" and brotli is not None:
        decoder = BrotliDecoder()
    elif mode == "zstd" and HAS_ZSTD:
        decoder = ZstdDecoder()
    else:
        decoder = DeflateDecoder()
    return decoder

226 

227 

class BytesQueueBuffer:
    """Queue of byte chunks that can be drained a requested number of bytes at a time.

    Chunks are appended with put() and consumed from the front with get(n) or
    get_all(). len() reports the total number of bytes currently queued.

    Memory use is bounded by the queued data plus the bytes copied out by a
    single get() call; with a single queued chunk, a get() may copy all of it.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        # Running total of len() over every chunk in self.buffer.
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` as a new chunk at the back of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        Fewer than ``n`` bytes are returned when the queue runs out first.

        :raises RuntimeError: if ``n`` is non-zero and the queue is empty.
        :raises ValueError: if ``n`` is negative (and the queue is not empty).
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        pieces = []
        needed = n
        while needed > 0 and self.buffer:
            head = self.buffer.popleft()
            head_length = len(head)
            if needed < head_length:
                # Only part of this chunk is wanted: keep the tail queued.
                pieces.append(head[:needed])
                self.buffer.appendleft(head[needed:])
                self._size -= needed
                break
            pieces.append(head)
            self._size -= head_length
            needed -= head_length

        return b"".join(pieces)

    def get_all(self) -> bytes:
        """Remove and return everything that is queued (``b""`` when empty)."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # A lone chunk is handed back as-is, without copying.
            combined = chunks.pop()
        else:
            combined = b"".join(chunks)
            chunks.clear()
        self._size = 0
        return combined

299 

300 

class BaseHTTPResponse(io.IOBase):
    """Shared base for response objects: status metadata, headers and decoding.

    Body access (``data``, ``read``, ``stream``, ...), the ``url`` and
    ``connection`` properties and connection management are stubs here that
    raise ``NotImplementedError``; subclasses supply them.
    """

    # Content-Encoding values for which a decoder is set up.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS.append("br")
    if HAS_ZSTD:
        CONTENT_DECODERS.append("zstd")
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions raised by decoders that _decode() converts to DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES = (*DECODER_ERROR_CLASSES, brotli.error)
    if HAS_ZSTD:
        DECODER_ERROR_CLASSES = (*DECODER_ERROR_CLASSES, zstd.ZstdError)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store the response metadata and detect chunked transfer encoding."""
        # An HTTPHeaderDict is kept as-is; anything else is wrapped in one.
        self.headers = (
            headers
            if isinstance(headers, HTTPHeaderDict)
            else HTTPHeaderDict(headers)  # type: ignore[arg-type]
        )
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the `retries` property setter below.
        self.retries = retries

        transfer_encoding = self.headers.get("transfer-encoding", "").lower()
        # Lazily scan the comma-separated tokens; no intermediate list is built.
        self.chunked = any(
            token.strip() == "chunked" for token in transfer_encoding.split(",")
        )

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: ``False`` when the status is not one of
            ``REDIRECT_STATUSES``; otherwise the value of the ``location``
            header, which is ``None`` when the header is absent.
        """
        if self.status not in self.REDIRECT_STATUSES:
            return False
        return self.headers.get("location")

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body is taken from :attr:`data`, decoded as UTF-8 (the encoding
        required by `RFC 8259 Section 8.1
        <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_) and parsed
        with the standard library JSON decoder. To use a custom JSON decoder,
        pass :attr:`data` to it directly instead.

        A `UnicodeDecodeError` is raised if the body is not valid UTF-8, and
        a `json.JSONDecodeError` if it is not a valid JSON document.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        return _json.loads(self.data.decode("utf-8"))

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            last_entry = retries.history[-1]
            self.url = last_entry.redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Create ``self._decoder`` from the content-encoding header, if needed.

        Nothing happens when a decoder already exists or when the header names
        no coding listed in ``CONTENT_DECODERS``.
        """
        if self._decoder is not None:
            return

        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        supported = self.CONTENT_DECODERS
        # Either the whole value is one supported coding, or it is a
        # comma-separated list with at least one supported entry.
        if content_encoding in supported or (
            "," in content_encoding
            and any(part.strip() in supported for part in content_encoding.split(","))
        ):
            self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Run ``data`` through the decoder and optionally flush the decoder.

        :raises RuntimeError: if decoding is declined after decoded content
            has already been produced.
        :raises DecodeError: if the decoder raises one of
            ``DECODER_ERROR_CLASSES`` while decompressing.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        decoder = self._decoder
        if decoder:
            try:
                data = decoder.decompress(data)
            except self.DECODER_ERROR_CLASSES as e:
                content_encoding = self.headers.get("content-encoding", "").lower()
                raise DecodeError(
                    "Received response with content-encoding: %s, but "
                    "failed to decode it." % content_encoding,
                    e,
                ) from e
            self._has_decoded_content = True

        if flush_decoder:
            data += self._flush_decoder()
        return data

    def _flush_decoder(self) -> bytes:
        """
        Return the decoder's remaining output, or ``b""`` without a decoder.
        """
        decoder = self._decoder
        if not decoder:
            return b""
        return decoder.decompress(b"") + decoder.flush()

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into ``b``; return how many were stored."""
        chunk = self.read(len(b))
        count = len(chunk)
        if count:
            b[:count] = chunk
        return count

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        """Deprecated accessor for ``headers``; emits a ``DeprecationWarning``."""
        message = (
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly."
        )
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        """Deprecated form of ``headers.get``; emits a ``DeprecationWarning``."""
        message = (
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default)."
        )
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

542 

543 

544class HTTPResponse(BaseHTTPResponse): 

545 """ 

546 HTTP Response container. 

547 

548 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

549 loaded and decoded on-demand when the ``data`` property is accessed. This 

550 class is also compatible with the Python standard library's :mod:`io` 

551 module, and can hence be treated as a readable object in the context of that 

552 framework. 

553 

554 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

555 

556 :param preload_content: 

557 If True, the response's body will be preloaded during construction. 

558 

559 :param decode_content: 

560 If True, will attempt to decode the body based on the 

561 'content-encoding' header. 

562 

563 :param original_response: 

564 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

565 object, it's convenient to include the original for debug purposes. It's 

566 otherwise unused. 

567 

568 :param retries: 

569 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

570 was used during the request. 

571 

572 :param enforce_content_length: 

573 Enforce content length checking. Body returned by server must match 

574 value of Content-Length header, if present. Otherwise, raise error. 

575 """ 

576 

def __init__(
    self,
    body: _TYPE_BODY = "",
    headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
    status: int = 0,
    version: int = 0,
    version_string: str = "HTTP/?",
    reason: str | None = None,
    preload_content: bool = True,
    decode_content: bool = True,
    original_response: _HttplibHTTPResponse | None = None,
    pool: HTTPConnectionPool | None = None,
    connection: HTTPConnection | None = None,
    msg: _HttplibHTTPMessage | None = None,
    retries: Retry | None = None,
    enforce_content_length: bool = True,
    request_method: str | None = None,
    request_url: str | None = None,
    auto_close: bool = True,
    sock_shutdown: typing.Callable[[int], None] | None = None,
) -> None:
    """
    Set up the response state and, unless disabled, preload the body.

    :param body:
        A non-empty ``str``/``bytes`` value is stored directly as the body;
        an object with a ``read`` attribute is used as the file object that
        the body is read from.
    :param pool:
        Stored together with ``connection``; ``release_conn()`` hands the
        connection to this pool.
    :param request_method:
        Passed to ``_init_length()`` to compute ``length_remaining``.
    :param preload_content:
        If true and no body has been stored yet, the body is read
        immediately with ``read(decode_content=decode_content)``.
    """
    # headers, status, version info, decode_content, request_url and retries
    # are handled by the base class.
    super().__init__(
        headers=headers,
        status=status,
        version=version,
        version_string=version_string,
        reason=reason,
        decode_content=decode_content,
        request_url=request_url,
        retries=retries,
    )

    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close

    self._body = None
    self._fp: _HttplibHTTPResponse | None = None
    self._original_response = original_response
    # Running count of raw bytes read from the file object; reported by tell().
    self._fp_bytes_read = 0
    self.msg = msg

    # A literal, non-empty str/bytes body is kept as the body itself.
    if body and isinstance(body, (str, bytes)):
        self._body = body

    self._pool = pool
    self._connection = connection

    # Anything exposing `read` is treated as the file object to read from.
    if hasattr(body, "read"):
        self._fp = body  # type: ignore[assignment]
    self._sock_shutdown = sock_shutdown

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left: int | None = None

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # Used to return the correct amount of bytes for partial read()s
    self._decoded_buffer = BytesQueueBuffer()

    # If requested, preload the body.
    # This must stay last: read() relies on the attributes assigned above.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)

640 

def release_conn(self) -> None:
    """Hand the held connection to the pool and forget it.

    Does nothing unless both a pool and a connection are present.
    """
    pool, conn = self._pool, self._connection
    if pool and conn:
        pool._put_conn(conn)
        self._connection = None
    return None

647 

def drain_conn(self) -> None:
    """
    Consume whatever is left of the response body and throw it away.

    Unread data in the HTTPResponse connection blocks the connection from
    being released back to the pool. Errors raised while reading are
    deliberately ignored: this is a best-effort cleanup.
    """
    try:
        self.read()
    except (HTTPError, OSError, BaseSSLError, HTTPException):
        return None
    return None

658 

@property
def data(self) -> bytes:
    """The response body: the stored body if any, else a full cached read.

    Evaluates to ``None`` when there is neither a stored body nor a file
    object to read from.
    """
    # For backwards-compat with earlier urllib3 0.4 and earlier.
    stored = self._body
    if stored:
        return stored  # type: ignore[return-value]

    if not self._fp:
        return None  # type: ignore[return-value]

    return self.read(cache_content=True)

669 

@property
def connection(self) -> HTTPConnection | None:
    """The connection currently held by this response (``None`` if none)."""
    held = self._connection
    return held

673 

def isclosed(self) -> bool:
    """Return what ``is_fp_closed`` reports for the underlying file object."""
    fp = self._fp
    return is_fp_closed(fp)

676 

def tell(self) -> int:
    """
    Return how many bytes have been pulled over the wire so far.

    This can differ from the amount of content returned by
    :meth:``urllib3.response.HTTPResponse.read`` when the bytes are encoded
    on the wire (e.g, compressed).
    """
    bytes_on_wire = self._fp_bytes_read
    return bytes_on_wire

684 

685 def _init_length(self, request_method: str | None) -> int | None: 

686 """ 

687 Set initial length value for Response content if available. 

688 """ 

689 length: int | None 

690 content_length: str | None = self.headers.get("content-length") 

691 

692 if content_length is not None: 

693 if self.chunked: 

694 # This Response will fail with an IncompleteRead if it can't be 

695 # received as chunked. This method falls back to attempt reading 

696 # the response before raising an exception. 

697 log.warning( 

698 "Received response with both Content-Length and " 

699 "Transfer-Encoding set. This is expressly forbidden " 

700 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

701 "attempting to process response as Transfer-Encoding: " 

702 "chunked." 

703 ) 

704 return None 

705 

706 try: 

707 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

708 # be sent in a single Content-Length header 

709 # (e.g. Content-Length: 42, 42). This line ensures the values 

710 # are all valid ints and that as long as the `set` length is 1, 

711 # all values are the same. Otherwise, the header is invalid. 

712 lengths = {int(val) for val in content_length.split(",")} 

713 if len(lengths) > 1: 

714 raise InvalidHeader( 

715 "Content-Length contained multiple " 

716 "unmatching values (%s)" % content_length 

717 ) 

718 length = lengths.pop() 

719 except ValueError: 

720 length = None 

721 else: 

722 if length < 0: 

723 length = None 

724 

725 else: # if content_length is None 

726 length = None 

727 

728 # Convert status to int for comparison 

729 # In some cases, httplib returns a status of "_UNKNOWN" 

730 try: 

731 status = int(self.status) 

732 except ValueError: 

733 status = 0 

734 

735 # Check for responses that shouldn't include a body 

736 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

737 length = 0 

738 

739 return length 

740 

@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
    """
    Catch low-level python exceptions, instead re-raising urllib3
    variants, so that low-level exceptions are not leaked in the
    high-level api.

    Translation performed on exceptions raised inside the ``with`` body:

    * ``SocketTimeout`` -> ``ReadTimeoutError``
    * ``BaseSSLError`` -> ``ReadTimeoutError`` if its message contains
      "read operation timed out", otherwise ``SSLError``
    * ``IncompleteRead`` and other ``HTTPException``/``OSError`` ->
      ``ProtocolError``

    On exit, release the connection back to the pool if the original
    response is closed. If the body did not finish cleanly, the original
    response and the connection are closed first.
    """
    # Only set to True when the with-body completes without raising.
    clean_exit = False

    try:
        try:
            yield

        except SocketTimeout as e:
            # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
            # there is yet no clean way to get at it from this context.
            raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

        except BaseSSLError as e:
            # FIXME: Is there a better way to differentiate between SSLErrors?
            if "read operation timed out" not in str(e):
                # SSL errors related to framing/MAC get wrapped and reraised here
                raise SSLError(e) from e

            raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

        except IncompleteRead as e:
            # expected == -partial gets its own message instead of the
            # generic "Connection broken" text.
            if (
                e.expected is not None
                and e.partial is not None
                and e.expected == -e.partial
            ):
                arg = "Response may not contain content."
            else:
                arg = f"Connection broken: {e!r}"
            raise ProtocolError(arg, e) from e

        except (HTTPException, OSError) as e:
            raise ProtocolError(f"Connection broken: {e!r}", e) from e

        # If no exception is thrown, we should avoid cleaning up
        # unnecessarily.
        clean_exit = True
    finally:
        # If we didn't terminate cleanly, we need to throw away our
        # connection.
        if not clean_exit:
            # The response may not be closed but we're not going to use it
            # anymore so close it now to ensure that the connection is
            # released back to the pool.
            if self._original_response:
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection:
                self._connection.close()

        # If we hold the original response but it's closed now, we should
        # return the connection back to the pool.
        if self._original_response and self._original_response.isclosed():
            self.release_conn()

806 

def _fp_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Read a response with the thought that reading the number of bytes
    larger than can fit in a 32-bit int at a time via SSL in some
    known cases leads to an overflow error that has to be prevented
    if `amt` or `self.length_remaining` indicate that a problem may
    happen.

    The known cases:
      * CPython < 3.9.7 because of a bug
        https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
      * urllib3 injected with pyOpenSSL-backed SSL-support.
      * CPython < 3.10 only when `amt` does not fit 32-bit int.

    :param amt:
        Number of bytes to request, or ``None`` to call the underlying
        ``read``/``read1`` without a size.
    :param read1:
        If true, use the file object's ``read1`` instead of ``read``.
    """
    assert self._fp
    c_int_max = 2**31 - 1
    # Take the workaround path only when the request (or, for an unsized
    # read, the remaining length) exceeds c_int_max AND the runtime is one
    # of the affected ones.
    if (
        (amt and amt > c_int_max)
        or (
            amt is None
            and self.length_remaining
            and self.length_remaining > c_int_max
        )
    ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
        if read1:
            # A single read1 call, capped at the largest safe size.
            return self._fp.read1(c_int_max)
        buffer = io.BytesIO()
        # Besides `max_chunk_amt` being a maximum chunk size, it
        # affects memory overhead of reading a response by this
        # method in CPython.
        # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
        # chunk size that does not lead to an overflow error, but
        # 256 MiB is a compromise.
        max_chunk_amt = 2**28
        # Loop until the requested amount is consumed or, for an unsized
        # read, until the file object returns no more data.
        while amt is None or amt != 0:
            if amt is not None:
                chunk_amt = min(amt, max_chunk_amt)
                amt -= chunk_amt
            else:
                chunk_amt = max_chunk_amt
            data = self._fp.read(chunk_amt)
            if not data:
                break
            buffer.write(data)
            del data  # to reduce peak memory usage by `max_chunk_amt`.
        return buffer.getvalue()
    elif read1:
        return self._fp.read1(amt) if amt is not None else self._fp.read1()
    else:
        # StringIO doesn't like amt=None
        return self._fp.read(amt) if amt is not None else self._fp.read()

863 

def _raw_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Reads `amt` of bytes from the socket.

    Returns ``None`` when there is no file object. An already-closed file
    object yields ``b""`` without being read. Successful reads update
    ``_fp_bytes_read`` and, when known, ``length_remaining``.

    :raises IncompleteRead: (inside ``_error_catcher``, which re-raises it
        as ``ProtocolError``) when a sized read returns no data while
        ``enforce_content_length`` is set and ``length_remaining`` is
        non-zero.
    """
    if self._fp is None:
        return None  # type: ignore[return-value]

    # Sampled before reading so a closed file object is never read from.
    fp_closed = getattr(self._fp, "closed", False)

    with self._error_catcher():
        data = self._fp_read(amt, read1=read1) if not fp_closed else b""
        if amt is not None and amt != 0 and not data:
            # Platform-specific: Buggy versions of Python.
            # Close the connection when no data is returned
            #
            # This is redundant to what httplib/http.client _should_
            # already do.  However, versions of python released before
            # December 15, 2012 (http://bugs.python.org/issue16298) do
            # not properly close the connection in all cases. There is
            # no harm in redundantly calling close.
            self._fp.close()
            if (
                self.enforce_content_length
                and self.length_remaining is not None
                and self.length_remaining != 0
            ):
                # This is an edge case that httplib failed to cover due
                # to concerns of backward compatibility. We're
                # addressing it here to make sure IncompleteRead is
                # raised during streaming, so all calls with incorrect
                # Content-Length are caught.
                raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
        elif read1 and (
            (amt != 0 and not data) or self.length_remaining == len(data)
        ):
            # All data has been read, but `self._fp.read1` in
            # CPython 3.12 and older doesn't always close
            # `http.client.HTTPResponse`, so we close it here.
            # See https://github.com/python/cpython/issues/113199
            self._fp.close()

    # Bookkeeping happens outside the error catcher, after a successful read.
    if data:
        self._fp_bytes_read += len(data)
        if self.length_remaining is not None:
            self.length_remaining -= len(data)
    return data

915 

def read(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
    cache_content: bool = False,
) -> bytes:
    """
    Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        How much of the content to read. If specified, caching is skipped
        because it doesn't make sense to cache partial content as the full
        response. A negative value is treated like ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` means "use the value given at
        construction time".

    :param cache_content:
        If True, will save the returned data such that the same result is
        returned despite of the state of the underlying file object. This
        is useful if you want the ``.data`` property to continue working
        after having ``.read()`` the file object. (Overridden if ``amt`` is
        set.)

    :raises RuntimeError:
        If an undecoded partial read is requested after decoded content has
        already been produced.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    elif amt is not None:
        cache_content = False

        # Serve the request from already-decoded bytes when enough are buffered.
        if len(self._decoded_buffer) >= amt:
            return self._decoded_buffer.get(amt)

    data = self._raw_read(amt)

    # Flush on a full read, or when a sized read came back empty.
    flush_decoder = amt is None or (amt != 0 and not data)

    # Nothing new was read and nothing is buffered: return the empty result.
    if not data and len(self._decoded_buffer) == 0:
        return data

    if amt is None:
        data = self._decode(data, decode_content, flush_decoder)
        if cache_content:
            self._body = data
    else:
        # do not waste memory on buffer when not decoding
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        decoded_data = self._decode(data, decode_content, flush_decoder)
        self._decoded_buffer.put(decoded_data)

        # Keep reading until `amt` decoded bytes are buffered or the raw
        # stream returns nothing more.
        while len(self._decoded_buffer) < amt and data:
            # TODO make sure to initially read enough data to get past the headers
            # For example, the GZ file header takes 10 bytes, we don't want to read
            # it one byte at a time
            data = self._raw_read(amt)
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
        data = self._decoded_buffer.get(amt)

    return data

989 

def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Similar to ``http.client.HTTPResponse.read1`` and documented
    in :meth:`io.BufferedReader.read1`, but with an additional parameter:
    ``decode_content``.

    Data already sitting in the decoded buffer is returned first. Otherwise
    the raw stream is read with ``read1=True`` and, when decoding, further
    8192-byte raw reads are made until the decoder produces output or the
    raw stream returns no more data.

    :param amt:
        How much of the content to read. ``None`` and negative values both
        mean "no limit"; ``0`` returns ``b""`` when nothing is buffered.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. Defaults to ``self.decode_content``.

    :raises RuntimeError:
        If undecoded data is requested after decoded data has already been
        produced.
    """
    if decode_content is None:
        decode_content = self.decode_content
    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None

    def take_buffered() -> bytes:
        # Hand out everything when unbounded, otherwise at most ``amt`` bytes.
        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

    # try and respond without going to the network
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        if len(self._decoded_buffer) > 0:
            return take_buffered()
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    raw = self._raw_read(amt, read1=True)
    if not decode_content or raw is None:
        return raw

    self._init_decoder()
    while True:
        # An empty raw read means there is nothing more to feed the decoder,
        # so it is flushed on that pass.
        at_end = not raw
        produced = self._decode(raw, decode_content, at_end)
        self._decoded_buffer.put(produced)
        if produced or at_end:
            break
        raw = self._raw_read(8192, read1=True)

    return take_buffered()

1043 

def stream(
    self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    A generator wrapper for the read() method. A call will block until
    ``amt`` bytes have been read from the connection or until the
    connection is closed.

    Chunked responses whose underlying file object supports chunked reads
    are delegated to :meth:`read_chunked`; everything else is pulled
    through :meth:`read` until the file object is closed and the decoded
    buffer is empty.

    :param amt:
        How much of the content to read. The generator will return up to
        this much data per iteration, but may return less. This is
        particularly likely when using compressed data. However, the empty
        string will never be returned.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    # Keep going while the source is open or decoded bytes are still buffered.
    while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece

1070 

1071 # Overrides from io.IOBase 

def readable(self) -> bool:
    """Report that this object can be read from; always ``True``."""
    return True

1074 

def shutdown(self) -> None:
    """
    Invoke the stored ``_sock_shutdown`` callable with ``socket.SHUT_RD``.

    :raises ValueError:
        If ``_sock_shutdown`` is unset (falsy), e.g. after :meth:`close`.
    """
    sock_shutdown = self._sock_shutdown
    if not sock_shutdown:
        raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
    sock_shutdown(socket.SHUT_RD)

1079 

def close(self) -> None:
    """
    Release everything this response holds on to.

    Clears the stored socket-shutdown callable, closes the wrapped file
    object if the response is not already closed, closes the connection if
    there is one, and, when ``auto_close`` is off, marks the ``io.IOBase``
    side as closed too.
    """
    self._sock_shutdown = None

    still_open = not self.closed
    if still_open and self._fp:
        self._fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if self.auto_close:
        return
    # With auto_close disabled, ``closed`` is answered by io.IOBase, so the
    # base class has to be told explicitly.
    io.IOBase.close(self)

1091 

@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    With ``auto_close`` off the answer comes from ``io.IOBase.closed``.
    Otherwise it is derived from the wrapped file object: ``True`` when
    there is none, else ``fp.isclosed()`` if that method exists, else
    ``fp.closed`` if that attribute exists, else ``True``.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True

1104 

def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()

1115 

def flush(self) -> None:
    """
    Flush the wrapped file object when that is possible.

    Nothing happens if there is no wrapped file object, if it has no
    ``flush`` attribute, or if it reports itself as ``closed``.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush"):
        return None
    if getattr(fp, "closed", False):
        return None
    return fp.flush()

1123 

def supports_chunked_reads(self) -> bool:
    """
    Checks if the underlying file-like object looks like a
    :class:`http.client.HTTPResponse` object. The only test made is
    whether it has an ``fp`` attribute; if so, it is assumed to return raw
    chunks as processed by read_chunked().
    """
    wrapped = self._fp
    return hasattr(wrapped, "fp")

1132 

def _update_chunk_length(self) -> None:
    """
    Read the next chunk-size line and store the size in ``self.chunk_left``.

    Does nothing while a chunk is still in progress (``chunk_left`` is not
    ``None``). Any chunk extension after ``;`` is ignored.

    :raises InvalidChunkLength:
        If the size line is not a valid non-negative hexadecimal number.
        The response is closed first.

    :raises ProtocolError:
        If the size line is empty, i.e. the stream ended where a new chunk
        should have started. The response is closed first.
    """
    # First, we'll figure out length of a chunk and then
    # we'll try to read it from socket.
    if self.chunk_left is not None:
        return None
    line = self._fp.fp.readline()  # type: ignore[union-attr]
    line = line.split(b";", 1)[0]
    try:
        size = int(line, 16)
        if size < 0:
            # int() accepts a leading sign, but a chunk size is a plain run
            # of hex digits; a negative value must never reach the reader
            # as a byte count.
            raise ValueError("negative chunk size")
    except ValueError:
        self.close()
        if line:
            # Invalid chunked protocol response, abort.
            raise InvalidChunkLength(self, line) from None
        else:
            # Truncated at start of next chunk
            raise ProtocolError("Response ended prematurely") from None
    # Only publish the size once it has been validated.
    self.chunk_left = size

1150 

def _handle_chunk(self, amt: int | None) -> bytes:
    """
    Read data from the current chunk and update ``self.chunk_left``.

    When ``amt`` is smaller than what is left of the chunk, exactly ``amt``
    bytes are read and ``chunk_left`` is reduced accordingly. In every
    other case the rest of the chunk is read, the two bytes that follow it
    are consumed, and ``chunk_left`` is reset to ``None``.

    :param amt:
        Maximum number of bytes wanted, or ``None`` for the whole remainder
        of the chunk.
    """
    remaining = self.chunk_left
    if amt is not None and remaining is not None and amt < remaining:
        # Partial read: stay inside the current chunk.
        piece = self._fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left = remaining - amt
        return piece  # type: ignore[no-any-return]

    # Finish the chunk (amt is None, equal to, or larger than what is left).
    piece = self._fp._safe_read(remaining)  # type: ignore[union-attr]
    self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return piece  # type: ignore[no-any-return]

1172 

def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``.

    Generates the body of a chunked response piece by piece. Nothing is
    generated for a response to a HEAD request or when the underlying
    ``fp`` is already gone.

    :param amt:
        How much of the content to read from a chunk at a time. ``None``
        and negative values both mean "each chunk in full".

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response is not chunked.

    :raises BodyNotHttplibCompatible:
        If the underlying file object does not support chunked reads.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    # NOTE(review): unlike read1(), ``decode_content=None`` is not replaced
    # with ``self.decode_content`` here; it is forwarded as-is - confirm
    # that this is intended.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        original = self._original_response
        if original and is_response_to_head(original):
            original.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            self._update_chunk_length()
            if self.chunk_left == 0:
                # A zero-sized chunk ends the loop.
                break
            raw_piece = self._handle_chunk(amt)
            piece = self._decode(
                raw_piece, decode_content=decode_content, flush_decoder=False
            )
            if piece:
                yield piece

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            leftover = self._flush_decoder()
            if leftover:  # Platform-specific: Jython.
                yield leftover

        # Chunk content ends with \r\n: discard it.
        while self._fp is not None:
            line = self._fp.fp.readline()
            # Some sites may not end with '\r\n', hence the empty-line check.
            if not line or line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()

1249 

@property
def url(self) -> str | None:
    """
    The URL that was the source of this response.

    If the request that generated this response redirected, this is the
    final redirect location. Reads back the stored ``_request_url``.
    """
    return self._request_url

@url.setter
def url(self, url: str) -> None:
    # Overwrites the stored request URL with the given value.
    self._request_url = url

1262 

def __iter__(self) -> typing.Iterator[bytes]:
    """
    Iterate over the decoded body line by line.

    Data comes from ``self.stream(decode_content=True)``. Every yielded
    item ends with ``b"\\n"`` except possibly the last one, which carries
    whatever followed the final newline.
    """
    pending: list[bytes] = []
    for chunk in self.stream(decode_content=True):
        if b"\n" not in chunk:
            # No line boundary yet; keep collecting.
            pending.append(chunk)
            continue

        # At least one newline, so the split has a head and a tail.
        head, *whole_lines, tail = chunk.split(b"\n")
        yield b"".join(pending) + head + b"\n"
        for line in whole_lines:
            yield line + b"\n"
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)