Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 22%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

622 statements  

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17if typing.TYPE_CHECKING: 

18 from ._base_connection import BaseHTTPConnection 

19 

20try: 

21 try: 

22 import brotlicffi as brotli # type: ignore[import-not-found] 

23 except ImportError: 

24 import brotli # type: ignore[import-not-found] 

25except ImportError: 

26 brotli = None 

27 

28try: 

29 import zstandard as zstd 

30except (AttributeError, ImportError, ValueError): # Defensive: 

31 HAS_ZSTD = False 

32else: 

33 # The package 'zstandard' added the 'eof' property starting 

34 # in v0.18.0 which we require to ensure a complete and 

35 # valid zstd stream was fed into the ZstdDecoder. 

36 # See: https://github.com/urllib3/urllib3/pull/2624 

37 _zstd_version = tuple( 

38 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr] 

39 ) 

40 if _zstd_version < (0, 18): # Defensive: 

41 HAS_ZSTD = False 

42 else: 

43 HAS_ZSTD = True 

44 

45from . import util 

46from ._base_connection import _TYPE_BODY 

47from ._collections import HTTPHeaderDict 

48from .connection import BaseSSLError, HTTPConnection, HTTPException 

49from .exceptions import ( 

50 BodyNotHttplibCompatible, 

51 DecodeError, 

52 HTTPError, 

53 IncompleteRead, 

54 InvalidChunkLength, 

55 InvalidHeader, 

56 ProtocolError, 

57 ReadTimeoutError, 

58 ResponseNotChunked, 

59 SSLError, 

60) 

61from .util.response import is_fp_closed, is_response_to_head 

62from .util.retry import Retry 

63 

64if typing.TYPE_CHECKING: 

65 from .connectionpool import HTTPConnectionPool 

66 

67log = logging.getLogger(__name__) 

68 

69 

class ContentDecoder:
    """Abstract streaming decoder for a single content-encoding."""

    def decompress(self, data: bytes) -> bytes:
        """Feed encoded bytes in; return whatever decodes so far."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Finish the stream and return any remaining decoded bytes."""
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    """Decoder for the 'deflate' content-encoding.

    Servers disagree on whether 'deflate' means a zlib-wrapped stream
    (RFC 1950) or a raw DEFLATE stream (RFC 1951).  We optimistically
    start with a zlib-wrapped decompressor and, if the first chunk fails
    to decode, transparently replay the buffered bytes as raw DEFLATE.
    """

    def __init__(self) -> None:
        self._first_try = True  # still probing: zlib-wrapped vs. raw deflate
        self._data = b""  # bytes buffered until the format is settled
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already settled; decode directly.
            return self._obj.decompress(data)

        # Keep a copy so everything can be replayed as raw DEFLATE on failure.
        self._data += data
        try:
            decoded = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parse failed: switch to raw DEFLATE and replay
            # the full buffered input through the fresh decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if decoded:
            # First successful output confirms the zlib-wrapped format.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decoded

    def flush(self) -> bytes:
        return self._obj.flush()


class GzipDecoderState:
    """Progress states for :class:`GzipDecoder` (plain int constants)."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    """Decoder for the 'gzip' content-encoding, tolerant of multi-member
    streams and of trailing garbage after at least one complete member."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        out = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(out)

        while True:
            try:
                out += self._obj.decompress(data)
            except zlib.error:
                state_before_error = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(out)
                raise

            data = self._obj.unused_data
            if not data:
                return bytes(out)

            # Leftover bytes may be another gzip member: start a fresh
            # decompressor for it.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

144 

145 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind a per-instance 'decompress' to whichever method the
            # installed implementation provides; this shadows the class
            # attribute, so calls go straight to the brotli object.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only some implementations expose flush(); when absent,
            # return no extra bytes.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

163 

164 

if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for the 'zstd' content-encoding, handling streams made
        of multiple concatenated zstd frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # When a frame finishes (eof) with bytes left over, those bytes
            # may start another frame: decode them with a fresh decompressor.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' requires zstandard >= 0.18 (checked at import time); a
            # stream that never reached eof was truncated.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret

186 

187 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse order of application.
        out = data
        for decoder in reversed(self._decoders):
            out = decoder.decompress(out)
        return out

    def flush(self) -> bytes:
        # Only the first-applied (innermost) decoder is flushed, matching
        # the original behavior.
        return self._decoders[0].flush()

207 

208 

def _get_decoder(mode: str) -> ContentDecoder:
    """Build a :class:`ContentDecoder` for a Content-Encoding header value."""
    # A comma-separated value means several codings were applied in order.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Anything else is treated as deflate (the fallback decoder).
    return DeflateDecoder()

225 

226 

class BytesQueueBuffer:
    """FIFO buffer of byte chunks with cheap partial reads.

    ``read()`` must hand back exactly the number of bytes requested even
    though decoded data arrives in arbitrarily sized chunks, so chunks are
    queued here via :meth:`put` and sliced out via :meth:`get`.

    Peak memory is bounded by the queued data plus the largest single
    chunk copied out in :meth:`get` (worst case: one huge chunk, which is
    copied in full).
    """

    def __init__(self) -> None:
        # Chunk queue plus a running total of the bytes it holds.
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the tail of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the head of the buffer.

        :raises RuntimeError: if the buffer is empty (and ``n != 0``).
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        needed = n
        while needed > 0 and self.buffer:
            chunk = self.buffer.popleft()
            if len(chunk) > needed:
                # Chunk is larger than what is left to serve: emit the
                # front and push the remainder back for the next call.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                needed = 0
            else:
                out.write(chunk)
                self._size -= len(chunk)
                needed -= len(chunk)

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire buffered content as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""

        if len(chunks) == 1:
            # Single chunk: no copy needed.
            result = chunks.pop()
        else:
            out = io.BytesIO()
            while chunks:
                out.write(chunks.popleft())
            result = out.getvalue()

        self._size = 0
        return result

298 

299 

class BaseHTTPResponse(io.IOBase):
    """Shared interface and decoding logic for HTTP responses.

    Concrete subclasses implement the actual I/O (``read``, ``stream``,
    ``close``, ...); this base class provides header bookkeeping, redirect
    detection, and the content-decoding helpers they share.
    """

    # Content-encodings we know how to decode; extended below when the
    # optional brotli / zstandard packages were importable at module load.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() translates into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Normalize headers into an HTTPHeaderDict (case-insensitive access).
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        # Detect chunked transfer encoding from the Transfer-Encoding header.
        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the Content-Encoding header.
        self._decoder: ContentDecoder | None = None
        # Declared here; assigned by the subclass.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Abstract: subclasses return the full (possibly cached) body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Abstract: the URL the response was fetched from.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        # Abstract: the underlying connection, if any.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        # Note this assigns through the subclass's `url` setter.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder if at least one of
                # them is supported.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :raises RuntimeError: when undecoded output is requested after
            decoded output was already produced (the stream cannot rewind).
        :raises DecodeError: when the decoder fails on the payload.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            # Wrap low-level decoder failures in urllib3's DecodeError.
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill ``b`` with up to len(b) bytes; return the count actually read.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

538 

539 

540class HTTPResponse(BaseHTTPResponse): 

541 """ 

542 HTTP Response container. 

543 

544 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

545 loaded and decoded on-demand when the ``data`` property is accessed. This 

546 class is also compatible with the Python standard library's :mod:`io` 

547 module, and can hence be treated as a readable object in the context of that 

548 framework. 

549 

550 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

551 

552 :param preload_content: 

553 If True, the response's body will be preloaded during construction. 

554 

555 :param decode_content: 

556 If True, will attempt to decode the body based on the 

557 'content-encoding' header. 

558 

559 :param original_response: 

560 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

561 object, it's convenient to include the original for debug purposes. It's 

562 otherwise unused. 

563 

564 :param retries: 

565 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

566 was used during the request. 

567 

568 :param enforce_content_length: 

569 Enforce content length checking. Body returned by server must match 

570 value of Content-Length header, if present. Otherwise, raise error. 

571 """ 

572 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        """Wrap a response body (str/bytes or file-like object).

        See the class docstring for parameter descriptions.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0  # raw (wire) bytes consumed so far; see tell()
        self.msg = msg

        # A str/bytes body is stored directly...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ...while a file-like body is read lazily through self._fp.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

634 

635 def release_conn(self) -> None: 

636 if not self._pool or not self._connection: 

637 return None 

638 

639 self._pool._put_conn(self._connection) 

640 self._connection = None 

641 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Draining is best-effort: any read failure is deliberately
            # ignored here.
            pass

652 

653 @property 

654 def data(self) -> bytes: 

655 # For backwards-compat with earlier urllib3 0.4 and earlier. 

656 if self._body: 

657 return self._body # type: ignore[return-value] 

658 

659 if self._fp: 

660 return self.read(cache_content=True) 

661 

662 return None # type: ignore[return-value] 

663 

    @property
    def connection(self) -> HTTPConnection | None:
        # The underlying connection, or None once it has been released.
        return self._connection

667 

    def isclosed(self) -> bool:
        """Whether the underlying file object is closed (or absent)."""
        return is_fp_closed(self._fp)

670 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

678 

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: HTTP method of the originating request; a
            "HEAD" request forces a zero-length body.
        :returns: expected body length in bytes, or ``None`` when unknown
            (chunked, missing/invalid/negative Content-Length).
        :raises InvalidHeader: if Content-Length holds multiple conflicting
            values.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                # Non-integer Content-Length: treat as unknown.
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

734 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # An SSL read timeout is reported the same way as a socket one.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

800 

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only use the chunked workaround when the requested (or expected)
        # size exceeds a 32-bit int AND we're on an affected SSL stack.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 may legally return fewer bytes, so a single capped
                # call suffices.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

857 

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates ``_fp_bytes_read`` / ``length_remaining`` and enforces
        Content-Length when ``enforce_content_length`` is set.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            # Track wire-level progress for tell() and length enforcement.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

909 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve entirely from previously decoded-and-buffered data when
            # possible, avoiding a socket read.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end of stream: either a full-body read, or a
        # bounded read that returned nothing (EOF).
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until enough decoded bytes are buffered or the
            # raw stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

983 

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            # Once decoded bytes have been handed out, serving raw bytes
            # would mix encodings, so refuse the mode switch.
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # Keep pulling raw bytes until the decoder produces at least one
        # decoded byte, or EOF (empty read) forces a flush.
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

1037 

1038 def stream( 

1039 self, amt: int | None = 2**16, decode_content: bool | None = None 

1040 ) -> typing.Generator[bytes, None, None]: 

1041 """ 

1042 A generator wrapper for the read() method. A call will block until 

1043 ``amt`` bytes have been read from the connection or until the 

1044 connection is closed. 

1045 

1046 :param amt: 

1047 How much of the content to read. The generator will return up to 

1048 much data per iteration, but may return less. This is particularly 

1049 likely when using compressed data. However, the empty string will 

1050 never be returned. 

1051 

1052 :param decode_content: 

1053 If True, will attempt to decode the body based on the 

1054 'content-encoding' header. 

1055 """ 

1056 if self.chunked and self.supports_chunked_reads(): 

1057 yield from self.read_chunked(amt, decode_content=decode_content) 

1058 else: 

1059 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

1060 data = self.read(amt=amt, decode_content=decode_content) 

1061 

1062 if data: 

1063 yield data 

1064 

1065 # Overrides from io.IOBase 

1066 def readable(self) -> bool: 

1067 return True 

1068 

1069 def close(self) -> None: 

1070 if not self.closed and self._fp: 

1071 self._fp.close() 

1072 

1073 if self._connection: 

1074 self._connection.close() 

1075 

1076 if not self.auto_close: 

1077 io.IOBase.close(self) 

1078 

1079 @property 

1080 def closed(self) -> bool: 

1081 if not self.auto_close: 

1082 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return] 

1083 elif self._fp is None: 

1084 return True 

1085 elif hasattr(self._fp, "isclosed"): 

1086 return self._fp.isclosed() 

1087 elif hasattr(self._fp, "closed"): 

1088 return self._fp.closed 

1089 else: 

1090 return True 

1091 

1092 def fileno(self) -> int: 

1093 if self._fp is None: 

1094 raise OSError("HTTPResponse has no file to get a fileno from") 

1095 elif hasattr(self._fp, "fileno"): 

1096 return self._fp.fileno() 

1097 else: 

1098 raise OSError( 

1099 "The file-like object this HTTPResponse is wrapped " 

1100 "around has no file descriptor" 

1101 ) 

1102 

1103 def flush(self) -> None: 

1104 if ( 

1105 self._fp is not None 

1106 and hasattr(self._fp, "flush") 

1107 and not getattr(self._fp, "closed", False) 

1108 ): 

1109 return self._fp.flush() 

1110 

1111 def supports_chunked_reads(self) -> bool: 

1112 """ 

1113 Checks if the underlying file-like object looks like a 

1114 :class:`http.client.HTTPResponse` object. We do this by testing for 

1115 the fp attribute. If it is present we assume it returns raw chunks as 

1116 processed by read_chunked(). 

1117 """ 

1118 return hasattr(self._fp, "fp") 

1119 

1120 def _update_chunk_length(self) -> None: 

1121 # First, we'll figure out length of a chunk and then 

1122 # we'll try to read it from socket. 

1123 if self.chunk_left is not None: 

1124 return None 

1125 line = self._fp.fp.readline() # type: ignore[union-attr] 

1126 line = line.split(b";", 1)[0] 

1127 try: 

1128 self.chunk_left = int(line, 16) 

1129 except ValueError: 

1130 self.close() 

1131 if line: 

1132 # Invalid chunked protocol response, abort. 

1133 raise InvalidChunkLength(self, line) from None 

1134 else: 

1135 # Truncated at start of next chunk 

1136 raise ProtocolError("Response ended prematurely") from None 

1137 

1138 def _handle_chunk(self, amt: int | None) -> bytes: 

1139 returned_chunk = None 

1140 if amt is None: 

1141 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1142 returned_chunk = chunk 

1143 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1144 self.chunk_left = None 

1145 elif self.chunk_left is not None and amt < self.chunk_left: 

1146 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1147 self.chunk_left = self.chunk_left - amt 

1148 returned_chunk = value 

1149 elif amt == self.chunk_left: 

1150 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1151 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1152 self.chunk_left = None 

1153 returned_chunk = value 

1154 else: # amt > self.chunk_left 

1155 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1156 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1157 self.chunk_left = None 

1158 return returned_chunk # type: ignore[no-any-return] 

1159 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            if the response lacks a 'transfer-encoding: chunked' header.
        :raises BodyNotHttplibCompatible:
            if the underlying body is not an ``http.client``-style object.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                # A zero-length chunk marks the end of the chunked body.
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1236 

1237 @property 

1238 def url(self) -> str | None: 

1239 """ 

1240 Returns the URL that was the source of this response. 

1241 If the request that generated this response redirected, this method 

1242 will return the final redirect location. 

1243 """ 

1244 return self._request_url 

1245 

1246 @url.setter 

1247 def url(self, url: str) -> None: 

1248 self._request_url = url 

1249 

1250 def __iter__(self) -> typing.Iterator[bytes]: 

1251 buffer: list[bytes] = [] 

1252 for chunk in self.stream(decode_content=True): 

1253 if b"\n" in chunk: 

1254 chunks = chunk.split(b"\n") 

1255 yield b"".join(buffer) + chunks[0] + b"\n" 

1256 for x in chunks[1:-1]: 

1257 yield x + b"\n" 

1258 if chunks[-1]: 

1259 buffer = [chunks[-1]] 

1260 else: 

1261 buffer = [] 

1262 else: 

1263 buffer.append(chunk) 

1264 if buffer: 

1265 yield b"".join(buffer)