Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

557 statements  

« prev     ^ index     » next       coverage.py v7.2.0, created at 2023-02-23 06:30 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fix: the assignment was accidentally duplicated as
    # "_zstd_version = _zstd_version = ..." — harmless but confusing.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing_extensions import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Concrete subclasses must implement both ``decompress`` and ``flush``.
    """

    def decompress(self, data: bytes) -> bytes:
        """Feed *data* to the decoder and return any decoded bytes."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Finish decoding and return any remaining buffered bytes."""
        raise NotImplementedError()

74 

75 

class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Servers disagree on whether "deflate" means a zlib-wrapped stream
    (RFC 1950) or a raw DEFLATE stream (RFC 1951). The first chunks are
    tried with the zlib wrapper; if that fails, all bytes seen so far
    are replayed through a raw-DEFLATE decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing the stream format: keep a copy of everything seen
        # so the raw-DEFLATE fallback can replay it.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # The zlib-wrapped guess was wrong; switch to raw DEFLATE and
            # re-run the accumulated input through the new object.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            buffered, self._data = self._data, None  # type: ignore[assignment]
            return self.decompress(buffered)
        if out:
            # Producing output proves the zlib-wrapped guess was right.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()

106 

107 

class GzipDecoderState:
    """Tri-state progress marker used by the gzip decoder."""

    # Decoding the first gzip member of the stream.
    FIRST_MEMBER = 0
    # At least one member completed; more members may follow.
    OTHER_MEMBERS = 1
    # A decode error occurred; all further input is discarded.
    SWALLOW_DATA = 2

112 

113 

class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip``.

    Supports multi-member gzip streams; trailing garbage after a complete
    member is tolerated, matching the behavior of other gzip clients.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: start a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. brotlipy exposes
        # .decompress() on its decompressor, Brotli exposes .process().
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            attr = "decompress" if hasattr(self._obj, "decompress") else "process"
            setattr(self, "decompress", getattr(self._obj, attr))

        def flush(self) -> bytes:
            # Only brotlipy's decompressor provides flush(); for 'Brotli'
            # there is nothing to flush.
            flush_method = getattr(self._obj, "flush", None)
            if flush_method is not None:
                return flush_method()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: zstd``."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            return self._obj.decompress(data) if data else b""  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            ret = self._obj.flush()
            # 'eof' (zstandard >= 0.18) reports whether the stream ended on
            # a complete frame; anything else means truncated input.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

179 

180 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # Only the first-applied (last-undone) decoder can hold tail data.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse order of application.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

200 

201 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder matching a Content-Encoding header value."""
    if "," in mode:
        # Multiple codings were applied, e.g. "gzip, br".
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()
    elif brotli is not None and mode == "br":
        return BrotliDecoder()
    elif zstd is not None and mode == "zstd":
        return ZstdDecoder()
    else:
        # Historical default: treat anything else as deflate.
        return DeflateDecoder()

216 

217 

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

    * self.buffer, which contains the full data
    * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        # Chunks in arrival order; deque gives O(1) pops from the left.
        self.buffer: typing.Deque[bytes] = collections.deque()
        # Total number of buffered bytes across all chunks.
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append *data* to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the front of the buffer.

        :raises ValueError: if *n* is negative.
        :raises RuntimeError: if *n* is positive and the buffer is empty.
        """
        if n == 0:
            # Fix: a request for zero bytes is always satisfiable, even on an
            # empty buffer. Previously this fell into the empty-buffer check
            # and raised RuntimeError, which made HTTPResponse.read(0) crash.
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Split the chunk: return the prefix, requeue the suffix.
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

273 

274 

class BaseHTTPResponse(io.IOBase):
    """Abstract base for urllib3 HTTP responses.

    Stores status/headers/metadata, detects redirects and chunked
    transfer-encoding, and implements the streaming content-decoding
    pipeline. Subclasses supply the actual body I/O (read/stream/close).
    """

    # Content-Encodings this module can decode; extended below when the
    # optional brotli/zstandard packages imported successfully.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types _decode() converts into urllib3's DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once _decode() has produced output; guards against mixing
        # decoded and undecoded reads on the same response.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Note: assigning through the 'retries' property below, which may
        # also update the URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily created by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full response body; implemented by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        """The :class:`~urllib3.util.retry.Retry` used for the request, if any."""
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Implemented by subclasses.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Implemented by subclasses.
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Implemented by subclasses.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def close(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder if at least one of
                # the listed codings is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Refuse to hand out raw bytes once decoded bytes were returned:
            # callers would otherwise see a mix of both.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill *b* with up to len(b) bytes; returns the number written (0 at EOF).
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        # http.client compatibility: the final URL of the response.
        return self.url

495 

496 

497class HTTPResponse(BaseHTTPResponse): 

498 """ 

499 HTTP Response container. 

500 

501 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

502 loaded and decoded on-demand when the ``data`` property is accessed. This 

503 class is also compatible with the Python standard library's :mod:`io` 

504 module, and can hence be treated as a readable object in the context of that 

505 framework. 

506 

507 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

508 

509 :param preload_content: 

510 If True, the response's body will be preloaded during construction. 

511 

512 :param decode_content: 

513 If True, will attempt to decode the body based on the 

514 'content-encoding' header. 

515 

516 :param original_response: 

517 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

518 object, it's convenient to include the original for debug purposes. It's 

519 otherwise unused. 

520 

521 :param retries: 

522 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

523 was used during the request. 

524 

525 :param enforce_content_length: 

526 Enforce content length checking. Body returned by server must match 

527 value of Content-Length header, if present. Otherwise, raise error. 

528 """ 

529 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        """Wrap a response *body* (str/bytes or a file-like object) with
        urllib3's decoding, length-checking, and pooling machinery.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the read() source.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

589 

590 def release_conn(self) -> None: 

591 if not self._pool or not self._connection: 

592 return None 

593 

594 self._pool._put_conn(self._connection) 

595 self._connection = None 

596 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: a failed read just means the connection
            # cannot be reused; swallowing the error is intentional.
            pass

607 

    @property
    def data(self) -> bytes:
        """The full (possibly cached) response body; reads it if necessary."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # cache_content=True stores the result in self._body for reuse.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

618 

    @property
    def connection(self) -> HTTPConnection | None:
        """The underlying connection, if it has not been released."""
        return self._connection

622 

    def isclosed(self) -> bool:
        """Whether the underlying file object is closed (http.client compat)."""
        return is_fp_closed(self._fp)

625 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

633 

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        Returns the expected body length in bytes, or ``None`` when it
        cannot be determined (chunked, invalid, or absent Content-Length).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                # Negative lengths are nonsensical; treat as unknown.
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

689 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # A read-timeout dressed up as an SSL error.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

745 

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Workaround path: accumulate the body in <= 256 MiB chunks.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

793 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also tracks bytes read for tell()/Content-Length accounting and
        raises IncompleteRead if the body ends short of Content-Length.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

835 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve entirely from the decoded buffer when possible.
            # NOTE(review): for amt == 0 with an empty buffer this calls
            # BytesQueueBuffer.get(0), which raises RuntimeError per its
            # empty-buffer check — looks like an unhandled edge; confirm.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end-of-body: either a full read, or a
        # bounded read that hit EOF (empty data for a nonzero request).
        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until enough decoded bytes are buffered
            # or the stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

910 

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                # Skip empty reads so the consumer never sees b"".
                if data:
                    yield data

937 

938 # Overrides from io.IOBase 

939 def readable(self) -> bool: 

940 return True 

941 

    def close(self) -> None:
        """Close the file object and connection; honors ``auto_close``."""
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        # When auto_close is disabled, closing is explicit: mark the
        # io.IOBase state closed only in that case.
        if not self.auto_close:
            io.IOBase.close(self)

951 

    @property
    def closed(self) -> bool:
        """Closed-state, delegating to whichever signal the body exposes."""
        if not self.auto_close:
            # Explicit-close mode: trust io.IOBase's own flag.
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

964 

965 def fileno(self) -> int: 

966 if self._fp is None: 

967 raise OSError("HTTPResponse has no file to get a fileno from") 

968 elif hasattr(self._fp, "fileno"): 

969 return self._fp.fileno() 

970 else: 

971 raise OSError( 

972 "The file-like object this HTTPResponse is wrapped " 

973 "around has no file descriptor" 

974 ) 

975 

    def flush(self) -> None:
        """Flush the wrapped file object, if it supports it and is open."""
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

983 

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

992 

993 def _update_chunk_length(self) -> None: 

994 # First, we'll figure out length of a chunk and then 

995 # we'll try to read it from socket. 

996 if self.chunk_left is not None: 

997 return None 

998 line = self._fp.fp.readline() # type: ignore[union-attr] 

999 line = line.split(b";", 1)[0] 

1000 try: 

1001 self.chunk_left = int(line, 16) 

1002 except ValueError: 

1003 # Invalid chunked protocol response, abort. 

1004 self.close() 

1005 raise InvalidChunkLength(self, line) from None 

1006 

1007 def _handle_chunk(self, amt: int | None) -> bytes: 

1008 returned_chunk = None 

1009 if amt is None: 

1010 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1011 returned_chunk = chunk 

1012 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1013 self.chunk_left = None 

1014 elif self.chunk_left is not None and amt < self.chunk_left: 

1015 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1016 self.chunk_left = self.chunk_left - amt 

1017 returned_chunk = value 

1018 elif amt == self.chunk_left: 

1019 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1020 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1021 self.chunk_left = None 

1022 returned_chunk = value 

1023 else: # amt > self.chunk_left 

1024 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1025 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1026 self.chunk_left = None 

1027 return returned_chunk # type: ignore[no-any-return] 

1028 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            if the response lacks 'Transfer-Encoding: chunked'.
        :raises BodyNotHttplibCompatible:
            if the wrapped file object has no ``fp`` attribute.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        # _error_catcher translates low-level socket/SSL errors raised
        # anywhere below into urllib3 exceptions.
        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            # One iteration per chunk; a zero-size chunk signals the end
            # of the body, after which only trailers may follow.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            # This loop also consumes any trailer lines that precede the
            # terminating blank line.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1100 

1101 @property 

1102 def url(self) -> str | None: 

1103 """ 

1104 Returns the URL that was the source of this response. 

1105 If the request that generated this response redirected, this method 

1106 will return the final redirect location. 

1107 """ 

1108 return self._request_url 

1109 

1110 @url.setter 

1111 def url(self, url: str) -> None: 

1112 self._request_url = url 

1113 

1114 def __iter__(self) -> typing.Iterator[bytes]: 

1115 buffer: list[bytes] = [] 

1116 for chunk in self.stream(decode_content=True): 

1117 if b"\n" in chunk: 

1118 chunks = chunk.split(b"\n") 

1119 yield b"".join(buffer) + chunks[0] + b"\n" 

1120 for x in chunks[1:-1]: 

1121 yield x + b"\n" 

1122 if chunks[-1]: 

1123 buffer = [chunks[-1]] 

1124 else: 

1125 buffer = [] 

1126 else: 

1127 buffer.append(chunk) 

1128 if buffer: 

1129 yield b"".join(buffer)