Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 22%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28try:
29 import zstandard as zstd
30except (AttributeError, ImportError, ValueError): # Defensive:
31 HAS_ZSTD = False
32else:
33 # The package 'zstandard' added the 'eof' property starting
34 # in v0.18.0 which we require to ensure a complete and
35 # valid zstd stream was fed into the ZstdDecoder.
36 # See: https://github.com/urllib3/urllib3/pull/2624
37 _zstd_version = tuple(
38 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
39 )
40 if _zstd_version < (0, 18): # Defensive:
41 HAS_ZSTD = False
42 else:
43 HAS_ZSTD = True
45from . import util
46from ._base_connection import _TYPE_BODY
47from ._collections import HTTPHeaderDict
48from .connection import BaseSSLError, HTTPConnection, HTTPException
49from .exceptions import (
50 BodyNotHttplibCompatible,
51 DecodeError,
52 HTTPError,
53 IncompleteRead,
54 InvalidChunkLength,
55 InvalidHeader,
56 ProtocolError,
57 ReadTimeoutError,
58 ResponseNotChunked,
59 SSLError,
60)
61from .util.response import is_fp_closed, is_response_to_head
62from .util.retry import Retry
64if typing.TYPE_CHECKING:
65 from .connectionpool import HTTPConnectionPool
67log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming Content-Encoding decoders."""

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return whatever output is ready; may buffer."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output at end of stream."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies that accepts both zlib-wrapped and
    raw DEFLATE streams, since servers in the wild send either form.
    """

    def __init__(self) -> None:
        self._first_try = True  # still probing: zlib-wrapped vs. raw deflate
        self._data = b""  # replay buffer, kept only while probing
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data
        if not self._first_try:
            return self._obj.decompress(data)

        # Remember everything seen so far so it can be replayed if the
        # zlib-wrapped attempt turns out to be wrong.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib stream: switch to raw DEFLATE and replay the
            # accumulated input through the new decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if out:
            # zlib-wrapped stream confirmed; the replay buffer is no
            # longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """State constants for GzipDecoder: decoding the first gzip member,
    decoding subsequent members, or swallowing trailing garbage."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2
class GzipDecoder(ContentDecoder):
    """Decoder for ``gzip`` bodies, including multi-member streams, that
    tolerates trailing garbage once at least one member has been decoded.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                in_later_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # From here on, discard whatever else arrives.
                self._state = GzipDecoderState.SWALLOW_DATA
                if in_later_member:
                    # Trailing garbage after a complete member is accepted,
                    # matching the leniency of other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: restart with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` content; only defined when a brotli
        implementation is importable."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the correct per-chunk decode method as this instance's
            # decompress(), picked by which API the installed package has.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one of the two supported packages exposes flush().
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` content, handling bodies made of several
        concatenated zstd frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # When one frame ends (eof) with input left over, feed the
            # remainder to a fresh decompressor for the next frame.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18, checked at import) tells us whether
            # a complete, valid frame was consumed.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret
class MultiDecoder(ContentDecoder):
    """Decoder for a comma-separated ``Content-Encoding`` chain.

    RFC 7231 requires the sender to list content codings in the order in
    which they were applied, so decoding walks the list in reverse.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # decompress() applies the decoders in reverse, so the first-listed
        # decoder runs last and is the one that may hold buffered output.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for ``mode``, a Content-Encoding value."""
    if "," in mode:
        return MultiDecoder(mode)

    # RFC 9110 section 8.4.1.3: recipients should treat x-gzip as
    # equivalent to gzip.
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO queue of byte chunks.

    read() must return exactly the requested number of bytes to follow the
    BufferedIOBase API, so decoded chunks are queued here via put() and
    sliced back out via get().

    Peak memory usage is the queued data plus the largest chunk copied out
    by get(); with a single huge chunk that amounts to one full copy.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the back of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            still_needed = n - fetched
            if len(chunk) > still_needed:
                # Split: emit the front of the chunk, requeue the rest.
                out.write(chunk[:still_needed])
                self.buffer.appendleft(chunk[still_needed:])
                self._size -= still_needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)

            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire queue contents as one bytes object."""
        if not self.buffer:
            assert self._size == 0
            return b""
        if len(self.buffer) == 1:
            # Fast path: hand back the single chunk without copying.
            result = self.buffer.pop()
        else:
            out = io.BytesIO()
            out.writelines(self.buffer.popleft() for _ in range(len(self.buffer)))
            result = out.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """
    Shared base class for urllib3 responses.

    Handles header storage, redirect/status bookkeeping, and transparent
    content decoding; subclasses implement the actual read machinery.
    """

    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Low-level decoder exceptions that _decode() converts into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the 'retries' property setter below, which may also
        # update the response URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        """Full response body; provided by subclasses."""
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        """URL the response was fetched from; provided by subclasses."""
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained encodings: only build a MultiDecoder when at least
                # one listed coding is something we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read into ``b`` and return the number of bytes written (io API)."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
540class HTTPResponse(BaseHTTPResponse):
541 """
542 HTTP Response container.
544 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
545 loaded and decoded on-demand when the ``data`` property is accessed. This
546 class is also compatible with the Python standard library's :mod:`io`
547 module, and can hence be treated as a readable object in the context of that
548 framework.
550 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
552 :param preload_content:
553 If True, the response's body will be preloaded during construction.
555 :param decode_content:
556 If True, will attempt to decode the body based on the
557 'content-encoding' header.
559 :param original_response:
560 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
561 object, it's convenient to include the original for debug purposes. It's
562 otherwise unused.
564 :param retries:
565 The retries contains the last :class:`~urllib3.util.retry.Retry` that
566 was used during the request.
568 :param enforce_content_length:
569 Enforce content length checking. Body returned by server must match
570 value of Content-Length header, if present. Otherwise, raise error.
571 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # str/bytes bodies are stored directly; file-like bodies are wrapped
        # as the read source below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
635 def release_conn(self) -> None:
636 if not self._pool or not self._connection:
637 return None
639 self._pool._put_conn(self._connection)
640 self._connection = None
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        # Best-effort: any failure while draining just means there is
        # nothing usable left to read.
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass
    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Read the whole body and cache it so later accesses still work.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        """The wrapped connection, or ``None`` once it has been released."""
        return self._connection
    def isclosed(self) -> bool:
        """Return True if the underlying file-like body object is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Incremented in _raw_read() as raw (pre-decoding) bytes arrive.
        return self._fp_bytes_read
679 def _init_length(self, request_method: str | None) -> int | None:
680 """
681 Set initial length value for Response content if available.
682 """
683 length: int | None
684 content_length: str | None = self.headers.get("content-length")
686 if content_length is not None:
687 if self.chunked:
688 # This Response will fail with an IncompleteRead if it can't be
689 # received as chunked. This method falls back to attempt reading
690 # the response before raising an exception.
691 log.warning(
692 "Received response with both Content-Length and "
693 "Transfer-Encoding set. This is expressly forbidden "
694 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
695 "attempting to process response as Transfer-Encoding: "
696 "chunked."
697 )
698 return None
700 try:
701 # RFC 7230 section 3.3.2 specifies multiple content lengths can
702 # be sent in a single Content-Length header
703 # (e.g. Content-Length: 42, 42). This line ensures the values
704 # are all valid ints and that as long as the `set` length is 1,
705 # all values are the same. Otherwise, the header is invalid.
706 lengths = {int(val) for val in content_length.split(",")}
707 if len(lengths) > 1:
708 raise InvalidHeader(
709 "Content-Length contained multiple "
710 "unmatching values (%s)" % content_length
711 )
712 length = lengths.pop()
713 except ValueError:
714 length = None
715 else:
716 if length < 0:
717 length = None
719 else: # if content_length is None
720 length = None
722 # Convert status to int for comparison
723 # In some cases, httplib returns a status of "_UNKNOWN"
724 try:
725 status = int(self.status)
726 except ValueError:
727 status = 0
729 # Check for responses that shouldn't include a body
730 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
731 length = 0
733 return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                # NOTE(review): expected == -partial appears to identify a
                # zero-byte read against a known remaining length — confirm.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked-read path when an overflow is actually
        # possible: a >2 GiB request on an affected SSL implementation.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and ((amt != 0 and not data) or self.length_remaining == len(data)):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        # Account for raw (pre-decoding) bytes for tell() and for
        # Content-Length bookkeeping.
        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a partial read entirely from already-decoded data
            # when possible, without touching the network.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush when the body is exhausted: a read-everything call, or a
        # non-zero partial read that returned no data.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # Keep reading until the decoder produces output (or the stream
        # ends), so a successful read1 never returns an empty chunk early.
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1038 def stream(
1039 self, amt: int | None = 2**16, decode_content: bool | None = None
1040 ) -> typing.Generator[bytes, None, None]:
1041 """
1042 A generator wrapper for the read() method. A call will block until
1043 ``amt`` bytes have been read from the connection or until the
1044 connection is closed.
1046 :param amt:
1047 How much of the content to read. The generator will return up to
1048 much data per iteration, but may return less. This is particularly
1049 likely when using compressed data. However, the empty string will
1050 never be returned.
1052 :param decode_content:
1053 If True, will attempt to decode the body based on the
1054 'content-encoding' header.
1055 """
1056 if self.chunked and self.supports_chunked_reads():
1057 yield from self.read_chunked(amt, decode_content=decode_content)
1058 else:
1059 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1060 data = self.read(amt=amt, decode_content=decode_content)
1062 if data:
1063 yield data
1065 # Overrides from io.IOBase
1066 def readable(self) -> bool:
1067 return True
1069 def close(self) -> None:
1070 if not self.closed and self._fp:
1071 self._fp.close()
1073 if self._connection:
1074 self._connection.close()
1076 if not self.auto_close:
1077 io.IOBase.close(self)
1079 @property
1080 def closed(self) -> bool:
1081 if not self.auto_close:
1082 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1083 elif self._fp is None:
1084 return True
1085 elif hasattr(self._fp, "isclosed"):
1086 return self._fp.isclosed()
1087 elif hasattr(self._fp, "closed"):
1088 return self._fp.closed
1089 else:
1090 return True
1092 def fileno(self) -> int:
1093 if self._fp is None:
1094 raise OSError("HTTPResponse has no file to get a fileno from")
1095 elif hasattr(self._fp, "fileno"):
1096 return self._fp.fileno()
1097 else:
1098 raise OSError(
1099 "The file-like object this HTTPResponse is wrapped "
1100 "around has no file descriptor"
1101 )
1103 def flush(self) -> None:
1104 if (
1105 self._fp is not None
1106 and hasattr(self._fp, "flush")
1107 and not getattr(self._fp, "closed", False)
1108 ):
1109 return self._fp.flush()
1111 def supports_chunked_reads(self) -> bool:
1112 """
1113 Checks if the underlying file-like object looks like a
1114 :class:`http.client.HTTPResponse` object. We do this by testing for
1115 the fp attribute. If it is present we assume it returns raw chunks as
1116 processed by read_chunked().
1117 """
1118 return hasattr(self._fp, "fp")
1120 def _update_chunk_length(self) -> None:
1121 # First, we'll figure out length of a chunk and then
1122 # we'll try to read it from socket.
1123 if self.chunk_left is not None:
1124 return None
1125 line = self._fp.fp.readline() # type: ignore[union-attr]
1126 line = line.split(b";", 1)[0]
1127 try:
1128 self.chunk_left = int(line, 16)
1129 except ValueError:
1130 self.close()
1131 if line:
1132 # Invalid chunked protocol response, abort.
1133 raise InvalidChunkLength(self, line) from None
1134 else:
1135 # Truncated at start of next chunk
1136 raise ProtocolError("Response ended prematurely") from None
1138 def _handle_chunk(self, amt: int | None) -> bytes:
1139 returned_chunk = None
1140 if amt is None:
1141 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1142 returned_chunk = chunk
1143 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1144 self.chunk_left = None
1145 elif self.chunk_left is not None and amt < self.chunk_left:
1146 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1147 self.chunk_left = self.chunk_left - amt
1148 returned_chunk = value
1149 elif amt == self.chunk_left:
1150 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1151 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1152 self.chunk_left = None
1153 returned_chunk = value
1154 else: # amt > self.chunk_left
1155 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1156 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1157 self.chunk_left = None
1158 return returned_chunk # type: ignore[no-any-return]
1160 def read_chunked(
1161 self, amt: int | None = None, decode_content: bool | None = None
1162 ) -> typing.Generator[bytes, None, None]:
1163 """
1164 Similar to :meth:`HTTPResponse.read`, but with an additional
1165 parameter: ``decode_content``.
1167 :param amt:
1168 How much of the content to read. If specified, caching is skipped
1169 because it doesn't make sense to cache partial content as the full
1170 response.
1172 :param decode_content:
1173 If True, will attempt to decode the body based on the
1174 'content-encoding' header.
1175 """
1176 self._init_decoder()
1177 # FIXME: Rewrite this method and make it a class with a better structured logic.
1178 if not self.chunked:
1179 raise ResponseNotChunked(
1180 "Response is not chunked. "
1181 "Header 'transfer-encoding: chunked' is missing."
1182 )
1183 if not self.supports_chunked_reads():
1184 raise BodyNotHttplibCompatible(
1185 "Body should be http.client.HTTPResponse like. "
1186 "It should have have an fp attribute which returns raw chunks."
1187 )
1189 with self._error_catcher():
1190 # Don't bother reading the body of a HEAD request.
1191 if self._original_response and is_response_to_head(self._original_response):
1192 self._original_response.close()
1193 return None
1195 # If a response is already read and closed
1196 # then return immediately.
1197 if self._fp.fp is None: # type: ignore[union-attr]
1198 return None
1200 if amt and amt < 0:
1201 # Negative numbers and `None` should be treated the same,
1202 # but httplib handles only `None` correctly.
1203 amt = None
1205 while True:
1206 self._update_chunk_length()
1207 if self.chunk_left == 0:
1208 break
1209 chunk = self._handle_chunk(amt)
1210 decoded = self._decode(
1211 chunk, decode_content=decode_content, flush_decoder=False
1212 )
1213 if decoded:
1214 yield decoded
1216 if decode_content:
1217 # On CPython and PyPy, we should never need to flush the
1218 # decoder. However, on Jython we *might* need to, so
1219 # lets defensively do it anyway.
1220 decoded = self._flush_decoder()
1221 if decoded: # Platform-specific: Jython.
1222 yield decoded
1224 # Chunk content ends with \r\n: discard it.
1225 while self._fp is not None:
1226 line = self._fp.fp.readline()
1227 if not line:
1228 # Some sites may not end with '\r\n'.
1229 break
1230 if line == b"\r\n":
1231 break
1233 # We read everything; close the "file".
1234 if self._original_response:
1235 self._original_response.close()
1237 @property
1238 def url(self) -> str | None:
1239 """
1240 Returns the URL that was the source of this response.
1241 If the request that generated this response redirected, this method
1242 will return the final redirect location.
1243 """
1244 return self._request_url
1246 @url.setter
1247 def url(self, url: str) -> None:
1248 self._request_url = url
1250 def __iter__(self) -> typing.Iterator[bytes]:
1251 buffer: list[bytes] = []
1252 for chunk in self.stream(decode_content=True):
1253 if b"\n" in chunk:
1254 chunks = chunk.split(b"\n")
1255 yield b"".join(buffer) + chunks[0] + b"\n"
1256 for x in chunks[1:-1]:
1257 yield x + b"\n"
1258 if chunks[-1]:
1259 buffer = [chunks[-1]]
1260 else:
1261 buffer = []
1262 else:
1263 buffer.append(chunk)
1264 if buffer:
1265 yield b"".join(buffer)