Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 21%
Shortcuts on this page:
r m x — toggle line displays
j k — next/previous highlighted chunk
0 (zero) — top of page
1 (one) — first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import zlib
11from contextlib import contextmanager
12from http.client import HTTPMessage as _HttplibHTTPMessage
13from http.client import HTTPResponse as _HttplibHTTPResponse
14from socket import timeout as SocketTimeout
16if typing.TYPE_CHECKING:
17 from ._base_connection import BaseHTTPConnection
19try:
20 try:
21 import brotlicffi as brotli # type: ignore[import-not-found]
22 except ImportError:
23 import brotli # type: ignore[import-not-found]
24except ImportError:
25 brotli = None
27from . import util
28from ._base_connection import _TYPE_BODY
29from ._collections import HTTPHeaderDict
30from .connection import BaseSSLError, HTTPConnection, HTTPException
31from .exceptions import (
32 BodyNotHttplibCompatible,
33 DecodeError,
34 HTTPError,
35 IncompleteRead,
36 InvalidChunkLength,
37 InvalidHeader,
38 ProtocolError,
39 ReadTimeoutError,
40 ResponseNotChunked,
41 SSLError,
42)
43from .util.response import is_fp_closed, is_response_to_head
44from .util.retry import Retry
46if typing.TYPE_CHECKING:
47 from .connectionpool import HTTPConnectionPool
49log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for incremental Content-Encoding decoders."""

    def decompress(self, data: bytes) -> bytes:
        # Feed a chunk of encoded bytes; return whatever decoded output
        # is available so far.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Emit any remaining buffered output at end of stream.
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Some servers send a raw DEFLATE stream instead of the zlib-wrapped
    format the HTTP spec requires.  The first bytes decide which variant
    we have: if zlib-wrapped decoding fails on the initial data, every
    byte seen so far is replayed through a raw (negative window bits)
    decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing: accumulate everything so it can be replayed
        # through a raw-deflate decompressor if the zlib guess fails.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not zlib-wrapped: switch to raw deflate and replay.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if out:
            # Produced output, so the zlib-wrapped guess was right;
            # drop the replay buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """States used by GzipDecoder for multi-member gzip streams."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # at least one member completed; more may follow
    SWALLOW_DATA = 2  # error after first member; ignore remaining input
class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and ``x-gzip``).

    Supports streams made of several concatenated gzip members and, once
    the first member has fully decoded, tolerates trailing garbage the
    way other gzip clients do.
    """

    def __init__(self) -> None:
        # 16 + MAX_WBITS selects gzip header/trailer handling in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore everything that arrives after the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Trailing garbage after a complete member is
                    # acceptable, matching other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: restart with a fresh object.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the right entry point as an instance attribute, which
            # shadows ContentDecoder.decompress: 'brotlipy' exposes
            # Decompressor.decompress, 'Brotli' exposes .process.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only 'brotlipy' has an explicit flush; 'Brotli' needs none.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
try:
    # Python 3.14+ ships zstd in the stdlib 'compression' package;
    # earlier versions can use the 'backports' distribution instead.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A zstd stream may contain several frames; when one frame
            # ends (eof) with bytes left over, start a fresh
            # decompressor on the unused data.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # A frame still in progress at end of body means the
            # response was truncated or corrupt.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse of the order they were applied.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for *mode*, a Content-Encoding token.

    A comma-separated list of codings yields a MultiDecoder; any single
    coding we don't recognize falls back to DeflateDecoder.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        # Chunks are stored whole; get() splits only the chunk that
        # straddles the requested boundary.
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Pop up to *n* bytes from the front of the buffer."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = bytearray()
        while self.buffer and len(out) < n:
            chunk = self.buffer.popleft()
            wanted = n - len(out)
            if len(chunk) > wanted:
                # Split the chunk: keep the tail for the next call.
                out += chunk[:wanted]
                self.buffer.appendleft(chunk[wanted:])
                self._size -= wanted
            else:
                out += chunk
                self._size -= len(chunk)
        return bytes(out)

    def get_all(self) -> bytes:
        """Pop the entire buffered contents as one bytes object."""
        if not self.buffer:
            assert self._size == 0
            return b""
        chunks = list(self.buffer)
        self.buffer.clear()
        self._size = 0
        # Avoid a copy when there is only a single chunk.
        return chunks[0] if len(chunks) == 1 else b"".join(chunks)
class BaseHTTPResponse(io.IOBase):
    """Common base for urllib3 response objects.

    Stores status, headers and request metadata, detects chunked
    transfer-encoding, and implements transparent decoding of compressed
    bodies via the ContentDecoder helpers.  The actual reading
    (``read``, ``read1``, ``stream``, ...) is left to subclasses.
    """

    # Content-codings we can decode; extended when the optional
    # brotli/zstd support was importable at module load.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; _decode wraps them in DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Note: assigning through the property; may update self.url.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily created by _init_decoder from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Abstract: subclasses return the full (possibly cached) body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Abstract: the URL this response was fetched from.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        # Abstract: the connection this response is attached to, if any.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Abstract: yield the body in chunks of up to ``amt`` bytes.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Abstract: read up to ``amt`` bytes of (optionally decoded) body.
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        # Abstract: read with at most one underlying read call.
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Abstract: iterate over raw transfer-encoding chunks.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Abstract: give the connection back to its pool.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Abstract: consume and discard any unread body data.
        raise NotImplementedError()

    def shutdown(self) -> None:
        # Abstract: stop further reads on the underlying socket.
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a MultiDecoder if at least
                # one listed coding is one we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream state.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
513class HTTPResponse(BaseHTTPResponse):
514 """
515 HTTP Response container.
517 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
518 loaded and decoded on-demand when the ``data`` property is accessed. This
519 class is also compatible with the Python standard library's :mod:`io`
520 module, and can hence be treated as a readable object in the context of that
521 framework.
523 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
525 :param preload_content:
526 If True, the response's body will be preloaded during construction.
528 :param decode_content:
529 If True, will attempt to decode the body based on the
530 'content-encoding' header.
532 :param original_response:
533 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
534 object, it's convenient to include the original for debug purposes. It's
535 otherwise unused.
537 :param retries:
538 The retries contains the last :class:`~urllib3.util.retry.Retry` that
539 was used during the request.
541 :param enforce_content_length:
542 Enforce content length checking. Body returned by server must match
543 value of Content-Length header, if present. Otherwise, raise error.
544 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Initialize the response; see the class docstring for parameter
        descriptions of the urllib3-specific arguments."""
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the underlying fp to read from.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
610 def release_conn(self) -> None:
611 if not self._pool or not self._connection:
612 return None
614 self._pool._put_conn(self._connection)
615 self._connection = None
617 def drain_conn(self) -> None:
618 """
619 Read and discard any remaining HTTP response data in the response connection.
621 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
622 """
623 try:
624 self.read()
625 except (HTTPError, OSError, BaseSSLError, HTTPException):
626 pass
    @property
    def data(self) -> bytes:
        """The full response body, read (and cached) on first access."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        """The connection this response is still attached to, if any."""
        return self._connection
    def isclosed(self) -> bool:
        """Whether the underlying file-like body is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Raw (wire) byte count, maintained by _raw_read.
        return self._fp_bytes_read
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: the HTTP method of the originating request;
            a "HEAD" request forces the length to 0.
        :returns: the expected body length in bytes, or ``None`` if it
            cannot be determined (e.g. chunked or invalid header).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.

        :param amt: number of bytes to read, or ``None`` for "read all".
        :param read1: use the underlying ``read1`` (single read call).
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates ``_fp_bytes_read`` / ``length_remaining`` accounting
        and enforces Content-Length when ``enforce_content_length`` is set.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a partial read straight from the decoded buffer when
            # enough bytes are already available.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until enough decoded bytes exist to
            # satisfy the request, or the stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            # Stop once the decoder produced output (or was flushed);
            # otherwise keep feeding it more raw bytes.
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1013 def stream(
1014 self, amt: int | None = 2**16, decode_content: bool | None = None
1015 ) -> typing.Generator[bytes]:
1016 """
1017 A generator wrapper for the read() method. A call will block until
1018 ``amt`` bytes have been read from the connection or until the
1019 connection is closed.
1021 :param amt:
1022 How much of the content to read. The generator will return up to
1023 much data per iteration, but may return less. This is particularly
1024 likely when using compressed data. However, the empty string will
1025 never be returned.
1027 :param decode_content:
1028 If True, will attempt to decode the body based on the
1029 'content-encoding' header.
1030 """
1031 if self.chunked and self.supports_chunked_reads():
1032 yield from self.read_chunked(amt, decode_content=decode_content)
1033 else:
1034 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1035 data = self.read(amt=amt, decode_content=decode_content)
1037 if data:
1038 yield data
    # Overrides from io.IOBase
    def readable(self) -> bool:
        """Always readable, per the :mod:`io` stream interface."""
        return True
1044 def shutdown(self) -> None:
1045 if not self._sock_shutdown:
1046 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1047 if self._connection is None:
1048 raise RuntimeError(
1049 "Cannot shutdown as connection has already been released to the pool"
1050 )
1051 self._sock_shutdown(socket.SHUT_RD)
1053 def close(self) -> None:
1054 self._sock_shutdown = None
1056 if not self.closed and self._fp:
1057 self._fp.close()
1059 if self._connection:
1060 self._connection.close()
1062 if not self.auto_close:
1063 io.IOBase.close(self)
    @property
    def closed(self) -> bool:
        """Whether this response is closed (see :attr:`io.IOBase.closed`)."""
        if not self.auto_close:
            # Manual-close mode: defer to io.IOBase's own closed flag.
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True
1078 def fileno(self) -> int:
1079 if self._fp is None:
1080 raise OSError("HTTPResponse has no file to get a fileno from")
1081 elif hasattr(self._fp, "fileno"):
1082 return self._fp.fileno()
1083 else:
1084 raise OSError(
1085 "The file-like object this HTTPResponse is wrapped "
1086 "around has no file descriptor"
1087 )
1089 def flush(self) -> None:
1090 if (
1091 self._fp is not None
1092 and hasattr(self._fp, "flush")
1093 and not getattr(self._fp, "closed", False)
1094 ):
1095 return self._fp.flush()
1097 def supports_chunked_reads(self) -> bool:
1098 """
1099 Checks if the underlying file-like object looks like a
1100 :class:`http.client.HTTPResponse` object. We do this by testing for
1101 the fp attribute. If it is present we assume it returns raw chunks as
1102 processed by read_chunked().
1103 """
1104 return hasattr(self._fp, "fp")
1106 def _update_chunk_length(self) -> None:
1107 # First, we'll figure out length of a chunk and then
1108 # we'll try to read it from socket.
1109 if self.chunk_left is not None:
1110 return None
1111 line = self._fp.fp.readline() # type: ignore[union-attr]
1112 line = line.split(b";", 1)[0]
1113 try:
1114 self.chunk_left = int(line, 16)
1115 except ValueError:
1116 self.close()
1117 if line:
1118 # Invalid chunked protocol response, abort.
1119 raise InvalidChunkLength(self, line) from None
1120 else:
1121 # Truncated at start of next chunk
1122 raise ProtocolError("Response ended prematurely") from None
1124 def _handle_chunk(self, amt: int | None) -> bytes:
1125 returned_chunk = None
1126 if amt is None:
1127 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1128 returned_chunk = chunk
1129 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1130 self.chunk_left = None
1131 elif self.chunk_left is not None and amt < self.chunk_left:
1132 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1133 self.chunk_left = self.chunk_left - amt
1134 returned_chunk = value
1135 elif amt == self.chunk_left:
1136 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1137 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1138 self.chunk_left = None
1139 returned_chunk = value
1140 else: # amt > self.chunk_left
1141 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1142 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1143 self.chunk_left = None
1144 return returned_chunk # type: ignore[no-any-return]
1146 def read_chunked(
1147 self, amt: int | None = None, decode_content: bool | None = None
1148 ) -> typing.Generator[bytes]:
1149 """
1150 Similar to :meth:`HTTPResponse.read`, but with an additional
1151 parameter: ``decode_content``.
1153 :param amt:
1154 How much of the content to read. If specified, caching is skipped
1155 because it doesn't make sense to cache partial content as the full
1156 response.
1158 :param decode_content:
1159 If True, will attempt to decode the body based on the
1160 'content-encoding' header.
1161 """
1162 self._init_decoder()
1163 # FIXME: Rewrite this method and make it a class with a better structured logic.
1164 if not self.chunked:
1165 raise ResponseNotChunked(
1166 "Response is not chunked. "
1167 "Header 'transfer-encoding: chunked' is missing."
1168 )
1169 if not self.supports_chunked_reads():
1170 raise BodyNotHttplibCompatible(
1171 "Body should be http.client.HTTPResponse like. "
1172 "It should have have an fp attribute which returns raw chunks."
1173 )
1175 with self._error_catcher():
1176 # Don't bother reading the body of a HEAD request.
1177 if self._original_response and is_response_to_head(self._original_response):
1178 self._original_response.close()
1179 return None
1181 # If a response is already read and closed
1182 # then return immediately.
1183 if self._fp.fp is None: # type: ignore[union-attr]
1184 return None
1186 if amt and amt < 0:
1187 # Negative numbers and `None` should be treated the same,
1188 # but httplib handles only `None` correctly.
1189 amt = None
1191 while True:
1192 self._update_chunk_length()
1193 if self.chunk_left == 0:
1194 break
1195 chunk = self._handle_chunk(amt)
1196 decoded = self._decode(
1197 chunk, decode_content=decode_content, flush_decoder=False
1198 )
1199 if decoded:
1200 yield decoded
1202 if decode_content:
1203 # On CPython and PyPy, we should never need to flush the
1204 # decoder. However, on Jython we *might* need to, so
1205 # lets defensively do it anyway.
1206 decoded = self._flush_decoder()
1207 if decoded: # Platform-specific: Jython.
1208 yield decoded
1210 # Chunk content ends with \r\n: discard it.
1211 while self._fp is not None:
1212 line = self._fp.fp.readline()
1213 if not line:
1214 # Some sites may not end with '\r\n'.
1215 break
1216 if line == b"\r\n":
1217 break
1219 # We read everything; close the "file".
1220 if self._original_response:
1221 self._original_response.close()
    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str | None) -> None:
        # Record the (possibly redirected) request URL for later retrieval.
        self._request_url = url
1236 def __iter__(self) -> typing.Iterator[bytes]:
1237 buffer: list[bytes] = []
1238 for chunk in self.stream(decode_content=True):
1239 if b"\n" in chunk:
1240 chunks = chunk.split(b"\n")
1241 yield b"".join(buffer) + chunks[0] + b"\n"
1242 for x in chunks[1:-1]:
1243 yield x + b"\n"
1244 if chunks[-1]:
1245 buffer = [chunks[-1]]
1246 else:
1247 buffer = []
1248 else:
1249 buffer.append(chunk)
1250 if buffer:
1251 yield b"".join(buffer)