Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 22%

615 statements

coverage.py v7.4.4, created at 2024-04-20 06:09 +0000

from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

try:
    import zstandard as zstd  # type: ignore[import-not-found]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624

    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )

    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from typing import Literal

    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
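
# Editor's note: a minimal sketch (not part of urllib3) of why DeflateDecoder
# retries. "Content-Encoding: deflate" should mean a zlib-wrapped stream
# (RFC 1950), but some servers send a bare DEFLATE stream (RFC 1951); the
# first-try/fallback logic above accepts both transparently.
def _example_deflate_variants() -> None:
    payload = b"hello world"
    # zlib-wrapped, the nominal "deflate" encoding:
    assert DeflateDecoder().decompress(zlib.compress(payload)) == payload
    # bare DEFLATE stream, as sent by some misbehaving servers:
    compressor = zlib.compressobj(wbits=-zlib.MAX_WBITS)
    raw = compressor.compress(payload) + compressor.flush()
    assert DeflateDecoder().decompress(raw) == payload
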

class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
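
# Editor's note: a small sketch (not part of urllib3) of the multi-member
# handling above. A gzip body may be several concatenated members; the
# decoder restarts on `unused_data` until the input is exhausted.
def _example_multi_member_gzip() -> None:
    import gzip

    blob = gzip.compress(b"part one, ") + gzip.compress(b"part two")
    assert GzipDecoder().decompress(blob) == b"part one, part two"
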

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
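
# Editor's note: an illustrative sketch (not part of urllib3), assuming the
# optional 'zstandard' package is installed. Like gzip, a zstd body may be
# several concatenated frames; ZstdDecoder restarts its decompressobj on
# each frame boundary (eof with unused_data):
#
#     cctx = zstd.ZstdCompressor()
#     blob = cctx.compress(b"one") + cctx.compress(b"two")
#     assert ZstdDecoder().decompress(blob) == b"onetwo"
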

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
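
# Editor's note: a minimal sketch (not part of urllib3) of stacked encodings.
# "Content-Encoding: deflate, gzip" means deflate was applied first and gzip
# last, so MultiDecoder walks its decoders in reverse to undo gzip first.
def _example_stacked_encodings() -> None:
    import gzip

    body = gzip.compress(zlib.compress(b"payload"))
    decoder = _get_decoder("deflate, gzip")
    assert decoder.decompress(body) == b"payload"
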

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result
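
# Editor's note: a usage sketch (not part of urllib3). get() splits chunks
# as needed and get_all() drains whatever remains:
def _example_bytes_queue_buffer() -> None:
    buf = BytesQueueBuffer()
    buf.put(b"hello ")
    buf.put(b"world")
    assert len(buf) == 11
    assert buf.get(5) == b"hello"  # splits the first chunk
    assert buf.get_all() == b" world"
    assert len(buf) == 0
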

class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)
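
    # Editor's note (sketch, not part of urllib3): to use a custom JSON
    # decoder, feed ``.data`` to it directly instead of calling json(),
    # e.g. json.loads(resp.data, parse_float=decimal.Decimal), where
    # ``resp`` is a hypothetical HTTPResponse.
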

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set up the _decoder attribute if necessary.
        """

        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.


    :param retries:
        The last :class:`~urllib3.util.retry.Retry` that was used during
        the request.


    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:

        # For backwards-compat with urllib3 0.4 and earlier.

        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read


    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
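
    # Editor's note (sketch, not part of urllib3): the set comprehension above
    # tolerates a folded "Content-Length: 42, 42" but rejects conflicting
    # values, e.g.:
    #
    #     {int(v) for v in "42, 42".split(",")}   # -> {42}, accepted
    #     {int(v) for v in "42, 43".split(",")}   # -> {42, 43}, InvalidHeader
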

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response, working around the fact that reading more bytes
        than fit in a 32-bit int at a time via SSL leads to an overflow
        error in some known cases. The workaround kicks in when `amt` or
        `self.length_remaining` indicate that a problem may happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """

        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
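
    # Editor's note (sketch, not part of urllib3): with the 256 MiB slice
    # size above, a hypothetical 5 GiB read with amt=None proceeds as
    # 20 reads of 2**28 bytes each (5 * 2**30 == 20 * 2**28), keeping every
    # SSL read comfortably below the 2**31 - 1 limit.
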

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.


        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object.
            This is useful if you want the ``.data`` property to continue
            working after having ``.read()`` the file object. (Overridden if
            ``amt`` is set.)
        """

        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
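
    # Editor's note: a usage sketch (not part of urllib3); names are
    # hypothetical. Buffered reads of an unpreloaded response:
    #
    #     resp = pool.urlopen("GET", "/big", preload_content=False)
    #     while chunk := resp.read(8192):
    #         sink.write(chunk)
    #     resp.release_conn()
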

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
            if amt == 0:
                return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.


        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

1035 :param decode_content: 

1036 If True, will attempt to decode the body based on the 

1037 'content-encoding' header. 

1038 """ 

1039 if self.chunked and self.supports_chunked_reads(): 

1040 yield from self.read_chunked(amt, decode_content=decode_content) 

1041 else: 

1042 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0: 

1043 data = self.read(amt=amt, decode_content=decode_content) 

1044 

1045 if data: 

1046 yield data 

1047 
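
    # Editor's note: a usage sketch (not part of urllib3); names are
    # hypothetical. Streaming a large download to disk:
    #
    #     resp = pool.urlopen("GET", "/big", preload_content=False)
    #     with open("out.bin", "wb") as fh:
    #         for chunk in resp.stream(2**16, decode_content=True):
    #             fh.write(chunk)
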

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():

            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )


        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
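

# Editor's note: a usage sketch (not part of urllib3); `resp` is a
# hypothetical HTTPResponse. __iter__ re-chunks the decoded stream on
# b"\n", so a response can be consumed line by line:
#
#     for line in resp:
#         process(line)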