from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

try:
    try:
        import brotlicffi as brotli  # type: ignore[import]
    except ImportError:
        import brotli  # type: ignore[import]
except ImportError:
    brotli = None

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from typing_extensions import Literal

    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
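

# Illustrative sketch (editor's addition, not part of urllib3): the two-pass
# strategy above accepts both zlib-wrapped and raw-deflate payloads, which
# servers send interchangeably for "Content-Encoding: deflate". The _demo_
# helper name is hypothetical and never called by the library.
def _demo_deflate_decoder() -> None:
    decoder = DeflateDecoder()
    # Strip the 2-byte zlib header and 4-byte Adler-32 trailer to get a raw
    # deflate stream; the first zlib attempt fails and the decoder retries
    # with -zlib.MAX_WBITS.
    raw_deflate = zlib.compress(b"hello world")[2:-4]
    assert decoder.decompress(raw_deflate) + decoder.flush() == b"hello world"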


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
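

# Illustrative sketch (editor's addition, not part of urllib3): the loop above
# restarts the zlib object at each member boundary, so concatenated gzip
# members decode as one logical stream. The _demo_ helper name is hypothetical.
def _demo_gzip_decoder() -> None:
    import gzip

    decoder = GzipDecoder()
    payload = gzip.compress(b"hello ") + gzip.compress(b"world")
    assert decoder.decompress(payload) == b"hello world"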


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
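
    # Illustrative sketch (editor's addition, not part of urllib3), guarded by
    # the same brotli availability check: both bindings round-trip through the
    # shared ``decompress`` attribute installed in __init__ above. The _demo_
    # helper name is hypothetical.
    def _demo_brotli_decoder() -> None:
        decoder = BrotliDecoder()
        compressed = brotli.compress(b"hello world")
        assert decoder.decompress(compressed) + decoder.flush() == b"hello world"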


if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            return self._obj.decompress(data)  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            ret = self._obj.flush()
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
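
    # Illustrative sketch (editor's addition, not part of urllib3), guarded by
    # the same zstandard availability check: a complete frame round-trips, and
    # flush() enforces the v0.18.0+ ``eof`` completeness check noted above.
    # The _demo_ helper name is hypothetical.
    def _demo_zstd_decoder() -> None:
        decoder = ZstdDecoder()
        compressed = zstd.ZstdCompressor().compress(b"hello world")
        assert decoder.decompress(compressed) + decoder.flush() == b"hello world"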


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
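

# Illustrative sketch (editor's addition, not part of urllib3): a plain
# Content-Encoding token maps to a single decoder, while a comma-separated
# value yields a MultiDecoder that undoes the codings in reverse order of
# application. The _demo_ helper name is hypothetical.
def _demo_get_decoder() -> None:
    import gzip

    assert isinstance(_get_decoder("gzip"), GzipDecoder)
    assert isinstance(_get_decoder("deflate"), DeflateDecoder)

    # "Content-Encoding: deflate, gzip" means deflate was applied first and
    # gzip second, so decoding runs gzip first, then deflate.
    chained = _get_decoder("deflate, gzip")
    body = gzip.compress(zlib.compress(b"payload"))
    assert chained.decompress(body) == b"payload"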


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put().

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be >= 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()
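

# Illustrative sketch (editor's addition, not part of urllib3): chunks go in
# via put() and come back out in exact byte counts via get(); a partially
# consumed chunk is pushed back onto the left of the deque. The _demo_ helper
# name is hypothetical.
def _demo_bytes_queue_buffer() -> None:
    buffer = BytesQueueBuffer()
    buffer.put(b"hello ")
    buffer.put(b"world")
    assert len(buffer) == 11
    assert buffer.get(4) == b"hell"
    assert buffer.get(7) == b"o world"
    assert len(buffer) == 0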


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`. (An illustrative sketch,
        ``_demo_redirect_and_json``, follows this class definition.)
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
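

# Illustrative sketches (editor's additions, not part of urllib3). They
# exercise BaseHTTPResponse behaviour documented above through the concrete
# HTTPResponse subclass defined below; the _demo_ names are hypothetical and
# never called by the library itself.
def _demo_redirect_and_json() -> None:
    # get_redirect_location(): a string for a redirect with a Location
    # header, None for a redirect without one, False for a non-redirect.
    moved = HTTPResponse(status=302, headers={"location": "/new"})
    assert moved.get_redirect_location() == "/new"
    assert HTTPResponse(status=301).get_redirect_location() is None
    assert HTTPResponse(status=200).get_redirect_location() is False

    # json() decodes the raw body as UTF-8; a custom decoder can be fed the
    # same bytes through the .data property instead.
    response = HTTPResponse(body=b'{"origin": "127.0.0.1"}')
    assert response.json() == {"origin": "127.0.0.1"}
    assert _json.loads(response.data.decode("utf-8")) == response.json()


def _demo_readinto() -> None:
    # readinto() fills a caller-supplied buffer, which is what lets this
    # class participate in the io.BufferedIOBase reading protocol.
    response = HTTPResponse(body=io.BytesIO(b"abcdef"), preload_content=False)
    scratch = bytearray(4)
    assert response.readinto(scratch) == 4
    assert bytes(scratch) == b"abcd"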


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The last :class:`~urllib3.util.retry.Retry` that was used during
        the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "non-matching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response, guarding against a known overflow: on some platforms,
        asking SSL to read more bytes than fit in a 32-bit int at once raises
        an overflow error, so we chunk the read when `amt` or
        `self.length_remaining` indicate that this may happen.

        The known cases:
         * 3.8 <= CPython < 3.9.7 because of a bug
           https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
         * urllib3 injected with pyOpenSSL-backed SSL-support.
         * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)

        (An illustrative sketch of partial reads, ``_demo_partial_reads``,
        follows this class definition.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` bytes of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, an empty
            bytestring will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # let's defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
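

# Illustrative sketches (editor's additions, not part of urllib3) for the
# HTTPResponse machinery above; the _demo_ names are hypothetical and never
# called by the library itself.
def _demo_content_length_parsing() -> None:
    # _init_length() accepts repeated-but-identical Content-Length values
    # (RFC 7230 section 3.3.2) and rejects mismatched ones.
    response = HTTPResponse(
        body=io.BytesIO(b"x" * 5),
        headers={"content-length": "5, 5"},
        preload_content=False,
    )
    assert response.length_remaining == 5

    try:
        HTTPResponse(
            body=io.BytesIO(b""),
            headers={"content-length": "5, 6"},
            preload_content=False,
        )
    except InvalidHeader:
        pass  # mismatched values are rejected
    else:
        raise AssertionError("expected InvalidHeader")


def _demo_partial_reads() -> None:
    # read(amt) over a compressed body returns exact counts of *decoded*
    # bytes, with any overshoot parked in the BytesQueueBuffer.
    import gzip

    response = HTTPResponse(
        body=io.BytesIO(gzip.compress(b"hello world")),
        headers={"content-encoding": "gzip"},
        preload_content=False,
    )
    assert response.read(5) == b"hello"
    assert response.read(6) == b" world"


def _demo_stream_and_iter() -> None:
    # stream() yields decoded chunks until the body is exhausted; iterating
    # the response instead yields newline-delimited lines reassembled across
    # chunk boundaries.
    response = HTTPResponse(body=io.BytesIO(b"a" * 100), preload_content=False)
    assert list(response.stream(amt=64)) == [b"a" * 64, b"a" * 36]

    lines = HTTPResponse(body=io.BytesIO(b"one\ntwo\nthree"), preload_content=False)
    assert list(lines) == [b"one\n", b"two\n", b"three"]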