Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

557 statements  

« prev     ^ index     » next       coverage.py v7.2.0, created at 2023-02-23 06:30 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fix: the assignment was accidentally duplicated as
    # "_zstd_version = _zstd_version = ..." — harmless but confusing.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing_extensions import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Concrete subclasses must implement both ``decompress`` and ``flush``.
    """

    def decompress(self, data: bytes) -> bytes:
        """Feed *data* to the decoder and return any decoded bytes."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Finish decoding and return any remaining buffered bytes."""
        raise NotImplementedError()

74 

75 

class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Servers disagree on whether "deflate" means a zlib-wrapped stream
    (RFC 1950) or a raw DEFLATE stream (RFC 1951). The first chunks are
    tried with the zlib wrapper; if that fails, all bytes seen so far
    are replayed through a raw-DEFLATE decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing the stream format: keep a copy of everything seen
        # so the raw-DEFLATE fallback can replay it.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # The zlib-wrapped guess was wrong; switch to raw DEFLATE and
            # re-run the accumulated input through the new object.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            buffered, self._data = self._data, None  # type: ignore[assignment]
            return self.decompress(buffered)
        if out:
            # Producing output proves the zlib-wrapped guess was right.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()

106 

107 

class GzipDecoderState:
    """Tri-state progress marker used by the gzip decoder."""

    # Decoding the first gzip member of the stream.
    FIRST_MEMBER = 0
    # At least one member completed; more members may follow.
    OTHER_MEMBERS = 1
    # A decode error occurred; all further input is discarded.
    SWALLOW_DATA = 2

112 

113 

class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip``.

    Supports multi-member gzip streams; trailing garbage after a complete
    member is tolerated, matching the behavior of other gzip clients.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: start a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. brotlipy exposes
        # .decompress() on its decompressor, Brotli exposes .process().
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            attr = "decompress" if hasattr(self._obj, "decompress") else "process"
            setattr(self, "decompress", getattr(self._obj, attr))

        def flush(self) -> bytes:
            # Only brotlipy's decompressor provides flush(); for 'Brotli'
            # there is nothing to flush.
            flush_method = getattr(self._obj, "flush", None)
            if flush_method is not None:
                return flush_method()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: zstd``."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            return self._obj.decompress(data) if data else b""  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            ret = self._obj.flush()
            # 'eof' (zstandard >= 0.18) reports whether the stream ended on
            # a complete frame; anything else means truncated input.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

179 

180 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # Only the first-applied (last-undone) decoder can hold tail data.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse order of application.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

200 

201 

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder matching a Content-Encoding header value."""
    if "," in mode:
        # Multiple codings were applied, e.g. "gzip, br".
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()
    elif brotli is not None and mode == "br":
        return BrotliDecoder()
    elif zstd is not None and mode == "zstd":
        return ZstdDecoder()
    else:
        # Historical default: treat anything else as deflate.
        return DeflateDecoder()

216 

217 

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

    * self.buffer, which contains the full data
    * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        # Chunks in arrival order; deque gives O(1) pops from the left.
        self.buffer: typing.Deque[bytes] = collections.deque()
        # Total number of buffered bytes across all chunks.
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append *data* to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the front of the buffer.

        :raises ValueError: if *n* is negative.
        :raises RuntimeError: if *n* is positive and the buffer is empty.
        """
        if n == 0:
            # Fix: a request for zero bytes is always satisfiable, even on an
            # empty buffer. Previously this fell into the empty-buffer check
            # and raised RuntimeError, which made HTTPResponse.read(0) crash.
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Split the chunk: return the prefix, requeue the suffix.
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

273 

274 

class BaseHTTPResponse(io.IOBase):
    """Abstract base for urllib3 HTTP responses.

    Stores status/headers/metadata, detects redirects and chunked
    transfer-encoding, and implements the streaming content-decoding
    pipeline. Subclasses supply the actual body I/O (read/stream/close).
    """

    # Content-Encodings this module can decode; extended below when the
    # optional brotli/zstandard packages imported successfully.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types _decode() converts into urllib3's DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once _decode() has produced output; guards against mixing
        # decoded and undecoded reads on the same response.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Note: assigning through the 'retries' property below, which may
        # also update the URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily created by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full response body; implemented by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        # Implemented by subclasses.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        """The :class:`~urllib3.util.retry.Retry` used for the request, if any."""
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Implemented by subclasses.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Implemented by subclasses.
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Implemented by subclasses.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def close(self) -> None:
        # Implemented by subclasses.
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder if at least one of
                # the listed codings is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Refuse to hand out raw bytes once decoded bytes were returned:
            # callers would otherwise see a mix of both.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill *b* with up to len(b) bytes; returns the number written (0 at EOF).
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        # http.client compatibility: the final URL of the response.
        return self.url

495 

496 

497class HTTPResponse(BaseHTTPResponse): 

498 """ 

499 HTTP Response container. 

500 

501 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

502 loaded and decoded on-demand when the ``data`` property is accessed. This 

503 class is also compatible with the Python standard library's :mod:`io` 

504 module, and can hence be treated as a readable object in the context of that 

505 framework. 

506 

507 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

508 

509 :param preload_content: 

510 If True, the response's body will be preloaded during construction. 

511 

512 :param decode_content: 

513 If True, will attempt to decode the body based on the 

514 'content-encoding' header. 

515 

516 :param original_response: 

517 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

518 object, it's convenient to include the original for debug purposes. It's 

519 otherwise unused. 

520 

521 :param retries: 

522 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

523 was used during the request. 

524 

525 :param enforce_content_length: 

526 Enforce content length checking. Body returned by server must match 

527 value of Content-Length header, if present. Otherwise, raise error. 

528 """ 

529 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        """Wrap a response *body* (str/bytes or a file-like object) with
        urllib3's decoding, length-checking, and pooling machinery.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the read() source.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

589 

590 def release_conn(self) -> None: 

591 if not self._pool or not self._connection: 

592 return None 

593 

594 self._pool._put_conn(self._connection) 

595 self._connection = None 

596 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: a failed read just means the connection
            # cannot be reused; swallowing the error is intentional.
            pass

607 

    @property
    def data(self) -> bytes:
        """The full (possibly cached) response body; reads it if necessary."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # cache_content=True stores the result in self._body for reuse.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

618 

    @property
    def connection(self) -> HTTPConnection | None:
        """The underlying connection, if it has not been released."""
        return self._connection

622 

    def isclosed(self) -> bool:
        """Whether the underlying file object is closed (http.client compat)."""
        return is_fp_closed(self._fp)

625 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

633 

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        Returns the expected body length in bytes, or ``None`` when it
        cannot be determined (chunked, invalid, or absent Content-Length).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                # Negative lengths are nonsensical; treat as unknown.
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

689 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # A read-timeout dressed up as an SSL error.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

745 

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Workaround path: accumulate the body in <= 256 MiB chunks.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

793 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also tracks bytes read for tell()/Content-Length accounting and
        raises IncompleteRead if the body ends short of Content-Length.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

835 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve entirely from the decoded buffer when possible.
            # NOTE(review): for amt == 0 with an empty buffer this calls
            # BytesQueueBuffer.get(0), which raises RuntimeError per its
            # empty-buffer check — looks like an unhandled edge; confirm.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end-of-body: either a full read, or a
        # bounded read that hit EOF (empty data for a nonzero request).
        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until enough decoded bytes are buffered
            # or the stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

910 

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                # Skip empty reads so the consumer never sees b"".
                if data:
                    yield data

937 

938 # Overrides from io.IOBase 

939 def readable(self) -> bool: 

940 return True 

941 

    def close(self) -> None:
        """Close the file object and connection; honors ``auto_close``."""
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        # When auto_close is disabled, closing is explicit: mark the
        # io.IOBase state closed only in that case.
        if not self.auto_close:
            io.IOBase.close(self)

951 

    @property
    def closed(self) -> bool:
        """Closed-state, delegating to whichever signal the body exposes."""
        if not self.auto_close:
            # Explicit-close mode: trust io.IOBase's own flag.
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

964 

965 def fileno(self) -> int: 

966 if self._fp is None: 

967 raise OSError("HTTPResponse has no file to get a fileno from") 

968 elif hasattr(self._fp, "fileno"): 

969 return self._fp.fileno() 

970 else: 

971 raise OSError( 

972 "The file-like object this HTTPResponse is wrapped " 

973 "around has no file descriptor" 

974 ) 

975 

    def flush(self) -> None:
        """Flush the wrapped file object, if it supports it and is open."""
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

983 

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

992 

993 def _update_chunk_length(self) -> None: 

994 # First, we'll figure out length of a chunk and then 

995 # we'll try to read it from socket. 

996 if self.chunk_left is not None: 

997 return None 

998 line = self._fp.fp.readline() # type: ignore[union-attr] 

999 line = line.split(b";", 1)[0] 

1000 try: 

1001 self.chunk_left = int(line, 16) 

1002 except ValueError: 

1003 # Invalid chunked protocol response, abort. 

1004 self.close() 

1005 raise InvalidChunkLength(self, line) from None 

1006 

1007 def _handle_chunk(self, amt: int | None) -> bytes: 

1008 returned_chunk = None 

1009 if amt is None: 

1010 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1011 returned_chunk = chunk 

1012 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1013 self.chunk_left = None 

1014 elif self.chunk_left is not None and amt < self.chunk_left: 

1015 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1016 self.chunk_left = self.chunk_left - amt 

1017 returned_chunk = value 

1018 elif amt == self.chunk_left: 

1019 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1020 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1021 self.chunk_left = None 

1022 returned_chunk = value 

1023 else: # amt > self.chunk_left 

1024 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1025 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1026 self.chunk_left = None 

1027 return returned_chunk # type: ignore[no-any-return] 

1028 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            if the response lacks 'Transfer-Encoding: chunked'.
        :raises BodyNotHttplibCompatible:
            if the wrapped file object has no ``fp`` attribute.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        # _error_catcher translates low-level socket/SSL errors raised
        # anywhere below into urllib3 exceptions.
        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            # One iteration per chunk; a zero-size chunk signals the end
            # of the body, after which only trailers may follow.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            # This loop also consumes any trailer lines that precede the
            # terminating blank line.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1100 

1101 @property 

1102 def url(self) -> str | None: 

1103 """ 

1104 Returns the URL that was the source of this response. 

1105 If the request that generated this response redirected, this method 

1106 will return the final redirect location. 

1107 """ 

1108 return self._request_url 

1109 

1110 @url.setter 

1111 def url(self, url: str) -> None: 

1112 self._request_url = url 

1113 

1114 def __iter__(self) -> typing.Iterator[bytes]: 

1115 buffer: list[bytes] = [] 

1116 for chunk in self.stream(decode_content=True): 

1117 if b"\n" in chunk: 

1118 chunks = chunk.split(b"\n") 

1119 yield b"".join(buffer) + chunks[0] + b"\n" 

1120 for x in chunks[1:-1]: 

1121 yield x + b"\n" 

1122 if chunks[-1]: 

1123 buffer = [chunks[-1]] 

1124 else: 

1125 buffer = [] 

1126 else: 

1127 buffer.append(chunk) 

1128 if buffer: 

1129 yield b"".join(buffer)