Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

560 statements  

« prev     ^ index     » next       coverage.py v7.3.1, created at 2023-09-25 06:20 +0000

1from __future__ import annotations 

2 

3import collections 

4import io 

5import json as _json 

6import logging 

7import re 

8import sys 

9import typing 

10import warnings 

11import zlib 

12from contextlib import contextmanager 

13from http.client import HTTPMessage as _HttplibHTTPMessage 

14from http.client import HTTPResponse as _HttplibHTTPResponse 

15from socket import timeout as SocketTimeout 

16 

17try: 

18 try: 

19 import brotlicffi as brotli # type: ignore[import] 

20 except ImportError: 

21 import brotli # type: ignore[import] 

22except ImportError: 

23 brotli = None 

24 

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # (Fixed: the original had a duplicated "_zstd_version = _zstd_version ="
    # assignment; a single binding is sufficient.)
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

40 

41from . import util 

42from ._base_connection import _TYPE_BODY 

43from ._collections import HTTPHeaderDict 

44from .connection import BaseSSLError, HTTPConnection, HTTPException 

45from .exceptions import ( 

46 BodyNotHttplibCompatible, 

47 DecodeError, 

48 HTTPError, 

49 IncompleteRead, 

50 InvalidChunkLength, 

51 InvalidHeader, 

52 ProtocolError, 

53 ReadTimeoutError, 

54 ResponseNotChunked, 

55 SSLError, 

56) 

57from .util.response import is_fp_closed, is_response_to_head 

58from .util.retry import Retry 

59 

60if typing.TYPE_CHECKING: 

61 from typing_extensions import Literal 

62 

63 from .connectionpool import HTTPConnectionPool 

64 

65log = logging.getLogger(__name__) 

66 

67 

class ContentDecoder:
    """Abstract streaming decoder for one ``Content-Encoding`` token.

    ``decompress()`` is fed successive body chunks; ``flush()`` drains
    whatever the decoder still buffers once input is exhausted.
    """

    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    """Decoder for 'deflate' bodies.

    Some servers send raw DEFLATE data instead of the zlib-wrapped form,
    so the first decode attempt uses the zlib wrapper and, on failure,
    falls back to a raw stream, replaying every byte seen so far.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Accumulate raw input so the raw-deflate fallback can replay it.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parse failed: assume raw DEFLATE and replay
            # everything received so far through a raw decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if decompressed:
            # Wrapped mode confirmed; the replay buffer is no longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decompressed

    def flush(self) -> bytes:
        return self._obj.flush()


class GzipDecoderState:
    """State markers for :class:`GzipDecoder`."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    """Decoder for 'gzip' bodies, including multi-member streams."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                was_past_first_member = (
                    self._state == GzipDecoderState.OTHER_MEMBERS
                )
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if was_past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Leftover bytes mean another gzip member follows; decode it
            # with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()

142 

143 

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever incremental-decode method the installed
            # library provides directly onto this instance as `decompress`.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one of the two libraries exposes flush(); for the other
            # there is nothing buffered to drain.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""

161 

162 

if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A zstd body may consist of several concatenated frames; when a
            # frame ends (eof) with input left over, decode the remainder
            # with a fresh decompressor.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18, enforced at import time above) tells
            # us whether a complete frame was received.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]

184 

185 

class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per comma-separated coding token, kept in header order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # decompress() runs the list in reverse, so index 0 is the final
        # decoding stage and the only one whose flush output reaches callers.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # The header lists codings in application order; undo them in reverse.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

205 

206 

def _get_decoder(mode: str) -> ContentDecoder:
    """Map a Content-Encoding value (or comma-joined list) to a decoder."""
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()
    if brotli is not None and mode == "br":
        return BrotliDecoder()
    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    # Everything else falls through to deflate, the historical default.
    return DeflateDecoder()

221 

222 

class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of bytes chunks.

    read() must hand back exactly the requested number of decoded bytes
    while decoded chunks arrive in arbitrary sizes, so chunks are queued
    whole via put() and only split when a get() boundary lands inside one.

    Peak memory usage is the queued data plus the largest chunk copied out
    by get(); a single huge chunk therefore costs one full copy.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Return up to ``n`` buffered bytes (fewer if the buffer runs out)."""
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            needed = n - fetched
            if len(chunk) > needed:
                # The boundary falls inside this chunk: emit the head and
                # push the tail back for the next call.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)

            if not self.buffer:
                break

        return out.getvalue()

280 

281 

class BaseHTTPResponse(io.IOBase):
    """Abstract base for urllib3 responses.

    Holds the status line, headers, retry state and content-decoding
    machinery shared by concrete response implementations, and declares the
    file-like interface (read/stream/close) that subclasses must provide.
    """

    # Content-Encoding tokens this build can decode; 'br' and 'zstd' are only
    # present when the optional libraries imported successfully above.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions the active decoder may raise; _decode() converts any of
    # these into urllib3's DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Normalize any mapping into an HTTPHeaderDict without copying one
        # that is already the right type.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the full (possibly cached) response body here.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder when every listed
                # coding is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would silently corrupt output, so
            # it is rejected outright.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

502 

503 

504class HTTPResponse(BaseHTTPResponse): 

505 """ 

506 HTTP Response container. 

507 

508 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is 

509 loaded and decoded on-demand when the ``data`` property is accessed. This 

510 class is also compatible with the Python standard library's :mod:`io` 

511 module, and can hence be treated as a readable object in the context of that 

512 framework. 

513 

514 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`: 

515 

516 :param preload_content: 

517 If True, the response's body will be preloaded during construction. 

518 

519 :param decode_content: 

520 If True, will attempt to decode the body based on the 

521 'content-encoding' header. 

522 

523 :param original_response: 

524 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse` 

525 object, it's convenient to include the original for debug purposes. It's 

526 otherwise unused. 

527 

528 :param retries: 

529 The retries contains the last :class:`~urllib3.util.retry.Retry` that 

530 was used during the request. 

531 

532 :param enforce_content_length: 

533 Enforce content length checking. Body returned by server must match 

534 value of Content-Length header, if present. Otherwise, raise error. 

535 """ 

536 

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        # Parameters are documented on the class docstring; status-line and
        # decoding state are initialized by the base class.
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the stream we read from.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

596 

597 def release_conn(self) -> None: 

598 if not self._pool or not self._connection: 

599 return None 

600 

601 self._pool._put_conn(self._connection) 

602 self._connection = None 

603 

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort: a failed drain only means this connection won't
            # be reused.
            pass

614 

    @property
    def data(self) -> bytes:
        """The response body, fully read (and cached) on first access."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # cache_content stores the result in _body for later accesses.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

625 

    @property
    def connection(self) -> HTTPConnection | None:
        """The HTTPConnection backing this response, or None once released."""
        return self._connection

629 

    def isclosed(self) -> bool:
        """http.client compatibility: whether the body file object is closed."""
        return is_fp_closed(self._fp)

632 

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # _raw_read() increments this counter by the raw byte count read.
        return self._fp_bytes_read

640 

641 def _init_length(self, request_method: str | None) -> int | None: 

642 """ 

643 Set initial length value for Response content if available. 

644 """ 

645 length: int | None 

646 content_length: str | None = self.headers.get("content-length") 

647 

648 if content_length is not None: 

649 if self.chunked: 

650 # This Response will fail with an IncompleteRead if it can't be 

651 # received as chunked. This method falls back to attempt reading 

652 # the response before raising an exception. 

653 log.warning( 

654 "Received response with both Content-Length and " 

655 "Transfer-Encoding set. This is expressly forbidden " 

656 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and " 

657 "attempting to process response as Transfer-Encoding: " 

658 "chunked." 

659 ) 

660 return None 

661 

662 try: 

663 # RFC 7230 section 3.3.2 specifies multiple content lengths can 

664 # be sent in a single Content-Length header 

665 # (e.g. Content-Length: 42, 42). This line ensures the values 

666 # are all valid ints and that as long as the `set` length is 1, 

667 # all values are the same. Otherwise, the header is invalid. 

668 lengths = {int(val) for val in content_length.split(",")} 

669 if len(lengths) > 1: 

670 raise InvalidHeader( 

671 "Content-Length contained multiple " 

672 "unmatching values (%s)" % content_length 

673 ) 

674 length = lengths.pop() 

675 except ValueError: 

676 length = None 

677 else: 

678 if length < 0: 

679 length = None 

680 

681 else: # if content_length is None 

682 length = None 

683 

684 # Convert status to int for comparison 

685 # In some cases, httplib returns a status of "_UNKNOWN" 

686 try: 

687 status = int(self.status) 

688 except ValueError: 

689 status = 0 

690 

691 # Check for responses that shouldn't include a body 

692 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD": 

693 length = 0 

694 

695 return length 

696 

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

752 

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            # Read in <= 2 GiB chunks to dodge the overflow, accumulating
            # into a single buffer.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

800 

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Raw (undecoded) bytes are returned; byte accounting for tell() and
        length_remaining happens here.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

842 

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve entirely from previously-decoded bytes when possible.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end of stream: either a full read, or a
        # partial read that hit EOF.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until `amt` decoded bytes are buffered or the
            # stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

913 

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            # Chunked transfer-encoding carries its own framing; delegate.
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            # Keep yielding while raw bytes remain or decoded bytes are
            # still buffered.
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

940 

941 # Overrides from io.IOBase 

942 def readable(self) -> bool: 

943 return True 

944 

    def close(self) -> None:
        # Close the body file object and, when held, the connection itself.
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            # With auto_close disabled, io.IOBase's closed flag is the source
            # of truth, so set it explicitly now.
            io.IOBase.close(self)

954 

    @property
    def closed(self) -> bool:
        # With auto_close disabled, mirror io.IOBase's own closed flag;
        # otherwise derive closed-ness from the wrapped file-like object.
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

967 

    def fileno(self) -> int:
        # io.IOBase interface: expose the wrapped object's file descriptor
        # when it has one.
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

978 

979 def flush(self) -> None: 

980 if ( 

981 self._fp is not None 

982 and hasattr(self._fp, "flush") 

983 and not getattr(self._fp, "closed", False) 

984 ): 

985 return self._fp.flush() 

986 

987 def supports_chunked_reads(self) -> bool: 

988 """ 

989 Checks if the underlying file-like object looks like a 

990 :class:`http.client.HTTPResponse` object. We do this by testing for 

991 the fp attribute. If it is present we assume it returns raw chunks as 

992 processed by read_chunked(). 

993 """ 

994 return hasattr(self._fp, "fp") 

995 

996 def _update_chunk_length(self) -> None: 

997 # First, we'll figure out length of a chunk and then 

998 # we'll try to read it from socket. 

999 if self.chunk_left is not None: 

1000 return None 

1001 line = self._fp.fp.readline() # type: ignore[union-attr] 

1002 line = line.split(b";", 1)[0] 

1003 try: 

1004 self.chunk_left = int(line, 16) 

1005 except ValueError: 

1006 # Invalid chunked protocol response, abort. 

1007 self.close() 

1008 raise InvalidChunkLength(self, line) from None 

1009 

1010 def _handle_chunk(self, amt: int | None) -> bytes: 

1011 returned_chunk = None 

1012 if amt is None: 

1013 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1014 returned_chunk = chunk 

1015 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1016 self.chunk_left = None 

1017 elif self.chunk_left is not None and amt < self.chunk_left: 

1018 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1019 self.chunk_left = self.chunk_left - amt 

1020 returned_chunk = value 

1021 elif amt == self.chunk_left: 

1022 value = self._fp._safe_read(amt) # type: ignore[union-attr] 

1023 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1024 self.chunk_left = None 

1025 returned_chunk = value 

1026 else: # amt > self.chunk_left 

1027 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr] 

1028 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk. 

1029 self.chunk_left = None 

1030 return returned_chunk # type: ignore[no-any-return] 

1031 

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            if the response lacks a ``Transfer-Encoding: chunked`` header.
        :raises BodyNotHttplibCompatible:
            if the wrapped body exposes no ``fp`` attribute to read raw
            chunks from (see :meth:`supports_chunked_reads`).
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        # All socket reads happen inside _error_catcher so timeouts and
        # protocol errors are translated into urllib3 exceptions.
        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            # A zero-length chunk marks the end of the body.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it. This also skips any
            # trailer lines that precede the final CRLF.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

1103 

1104 @property 

1105 def url(self) -> str | None: 

1106 """ 

1107 Returns the URL that was the source of this response. 

1108 If the request that generated this response redirected, this method 

1109 will return the final redirect location. 

1110 """ 

1111 return self._request_url 

1112 

1113 @url.setter 

1114 def url(self, url: str) -> None: 

1115 self._request_url = url 

1116 

1117 def __iter__(self) -> typing.Iterator[bytes]: 

1118 buffer: list[bytes] = [] 

1119 for chunk in self.stream(decode_content=True): 

1120 if b"\n" in chunk: 

1121 chunks = chunk.split(b"\n") 

1122 yield b"".join(buffer) + chunks[0] + b"\n" 

1123 for x in chunks[1:-1]: 

1124 yield x + b"\n" 

1125 if chunks[-1]: 

1126 buffer = [chunks[-1]] 

1127 else: 

1128 buffer = [] 

1129 else: 

1130 buffer.append(chunk) 

1131 if buffer: 

1132 yield b"".join(buffer)