Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%

1from __future__ import annotations

3import collections

4import io

5import json as _json

6import logging

7import re

8import sys

9import typing

10import warnings

11import zlib

12from contextlib import contextmanager

13from http.client import HTTPMessage as _HttplibHTTPMessage

14from http.client import HTTPResponse as _HttplibHTTPResponse

15from socket import timeout as SocketTimeout

17try:

18 try:

19 import brotlicffi as brotli # type: ignore[import]

20 except ImportError:

21 import brotli # type: ignore[import]

22except ImportError:

23 brotli = None

25try:

26 import zstandard as zstd # type: ignore[import]

28 # The package 'zstandard' added the 'eof' property starting

29 # in v0.18.0 which we require to ensure a complete and

30 # valid zstd stream was fed into the ZstdDecoder.

31 # See: https://github.com/urllib3/urllib3/pull/2624

32 _zstd_version = _zstd_version = tuple(

33 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]

34 )

35 if _zstd_version < (0, 18): # Defensive:

36 zstd = None

38except (AttributeError, ImportError, ValueError): # Defensive:

39 zstd = None

41from . import util

42from ._base_connection import _TYPE_BODY

43from ._collections import HTTPHeaderDict

44from .connection import BaseSSLError, HTTPConnection, HTTPException

45from .exceptions import (

46 BodyNotHttplibCompatible,

47 DecodeError,

48 HTTPError,

49 IncompleteRead,

50 InvalidChunkLength,

51 InvalidHeader,

52 ProtocolError,

53 ReadTimeoutError,

54 ResponseNotChunked,

55 SSLError,

56)

57from .util.response import is_fp_closed, is_response_to_head

58from .util.retry import Retry

60if typing.TYPE_CHECKING:

61 from typing import Literal

63 from .connectionpool import HTTPConnectionPool

65log = logging.getLogger(__name__)

68class ContentDecoder:

69 def decompress(self, data: bytes) -> bytes:

70 raise NotImplementedError()

72 def flush(self) -> bytes:

73 raise NotImplementedError()

76class DeflateDecoder(ContentDecoder):

77 def __init__(self) -> None:

78 self._first_try = True

79 self._data = b""

80 self._obj = zlib.decompressobj()

82 def decompress(self, data: bytes) -> bytes:

83 if not data:

84 return data

86 if not self._first_try:

87 return self._obj.decompress(data)

89 self._data += data

90 try:

91 decompressed = self._obj.decompress(data)

92 if decompressed:

93 self._first_try = False

94 self._data = None # type: ignore[assignment]

95 return decompressed

96 except zlib.error:

97 self._first_try = False

98 self._obj = zlib.decompressobj(-zlib.MAX_WBITS)

99 try:

100 return self.decompress(self._data)

101 finally:

102 self._data = None # type: ignore[assignment]

103

104 def flush(self) -> bytes:

105 return self._obj.flush()

106

107

108class GzipDecoderState:

109 FIRST_MEMBER = 0

110 OTHER_MEMBERS = 1

111 SWALLOW_DATA = 2

112

113

114class GzipDecoder(ContentDecoder):

115 def __init__(self) -> None:

116 self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

117 self._state = GzipDecoderState.FIRST_MEMBER

118

119 def decompress(self, data: bytes) -> bytes:

120 ret = bytearray()

121 if self._state == GzipDecoderState.SWALLOW_DATA or not data:

122 return bytes(ret)

123 while True:

124 try:

125 ret += self._obj.decompress(data)

126 except zlib.error:

127 previous_state = self._state

128 # Ignore data after the first error

129 self._state = GzipDecoderState.SWALLOW_DATA

130 if previous_state == GzipDecoderState.OTHER_MEMBERS:

131 # Allow trailing garbage acceptable in other gzip clients

132 return bytes(ret)

133 raise

134 data = self._obj.unused_data

135 if not data:

136 return bytes(ret)

137 self._state = GzipDecoderState.OTHER_MEMBERS

138 self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

139

140 def flush(self) -> bytes:

141 return self._obj.flush()

142

143

144if brotli is not None:

145

146 class BrotliDecoder(ContentDecoder):

147 # Supports both 'brotlipy' and 'Brotli' packages

148 # since they share an import name. The top branches

149 # are for 'brotlipy' and bottom branches for 'Brotli'

150 def __init__(self) -> None:

151 self._obj = brotli.Decompressor()

152 if hasattr(self._obj, "decompress"):

153 setattr(self, "decompress", self._obj.decompress)

154 else:

155 setattr(self, "decompress", self._obj.process)

156

157 def flush(self) -> bytes:

158 if hasattr(self._obj, "flush"):

159 return self._obj.flush() # type: ignore[no-any-return]

160 return b""

161

162

163if zstd is not None:

164

165 class ZstdDecoder(ContentDecoder):

166 def __init__(self) -> None:

167 self._obj = zstd.ZstdDecompressor().decompressobj()

168

169 def decompress(self, data: bytes) -> bytes:

170 if not data:

171 return b""

172 data_parts = [self._obj.decompress(data)]

173 while self._obj.eof and self._obj.unused_data:

174 unused_data = self._obj.unused_data

175 self._obj = zstd.ZstdDecompressor().decompressobj()

176 data_parts.append(self._obj.decompress(unused_data))

177 return b"".join(data_parts)

178

179 def flush(self) -> bytes:

180 ret = self._obj.flush() # note: this is a no-op

181 if not self._obj.eof:

182 raise DecodeError("Zstandard data is incomplete")

183 return ret # type: ignore[no-any-return]

184

185

186class MultiDecoder(ContentDecoder):

187 """

188 From RFC7231:

189 If one or more encodings have been applied to a representation, the

190 sender that applied the encodings MUST generate a Content-Encoding

191 header field that lists the content codings in the order in which

192 they were applied.

193 """

194

195 def __init__(self, modes: str) -> None:

196 self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

197

198 def flush(self) -> bytes:

199 return self._decoders[0].flush()

200

201 def decompress(self, data: bytes) -> bytes:

202 for d in reversed(self._decoders):

203 data = d.decompress(data)

204 return data

205

206

207def _get_decoder(mode: str) -> ContentDecoder:

208 if "," in mode:

209 return MultiDecoder(mode)

210

211 # According to RFC 9110 section 8.4.1.3, recipients should

212 # consider x-gzip equivalent to gzip

213 if mode in ("gzip", "x-gzip"):

214 return GzipDecoder()

215

216 if brotli is not None and mode == "br":

217 return BrotliDecoder()

218

219 if zstd is not None and mode == "zstd":

220 return ZstdDecoder()

221

222 return DeflateDecoder()

223

224

225class BytesQueueBuffer:

226 """Memory-efficient bytes buffer

227

228 To return decoded data in read() and still follow the BufferedIOBase API, we need a

229 buffer to always return the correct amount of bytes.

230

231 This buffer should be filled using calls to put()

232

233 Our maximum memory usage is determined by the sum of the size of:

234

235 * self.buffer, which contains the full data

236 * the largest chunk that we will copy in get()

237

238 The worst case scenario is a single chunk, in which case we'll make a full copy of

239 the data inside get().

240 """

241

242 def __init__(self) -> None:

243 self.buffer: typing.Deque[bytes] = collections.deque()

244 self._size: int = 0

245

246 def __len__(self) -> int:

247 return self._size

248

249 def put(self, data: bytes) -> None:

250 self.buffer.append(data)

251 self._size += len(data)

252

253 def get(self, n: int) -> bytes:

254 if n == 0:

255 return b""

256 elif not self.buffer:

257 raise RuntimeError("buffer is empty")

258 elif n < 0:

259 raise ValueError("n should be > 0")

260

261 fetched = 0

262 ret = io.BytesIO()

263 while fetched < n:

264 remaining = n - fetched

265 chunk = self.buffer.popleft()

266 chunk_length = len(chunk)

267 if remaining < chunk_length:

268 left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]

269 ret.write(left_chunk)

270 self.buffer.appendleft(right_chunk)

271 self._size -= remaining

272 break

273 else:

274 ret.write(chunk)

275 self._size -= chunk_length

276 fetched += chunk_length

277

278 if not self.buffer:

279 break

280

281 return ret.getvalue()

282

283

284class BaseHTTPResponse(io.IOBase):

285 CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]

286 if brotli is not None:

287 CONTENT_DECODERS += ["br"]

288 if zstd is not None:

289 CONTENT_DECODERS += ["zstd"]

290 REDIRECT_STATUSES = [301, 302, 303, 307, 308]

291

292 DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)

293 if brotli is not None:

294 DECODER_ERROR_CLASSES += (brotli.error,)

295

296 if zstd is not None:

297 DECODER_ERROR_CLASSES += (zstd.ZstdError,)

298

299 def __init__(

300 self,

301 *,

302 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,

303 status: int,

304 version: int,

305 reason: str | None,

306 decode_content: bool,

307 request_url: str | None,

308 retries: Retry | None = None,

309 ) -> None:

310 if isinstance(headers, HTTPHeaderDict):

311 self.headers = headers

312 else:

313 self.headers = HTTPHeaderDict(headers) # type: ignore[arg-type]

314 self.status = status

315 self.version = version

316 self.reason = reason

317 self.decode_content = decode_content

318 self._has_decoded_content = False

319 self._request_url: str | None = request_url

320 self.retries = retries

321

322 self.chunked = False

323 tr_enc = self.headers.get("transfer-encoding", "").lower()

324 # Don't incur the penalty of creating a list and then discarding it

325 encodings = (enc.strip() for enc in tr_enc.split(","))

326 if "chunked" in encodings:

327 self.chunked = True

328

329 self._decoder: ContentDecoder | None = None

330

331 def get_redirect_location(self) -> str | None | Literal[False]:

332 """

333 Should we redirect and where to?

334

335 :returns: Truthy redirect location string if we got a redirect status

336 code and valid location. ``None`` if redirect status and no

337 location. ``False`` if not a redirect status code.

338 """

339 if self.status in self.REDIRECT_STATUSES:

340 return self.headers.get("location")

341 return False

342

343 @property

344 def data(self) -> bytes:

345 raise NotImplementedError()

346

347 def json(self) -> typing.Any:

348 """

349 Parses the body of the HTTP response as JSON.

350

351 To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

352

353 This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

354

355 Read more :ref:`here <json>`.

356 """

357 data = self.data.decode("utf-8")

358 return _json.loads(data)

359

360 @property

361 def url(self) -> str | None:

362 raise NotImplementedError()

363

364 @url.setter

365 def url(self, url: str | None) -> None:

366 raise NotImplementedError()

367

368 @property

369 def connection(self) -> HTTPConnection | None:

370 raise NotImplementedError()

371

372 @property

373 def retries(self) -> Retry | None:

374 return self._retries

375

376 @retries.setter

377 def retries(self, retries: Retry | None) -> None:

378 # Override the request_url if retries has a redirect location.

379 if retries is not None and retries.history:

380 self.url = retries.history[-1].redirect_location

381 self._retries = retries

382

383 def stream(

384 self, amt: int | None = 2**16, decode_content: bool | None = None

385 ) -> typing.Iterator[bytes]:

386 raise NotImplementedError()

387

388 def read(

389 self,

390 amt: int | None = None,

391 decode_content: bool | None = None,

392 cache_content: bool = False,

393 ) -> bytes:

394 raise NotImplementedError()

395

396 def read_chunked(

397 self,

398 amt: int | None = None,

399 decode_content: bool | None = None,

400 ) -> typing.Iterator[bytes]:

401 raise NotImplementedError()

402

403 def release_conn(self) -> None:

404 raise NotImplementedError()

405

406 def drain_conn(self) -> None:

407 raise NotImplementedError()

408

409 def close(self) -> None:

410 raise NotImplementedError()

411

412 def _init_decoder(self) -> None:

413 """

414 Set-up the _decoder attribute if necessary.

415 """

416 # Note: content-encoding value should be case-insensitive, per RFC 7230

417 # Section 3.2

418 content_encoding = self.headers.get("content-encoding", "").lower()

419 if self._decoder is None:

420 if content_encoding in self.CONTENT_DECODERS:

421 self._decoder = _get_decoder(content_encoding)

422 elif "," in content_encoding:

423 encodings = [

424 e.strip()

425 for e in content_encoding.split(",")

426 if e.strip() in self.CONTENT_DECODERS

427 ]

428 if encodings:

429 self._decoder = _get_decoder(content_encoding)

430

431 def _decode(

432 self, data: bytes, decode_content: bool | None, flush_decoder: bool

433 ) -> bytes:

434 """

435 Decode the data passed in and potentially flush the decoder.

436 """

437 if not decode_content:

438 if self._has_decoded_content:

439 raise RuntimeError(

440 "Calling read(decode_content=False) is not supported after "

441 "read(decode_content=True) was called."

442 )

443 return data

444

445 try:

446 if self._decoder:

447 data = self._decoder.decompress(data)

448 self._has_decoded_content = True

449 except self.DECODER_ERROR_CLASSES as e:

450 content_encoding = self.headers.get("content-encoding", "").lower()

451 raise DecodeError(

452 "Received response with content-encoding: %s, but "

453 "failed to decode it." % content_encoding,

454 e,

455 ) from e

456 if flush_decoder:

457 data += self._flush_decoder()

458

459 return data

460

461 def _flush_decoder(self) -> bytes:

462 """

463 Flushes the decoder. Should only be called if the decoder is actually

464 being used.

465 """

466 if self._decoder:

467 return self._decoder.decompress(b"") + self._decoder.flush()

468 return b""

469

470 # Compatibility methods for `io` module

471 def readinto(self, b: bytearray) -> int:

472 temp = self.read(len(b))

473 if len(temp) == 0:

474 return 0

475 else:

476 b[: len(temp)] = temp

477 return len(temp)

478

479 # Compatibility methods for http.client.HTTPResponse

480 def getheaders(self) -> HTTPHeaderDict:

481 warnings.warn(

482 "HTTPResponse.getheaders() is deprecated and will be removed "

483 "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",

484 category=DeprecationWarning,

485 stacklevel=2,

486 )

487 return self.headers

488

489 def getheader(self, name: str, default: str | None = None) -> str | None:

490 warnings.warn(

491 "HTTPResponse.getheader() is deprecated and will be removed "

492 "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",

493 category=DeprecationWarning,

494 stacklevel=2,

495 )

496 return self.headers.get(name, default)

497

498 # Compatibility method for http.cookiejar

499 def info(self) -> HTTPHeaderDict:

500 return self.headers

501

502 def geturl(self) -> str | None:

503 return self.url

504

505

506class HTTPResponse(BaseHTTPResponse):

507 """

508 HTTP Response container.

509

510 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is

511 loaded and decoded on-demand when the ``data`` property is accessed. This

512 class is also compatible with the Python standard library's :mod:`io`

513 module, and can hence be treated as a readable object in the context of that

514 framework.

515

516 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

517

518 :param preload_content:

519 If True, the response's body will be preloaded during construction.

520

521 :param decode_content:

522 If True, will attempt to decode the body based on the

523 'content-encoding' header.

524

525 :param original_response:

526 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`

527 object, it's convenient to include the original for debug purposes. It's

528 otherwise unused.

529

530 :param retries:

531 The retries contains the last :class:`~urllib3.util.retry.Retry` that

532 was used during the request.

533

534 :param enforce_content_length:

535 Enforce content length checking. Body returned by server must match

536 value of Content-Length header, if present. Otherwise, raise error.

537 """

538

539 def __init__(

540 self,

541 body: _TYPE_BODY = "",

542 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,

543 status: int = 0,

544 version: int = 0,

545 reason: str | None = None,

546 preload_content: bool = True,

547 decode_content: bool = True,

548 original_response: _HttplibHTTPResponse | None = None,

549 pool: HTTPConnectionPool | None = None,

550 connection: HTTPConnection | None = None,

551 msg: _HttplibHTTPMessage | None = None,

552 retries: Retry | None = None,

553 enforce_content_length: bool = True,

554 request_method: str | None = None,

555 request_url: str | None = None,

556 auto_close: bool = True,

557 ) -> None:

558 super().__init__(

559 headers=headers,

560 status=status,

561 version=version,

562 reason=reason,

563 decode_content=decode_content,

564 request_url=request_url,

565 retries=retries,

566 )

567

568 self.enforce_content_length = enforce_content_length

569 self.auto_close = auto_close

570

571 self._body = None

572 self._fp: _HttplibHTTPResponse | None = None

573 self._original_response = original_response

574 self._fp_bytes_read = 0

575 self.msg = msg

576

577 if body and isinstance(body, (str, bytes)):

578 self._body = body

579

580 self._pool = pool

581 self._connection = connection

582

583 if hasattr(body, "read"):

584 self._fp = body # type: ignore[assignment]

585

586 # Are we using the chunked-style of transfer encoding?

587 self.chunk_left: int | None = None

588

589 # Determine length of response

590 self.length_remaining = self._init_length(request_method)

591

592 # Used to return the correct amount of bytes for partial read()s

593 self._decoded_buffer = BytesQueueBuffer()

594

595 # If requested, preload the body.

596 if preload_content and not self._body:

597 self._body = self.read(decode_content=decode_content)

598

599 def release_conn(self) -> None:

600 if not self._pool or not self._connection:

601 return None

602

603 self._pool._put_conn(self._connection)

604 self._connection = None

605

606 def drain_conn(self) -> None:

607 """

608 Read and discard any remaining HTTP response data in the response connection.

609

610 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.

611 """

612 try:

613 self.read()

614 except (HTTPError, OSError, BaseSSLError, HTTPException):

615 pass

616

617 @property

618 def data(self) -> bytes:

619 # For backwards-compat with earlier urllib3 0.4 and earlier.

620 if self._body:

621 return self._body # type: ignore[return-value]

622

623 if self._fp:

624 return self.read(cache_content=True)

625

626 return None # type: ignore[return-value]

627

628 @property

629 def connection(self) -> HTTPConnection | None:

630 return self._connection

631

632 def isclosed(self) -> bool:

633 return is_fp_closed(self._fp)

634

635 def tell(self) -> int:

636 """

637 Obtain the number of bytes pulled over the wire so far. May differ from

638 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``

639 if bytes are encoded on the wire (e.g, compressed).

640 """

641 return self._fp_bytes_read

642

643 def _init_length(self, request_method: str | None) -> int | None:

644 """

645 Set initial length value for Response content if available.

646 """

647 length: int | None

648 content_length: str | None = self.headers.get("content-length")

649

650 if content_length is not None:

651 if self.chunked:

652 # This Response will fail with an IncompleteRead if it can't be

653 # received as chunked. This method falls back to attempt reading

654 # the response before raising an exception.

655 log.warning(

656 "Received response with both Content-Length and "

657 "Transfer-Encoding set. This is expressly forbidden "

658 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "

659 "attempting to process response as Transfer-Encoding: "

660 "chunked."

661 )

662 return None

663

664 try:

665 # RFC 7230 section 3.3.2 specifies multiple content lengths can

666 # be sent in a single Content-Length header

667 # (e.g. Content-Length: 42, 42). This line ensures the values

668 # are all valid ints and that as long as the `set` length is 1,

669 # all values are the same. Otherwise, the header is invalid.

670 lengths = {int(val) for val in content_length.split(",")}

671 if len(lengths) > 1:

672 raise InvalidHeader(

673 "Content-Length contained multiple "

674 "unmatching values (%s)" % content_length

675 )

676 length = lengths.pop()

677 except ValueError:

678 length = None

679 else:

680 if length < 0:

681 length = None

682

683 else: # if content_length is None

684 length = None

685

686 # Convert status to int for comparison

687 # In some cases, httplib returns a status of "_UNKNOWN"

688 try:

689 status = int(self.status)

690 except ValueError:

691 status = 0

692

693 # Check for responses that shouldn't include a body

694 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":

695 length = 0

696

697 return length

698

699 @contextmanager

700 def _error_catcher(self) -> typing.Generator[None, None, None]:

701 """

702 Catch low-level python exceptions, instead re-raising urllib3

703 variants, so that low-level exceptions are not leaked in the

704 high-level api.

705

706 On exit, release the connection back to the pool.

707 """

708 clean_exit = False

709

710 try:

711 try:

712 yield

713

714 except SocketTimeout as e:

715 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but

716 # there is yet no clean way to get at it from this context.

717 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]

718

719 except BaseSSLError as e:

720 # FIXME: Is there a better way to differentiate between SSLErrors?

721 if "read operation timed out" not in str(e):

722 # SSL errors related to framing/MAC get wrapped and reraised here

723 raise SSLError(e) from e

724

725 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]

726

727 except (HTTPException, OSError) as e:

728 # This includes IncompleteRead.

729 raise ProtocolError(f"Connection broken: {e!r}", e) from e

730

731 # If no exception is thrown, we should avoid cleaning up

732 # unnecessarily.

733 clean_exit = True

734 finally:

735 # If we didn't terminate cleanly, we need to throw away our

736 # connection.

737 if not clean_exit:

738 # The response may not be closed but we're not going to use it

739 # anymore so close it now to ensure that the connection is

740 # released back to the pool.

741 if self._original_response:

742 self._original_response.close()

743

744 # Closing the response may not actually be sufficient to close

745 # everything, so if we have a hold of the connection close that

746 # too.

747 if self._connection:

748 self._connection.close()

749

750 # If we hold the original response but it's closed now, we should

751 # return the connection back to the pool.

752 if self._original_response and self._original_response.isclosed():

753 self.release_conn()

754

755 def _fp_read(self, amt: int | None = None) -> bytes:

756 """

757 Read a response with the thought that reading the number of bytes

758 larger than can fit in a 32-bit int at a time via SSL in some

759 known cases leads to an overflow error that has to be prevented

760 if `amt` or `self.length_remaining` indicate that a problem may

761 happen.

762

763 The known cases:

764 * 3.8 <= CPython < 3.9.7 because of a bug

765 https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.

766 * urllib3 injected with pyOpenSSL-backed SSL-support.

767 * CPython < 3.10 only when `amt` does not fit 32-bit int.

768 """

769 assert self._fp

770 c_int_max = 2**31 - 1

771 if (

772 (amt and amt > c_int_max)

773 or (self.length_remaining and self.length_remaining > c_int_max)

774 ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):

775 buffer = io.BytesIO()

776 # Besides `max_chunk_amt` being a maximum chunk size, it

777 # affects memory overhead of reading a response by this

778 # method in CPython.

779 # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum

780 # chunk size that does not lead to an overflow error, but

781 # 256 MiB is a compromise.

782 max_chunk_amt = 2**28

783 while amt is None or amt != 0:

784 if amt is not None:

785 chunk_amt = min(amt, max_chunk_amt)

786 amt -= chunk_amt

787 else:

788 chunk_amt = max_chunk_amt

789 data = self._fp.read(chunk_amt)

790 if not data:

791 break

792 buffer.write(data)

793 del data # to reduce peak memory usage by `max_chunk_amt`.

794 return buffer.getvalue()

795 else:

796 # StringIO doesn't like amt=None

797 return self._fp.read(amt) if amt is not None else self._fp.read()

798

799 def _raw_read(

800 self,

801 amt: int | None = None,

802 ) -> bytes:

803 """

804 Reads `amt` of bytes from the socket.

805 """

806 if self._fp is None:

807 return None # type: ignore[return-value]

808

809 fp_closed = getattr(self._fp, "closed", False)

810

811 with self._error_catcher():

812 data = self._fp_read(amt) if not fp_closed else b""

813 if amt is not None and amt != 0 and not data:

814 # Platform-specific: Buggy versions of Python.

815 # Close the connection when no data is returned

816 #

817 # This is redundant to what httplib/http.client _should_

818 # already do. However, versions of python released before

819 # December 15, 2012 (http://bugs.python.org/issue16298) do

820 # not properly close the connection in all cases. There is

821 # no harm in redundantly calling close.

822 self._fp.close()

823 if (

824 self.enforce_content_length

825 and self.length_remaining is not None

826 and self.length_remaining != 0

827 ):

828 # This is an edge case that httplib failed to cover due

829 # to concerns of backward compatibility. We're

830 # addressing it here to make sure IncompleteRead is

831 # raised during streaming, so all calls with incorrect

832 # Content-Length are caught.

833 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

834

835 if data:

836 self._fp_bytes_read += len(data)

837 if self.length_remaining is not None:

838 self.length_remaining -= len(data)

839 return data

840

841 def read(

842 self,

843 amt: int | None = None,

844 decode_content: bool | None = None,

845 cache_content: bool = False,

846 ) -> bytes:

847 """

848 Similar to :meth:`http.client.HTTPResponse.read`, but with two additional

849 parameters: ``decode_content`` and ``cache_content``.

850

851 :param amt:

852 How much of the content to read. If specified, caching is skipped

853 because it doesn't make sense to cache partial content as the full

854 response.

855

856 :param decode_content:

857 If True, will attempt to decode the body based on the

858 'content-encoding' header.

859

860 :param cache_content:

861 If True, will save the returned data such that the same result is

862 returned despite of the state of the underlying file object. This

863 is useful if you want the ``.data`` property to continue working

864 after having ``.read()`` the file object. (Overridden if ``amt`` is

865 set.)

866 """

867 self._init_decoder()

868 if decode_content is None:

869 decode_content = self.decode_content

870

871 if amt is not None:

872 cache_content = False

873

874 if len(self._decoded_buffer) >= amt:

875 return self._decoded_buffer.get(amt)

876

877 data = self._raw_read(amt)

878

879 flush_decoder = amt is None or (amt != 0 and not data)

880

881 if not data and len(self._decoded_buffer) == 0:

882 return data

883

884 if amt is None:

885 data = self._decode(data, decode_content, flush_decoder)

886 if cache_content:

887 self._body = data

888 else:

889 # do not waste memory on buffer when not decoding

890 if not decode_content:

891 if self._has_decoded_content:

892 raise RuntimeError(

893 "Calling read(decode_content=False) is not supported after "

894 "read(decode_content=True) was called."

895 )

896 return data

897

898 decoded_data = self._decode(data, decode_content, flush_decoder)

899 self._decoded_buffer.put(decoded_data)

900

901 while len(self._decoded_buffer) < amt and data:

902 # TODO make sure to initially read enough data to get past the headers

903 # For example, the GZ file header takes 10 bytes, we don't want to read

904 # it one byte at a time

905 data = self._raw_read(amt)

906 decoded_data = self._decode(data, decode_content, flush_decoder)

907 self._decoded_buffer.put(decoded_data)

908 data = self._decoded_buffer.get(amt)

909

910 return data

911

912 def stream(

913 self, amt: int | None = 2**16, decode_content: bool | None = None

914 ) -> typing.Generator[bytes, None, None]:

915 """

916 A generator wrapper for the read() method. A call will block until

917 ``amt`` bytes have been read from the connection or until the

918 connection is closed.

919

920 :param amt:

921 How much of the content to read. The generator will return up to

922 much data per iteration, but may return less. This is particularly

923 likely when using compressed data. However, the empty string will

924 never be returned.

925

926 :param decode_content:

927 If True, will attempt to decode the body based on the

928 'content-encoding' header.

929 """

930 if self.chunked and self.supports_chunked_reads():

931 yield from self.read_chunked(amt, decode_content=decode_content)

932 else:

933 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:

934 data = self.read(amt=amt, decode_content=decode_content)

935

936 if data:

937 yield data

938

939 # Overrides from io.IOBase

940 def readable(self) -> bool:

941 return True

942

943 def close(self) -> None:

944 if not self.closed and self._fp:

945 self._fp.close()

946

947 if self._connection:

948 self._connection.close()

949

950 if not self.auto_close:

951 io.IOBase.close(self)

952

953 @property

954 def closed(self) -> bool:

955 if not self.auto_close:

956 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]

957 elif self._fp is None:

958 return True

959 elif hasattr(self._fp, "isclosed"):

960 return self._fp.isclosed()

961 elif hasattr(self._fp, "closed"):

962 return self._fp.closed

963 else:

964 return True

965

966 def fileno(self) -> int:

967 if self._fp is None:

968 raise OSError("HTTPResponse has no file to get a fileno from")

969 elif hasattr(self._fp, "fileno"):

970 return self._fp.fileno()

971 else:

972 raise OSError(

973 "The file-like object this HTTPResponse is wrapped "

974 "around has no file descriptor"

975 )

976

977 def flush(self) -> None:

978 if (

979 self._fp is not None

980 and hasattr(self._fp, "flush")

981 and not getattr(self._fp, "closed", False)

982 ):

983 return self._fp.flush()

984

985 def supports_chunked_reads(self) -> bool:

986 """

987 Checks if the underlying file-like object looks like a

988 :class:`http.client.HTTPResponse` object. We do this by testing for

989 the fp attribute. If it is present we assume it returns raw chunks as

990 processed by read_chunked().

991 """

992 return hasattr(self._fp, "fp")

993

994 def _update_chunk_length(self) -> None:

995 # First, we'll figure out length of a chunk and then

996 # we'll try to read it from socket.

997 if self.chunk_left is not None:

998 return None

999 line = self._fp.fp.readline() # type: ignore[union-attr]

1000 line = line.split(b";", 1)[0]

1001 try:

1002 self.chunk_left = int(line, 16)

1003 except ValueError:

1004 # Invalid chunked protocol response, abort.

1005 self.close()

1006 raise InvalidChunkLength(self, line) from None

1007

1008 def _handle_chunk(self, amt: int | None) -> bytes:

1009 returned_chunk = None

1010 if amt is None:

1011 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]

1012 returned_chunk = chunk

1013 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.

1014 self.chunk_left = None

1015 elif self.chunk_left is not None and amt < self.chunk_left:

1016 value = self._fp._safe_read(amt) # type: ignore[union-attr]

1017 self.chunk_left = self.chunk_left - amt

1018 returned_chunk = value

1019 elif amt == self.chunk_left:

1020 value = self._fp._safe_read(amt) # type: ignore[union-attr]

1021 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.

1022 self.chunk_left = None

1023 returned_chunk = value

1024 else: # amt > self.chunk_left

1025 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]

1026 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.

1027 self.chunk_left = None

1028 return returned_chunk # type: ignore[no-any-return]

1029

1030 def read_chunked(

1031 self, amt: int | None = None, decode_content: bool | None = None

1032 ) -> typing.Generator[bytes, None, None]:

1033 """

1034 Similar to :meth:`HTTPResponse.read`, but with an additional

1035 parameter: ``decode_content``.

1036

1037 :param amt:

1038 How much of the content to read. If specified, caching is skipped

1039 because it doesn't make sense to cache partial content as the full

1040 response.

1041

1042 :param decode_content:

1043 If True, will attempt to decode the body based on the

1044 'content-encoding' header.

1045 """

1046 self._init_decoder()

1047 # FIXME: Rewrite this method and make it a class with a better structured logic.

1048 if not self.chunked:

1049 raise ResponseNotChunked(

1050 "Response is not chunked. "

1051 "Header 'transfer-encoding: chunked' is missing."

1052 )

1053 if not self.supports_chunked_reads():

1054 raise BodyNotHttplibCompatible(

1055 "Body should be http.client.HTTPResponse like. "

1056 "It should have have an fp attribute which returns raw chunks."

1057 )

1058

1059 with self._error_catcher():

1060 # Don't bother reading the body of a HEAD request.

1061 if self._original_response and is_response_to_head(self._original_response):

1062 self._original_response.close()

1063 return None

1064

1065 # If a response is already read and closed

1066 # then return immediately.

1067 if self._fp.fp is None: # type: ignore[union-attr]

1068 return None

1069

1070 while True:

1071 self._update_chunk_length()

1072 if self.chunk_left == 0:

1073 break

1074 chunk = self._handle_chunk(amt)

1075 decoded = self._decode(

1076 chunk, decode_content=decode_content, flush_decoder=False

1077 )

1078 if decoded:

1079 yield decoded

1080

1081 if decode_content:

1082 # On CPython and PyPy, we should never need to flush the

1083 # decoder. However, on Jython we *might* need to, so

1084 # lets defensively do it anyway.

1085 decoded = self._flush_decoder()

1086 if decoded: # Platform-specific: Jython.

1087 yield decoded

1088

1089 # Chunk content ends with \r\n: discard it.

1090 while self._fp is not None:

1091 line = self._fp.fp.readline()

1092 if not line:

1093 # Some sites may not end with '\r\n'.

1094 break

1095 if line == b"\r\n":

1096 break

1097

1098 # We read everything; close the "file".

1099 if self._original_response:

1100 self._original_response.close()

1101

1102 @property

1103 def url(self) -> str | None:

1104 """

1105 Returns the URL that was the source of this response.

1106 If the request that generated this response redirected, this method

1107 will return the final redirect location.

1108 """

1109 return self._request_url

1110

1111 @url.setter

1112 def url(self, url: str) -> None:

1113 self._request_url = url

1114

1115 def __iter__(self) -> typing.Iterator[bytes]:

1116 buffer: list[bytes] = []

1117 for chunk in self.stream(decode_content=True):

1118 if b"\n" in chunk:

1119 chunks = chunk.split(b"\n")

1120 yield b"".join(buffer) + chunks[0] + b"\n"

1121 for x in chunks[1:-1]:

1122 yield x + b"\n"

1123 if chunks[-1]:

1124 buffer = [chunks[-1]]

1125 else:

1126 buffer = []

1127 else:

1128 buffer.append(chunk)

1129 if buffer:

1130 yield b"".join(buffer)