from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import socket
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


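# The HTTP "deflate" coding is ambiguous in the wild: RFC 9110 defines it as
# zlib-wrapped (RFC 1950) DEFLATE data, but some servers send a raw DEFLATE
# (RFC 1951) stream instead. DeflateDecoder below first tries the zlib wrapper
# and transparently falls back to a raw-deflate decompressor (-zlib.MAX_WBITS)
# on the first error.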
class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
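            # Leftover data after a complete member means the body contains
            # multiple concatenated gzip members (allowed by RFC 1952), so
            # reset state and start a fresh decompressor for the next member.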
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


try:
    # Python 3.14+
    from compression import zstd  # type: ignore[import-not-found] # noqa: F401

    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
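            # A finished frame followed by unused data means the body holds
            # multiple concatenated zstd frames; decode each one with a fresh
            # decompressor.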
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

except ImportError:
    try:
        # Python 3.13 and earlier require the 'zstandard' module.
        import zstandard as zstd

        # The package 'zstandard' added the 'eof' property starting
        # in v0.18.0 which we require to ensure a complete and
        # valid zstd stream was fed into the ZstdDecoder.
        # See: https://github.com/urllib3/urllib3/pull/2624
        _zstd_version = tuple(
            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
        )
        if _zstd_version < (0, 18):  # Defensive:
            raise ImportError("zstandard module doesn't have eof")
    except (AttributeError, ImportError, ValueError):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
            def __init__(self) -> None:
                self._obj = zstd.ZstdDecompressor().decompressobj()

            def decompress(self, data: bytes) -> bytes:
                if not data:
                    return b""
                data_parts = [self._obj.decompress(data)]
                while self._obj.eof and self._obj.unused_data:
                    unused_data = self._obj.unused_data
                    self._obj = zstd.ZstdDecompressor().decompressobj()
                    data_parts.append(self._obj.decompress(unused_data))
                return b"".join(data_parts)

            def flush(self) -> bytes:
                ret = self._obj.flush()  # note: this is a no-op
                if not self._obj.eof:
                    raise DecodeError("Zstandard data is incomplete")
                return ret  # type: ignore[no-any-return]


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
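
    For example, ``Content-Encoding: gzip, br`` means the body was gzipped
    first and then brotli-compressed, so decoding must apply the decoders in
    reverse header order: brotli first, then gzip (see ``reversed()`` in
    ``decompress()`` below).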
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put().

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
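
    A minimal sketch of the intended usage::

        buf = BytesQueueBuffer()
        buf.put(b"hello")
        buf.put(b"world")
        assert buf.get(7) == b"hellowo"  # copies and dequeues 7 bytes
        assert buf.get_all() == b"rld"   # drains what's left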
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

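        A minimal usage sketch (``https://example.com/api.json`` is a
        placeholder endpoint; assumes the top-level :func:`urllib3.request`
        helper)::

            import urllib3

            resp = urllib3.request("GET", "https://example.com/api.json")
            payload = resp.json()
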
        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
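
        A minimal usage sketch (``url`` is a placeholder; assumes the
        top-level :func:`urllib3.request` helper)::

            resp = urllib3.request("GET", url, preload_content=False)
            first_kb = resp.read(1024)  # returns up to 1024 decoded bytes
            rest = resp.read()  # reads and decodes the remainder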
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
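
        A minimal usage sketch (``url`` is a placeholder; assumes the
        top-level :func:`urllib3.request` helper)::

            resp = urllib3.request("GET", url, preload_content=False)
            for chunk in resp.stream(2**16):
                ...  # up to 64 KiB of decoded data per iteration
            resp.release_conn()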
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def shutdown(self) -> None:
        if not self._sock_shutdown:
            raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
        if self._connection is None:
            raise RuntimeError(
                "Cannot shutdown as connection has already been released to the pool"
            )
        self._sock_shutdown(socket.SHUT_RD)

    def close(self) -> None:
        self._sock_shutdown = None

        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out the length of the next chunk and then
        # we'll try to read it from the socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
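        # Iterate over the decoded body line by line: split each streamed
        # chunk on b"\n", buffering any trailing partial line until a later
        # chunk (or end of stream) completes it.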
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)