1from __future__ import annotations
2
3import collections
4import io
5import json as _json
6import logging
7import re
8import socket
9import sys
10import typing
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
16
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
19
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
27
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 HTTPError,
36 IncompleteRead,
37 InvalidChunkLength,
38 InvalidHeader,
39 ProtocolError,
40 ReadTimeoutError,
41 ResponseNotChunked,
42 SSLError,
43)
44from .util.response import is_fp_closed, is_response_to_head
45from .util.retry import Retry
46
47if typing.TYPE_CHECKING:
48 from .connectionpool import HTTPConnectionPool
49
50log = logging.getLogger(__name__)
51
52
class ContentDecoder:
    """Abstract interface for streaming response-body decoders.

    Subclasses implement ``decompress()`` for incremental chunks of
    encoded data and ``flush()`` to drain whatever the decoder still
    buffers once the stream ends.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode one chunk of encoded body data."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output."""
        raise NotImplementedError()
59
60
class DeflateDecoder(ContentDecoder):
    # Handles "deflate" bodies whether the server sent the zlib-wrapped
    # form (RFC 1950, the correct one) or raw DEFLATE (RFC 1951, a common
    # server mistake). The first successful decompress() decides which.

    def __init__(self) -> None:
        # True until we know which deflate variant the stream uses.
        self._first_try = True
        # Replay buffer: input seen so far, kept so it can be re-fed to a
        # raw-deflate decompressor if the zlib-wrapped attempt errors out.
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                # zlib-wrapped stream confirmed: drop the replay buffer.
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            # Not zlib-wrapped: retry everything seen so far as raw
            # DEFLATE (negative wbits suppresses the zlib header check).
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
91
92
class GzipDecoderState:
    # Tracks GzipDecoder's position within a (possibly multi-member)
    # gzip stream.
    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding subsequent concatenated members
    SWALLOW_DATA = 2  # decoding failed; discard any further input
97
98
class GzipDecoder(ContentDecoder):
    # Decodes gzip bodies, including streams composed of several
    # concatenated gzip members.

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS makes zlib expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            # Bytes left over after a member's trailer belong to the next
            # member; start a fresh decompressor for them.
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
127
128
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind this instance's decompress() straight to whichever
            # streaming method the installed backend provides.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one backend exposes flush(); otherwise nothing to drain.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
146
147
try:
    # Python 3.14+
    from compression import zstd  # type: ignore[import-not-found] # noqa: F401

    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        # Backed by the stdlib 'compression.zstd' module.
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A finished frame may be followed by further concatenated
            # frames; restart the decompressor on the leftover bytes.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # eof is only True once a complete, valid frame was consumed.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

except ImportError:
    try:
        # Python 3.13 and earlier require the 'zstandard' module.
        import zstandard as zstd

        # The package 'zstandard' added the 'eof' property starting
        # in v0.18.0 which we require to ensure a complete and
        # valid zstd stream was fed into the ZstdDecoder.
        # See: https://github.com/urllib3/urllib3/pull/2624
        _zstd_version = tuple(
            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
        )
        if _zstd_version < (0, 18):  # Defensive:
            raise ImportError("zstandard module doesn't have eof")
    except (AttributeError, ImportError, ValueError):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
            # Backed by the third-party 'zstandard' package (>= 0.18.0).
            def __init__(self) -> None:
                self._obj = zstd.ZstdDecompressor().decompressobj()

            def decompress(self, data: bytes) -> bytes:
                if not data:
                    return b""
                data_parts = [self._obj.decompress(data)]
                # Handle concatenated frames, as above.
                while self._obj.eof and self._obj.unused_data:
                    unused_data = self._obj.unused_data
                    self._obj = zstd.ZstdDecompressor().decompressobj()
                    data_parts.append(self._obj.decompress(unused_data))
                return b"".join(data_parts)

            def flush(self) -> bytes:
                ret = self._obj.flush()  # note: this is a no-op
                if not self._obj.eof:
                    raise DecodeError("Zstandard data is incomplete")
                return ret  # type: ignore[no-any-return]
211
212
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # Flush the first-applied (innermost) decoder.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings last-applied-first.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
232
233
def _get_decoder(mode: str) -> ContentDecoder:
    """Return the ContentDecoder matching a Content-Encoding value."""
    # A comma means several codings were applied in sequence.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Anything else is treated as deflate (the default decoder tolerates
    # both zlib-wrapped and raw streams).
    return DeflateDecoder()
250
251
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    read()-style callers must be handed exactly the number of bytes they
    asked for even though decoded data arrives in arbitrarily sized
    chunks, so chunks are queued with put() and sliced out with get() or
    get_all().

    Peak memory usage is bounded by the queued data itself plus the
    largest chunk copied inside get(); with a single huge chunk that
    amounts to one full copy of the data.
    """

    def __init__(self) -> None:
        # Chunks stay whole until consumed; popleft/appendleft are O(1).
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the buffer.

        :raises RuntimeError: if the buffer is empty and ``n != 0``.
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        copied = 0
        while copied < n and self.buffer:
            chunk = self.buffer.popleft()
            needed = n - copied
            if len(chunk) > needed:
                # Split: hand back the prefix, requeue the remainder.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                copied += needed
            else:
                out.write(chunk)
                self._size -= len(chunk)
                copied += len(chunk)

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire buffer contents as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # Single chunk: return it directly, no copy needed.
            result = chunks.pop()
        else:
            out = io.BytesIO()
            while chunks:
                out.write(chunks.popleft())
            result = out.getvalue()
        self._size = 0
        return result
323
324
class BaseHTTPResponse(io.IOBase):
    """Shared behavior for HTTP response objects: header handling, redirect
    detection, and transparent content decoding. Reading is implemented by
    subclasses."""

    # Content-Encoding values we can transparently decode; extended at
    # import time based on which optional decoders are available.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; wrapped into DecodeError in _decode().
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Reuse an existing HTTPHeaderDict rather than copying it.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded bytes have been returned; mixing decoded and
        # raw reads afterwards is rejected in _decode().
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full (possibly decoded) response body; implemented by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a MultiDecoder when every
                # listed coding is one we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Read up to len(b) bytes into b; returns the number of bytes read.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
547
548
549class HTTPResponse(BaseHTTPResponse):
550 """
551 HTTP Response container.
552
553 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
554 loaded and decoded on-demand when the ``data`` property is accessed. This
555 class is also compatible with the Python standard library's :mod:`io`
556 module, and can hence be treated as a readable object in the context of that
557 framework.
558
559 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
560
561 :param preload_content:
562 If True, the response's body will be preloaded during construction.
563
564 :param decode_content:
565 If True, will attempt to decode the body based on the
566 'content-encoding' header.
567
568 :param original_response:
569 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
570 object, it's convenient to include the original for debug purposes. It's
571 otherwise unused.
572
573 :param retries:
574 The retries contains the last :class:`~urllib3.util.retry.Retry` that
575 was used during the request.
576
577 :param enforce_content_length:
578 Enforce content length checking. Body returned by server must match
579 value of Content-Length header, if present. Otherwise, raise error.
580 """
581
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        # Raw (wire) bytes read so far; reported by tell().
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; a file-like body is read
        # from on demand via self._fp below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
645
646 def release_conn(self) -> None:
647 if not self._pool or not self._connection:
648 return None
649
650 self._pool._put_conn(self._connection)
651 self._connection = None
652
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: errors here just mean the connection
            # won't be reused.
            pass
663
664 @property
665 def data(self) -> bytes:
666 # For backwards-compat with earlier urllib3 0.4 and earlier.
667 if self._body:
668 return self._body # type: ignore[return-value]
669
670 if self._fp:
671 return self.read(cache_content=True)
672
673 return None # type: ignore[return-value]
674
    @property
    def connection(self) -> HTTPConnection | None:
        # The connection this response still holds, if not yet released.
        return self._connection
678
    def isclosed(self) -> bool:
        # Compatibility shim mirroring http.client.HTTPResponse.isclosed().
        return is_fp_closed(self._fp)
681
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
689
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: Method of the originating request (used to
            zero the length for HEAD responses).
        :returns: Expected body length in bytes, or ``None`` if unknown.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                # Negative lengths are invalid; treat as unknown.
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
745
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # An SSL read timeout is reported the same way as a socket one.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
811
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked slow path when a read could exceed a C int
        # AND we're on an affected SSL/CPython combination.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 may legally return fewer bytes, so one capped call
                # is sufficient.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
868
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates the wire-byte counters (``_fp_bytes_read`` /
        ``length_remaining``) and enforces Content-Length when enabled.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
920
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve from the decoded buffer when it already holds enough.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full read, or when the stream is exhausted.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
994
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            # Compressed input may decode to zero bytes; keep pulling until
            # something decodes or the stream ends (then flush).
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1048
1049 def stream(
1050 self, amt: int | None = 2**16, decode_content: bool | None = None
1051 ) -> typing.Generator[bytes]:
1052 """
1053 A generator wrapper for the read() method. A call will block until
1054 ``amt`` bytes have been read from the connection or until the
1055 connection is closed.
1056
1057 :param amt:
1058 How much of the content to read. The generator will return up to
1059 much data per iteration, but may return less. This is particularly
1060 likely when using compressed data. However, the empty string will
1061 never be returned.
1062
1063 :param decode_content:
1064 If True, will attempt to decode the body based on the
1065 'content-encoding' header.
1066 """
1067 if self.chunked and self.supports_chunked_reads():
1068 yield from self.read_chunked(amt, decode_content=decode_content)
1069 else:
1070 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1071 data = self.read(amt=amt, decode_content=decode_content)
1072
1073 if data:
1074 yield data
1075
1076 # Overrides from io.IOBase
1077 def readable(self) -> bool:
1078 return True
1079
1080 def shutdown(self) -> None:
1081 if not self._sock_shutdown:
1082 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1083 if self._connection is None:
1084 raise RuntimeError(
1085 "Cannot shutdown as connection has already been released to the pool"
1086 )
1087 self._sock_shutdown(socket.SHUT_RD)
1088
1089 def close(self) -> None:
1090 self._sock_shutdown = None
1091
1092 if not self.closed and self._fp:
1093 self._fp.close()
1094
1095 if self._connection:
1096 self._connection.close()
1097
1098 if not self.auto_close:
1099 io.IOBase.close(self)
1100
1101 @property
1102 def closed(self) -> bool:
1103 if not self.auto_close:
1104 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1105 elif self._fp is None:
1106 return True
1107 elif hasattr(self._fp, "isclosed"):
1108 return self._fp.isclosed()
1109 elif hasattr(self._fp, "closed"):
1110 return self._fp.closed
1111 else:
1112 return True
1113
1114 def fileno(self) -> int:
1115 if self._fp is None:
1116 raise OSError("HTTPResponse has no file to get a fileno from")
1117 elif hasattr(self._fp, "fileno"):
1118 return self._fp.fileno()
1119 else:
1120 raise OSError(
1121 "The file-like object this HTTPResponse is wrapped "
1122 "around has no file descriptor"
1123 )
1124
1125 def flush(self) -> None:
1126 if (
1127 self._fp is not None
1128 and hasattr(self._fp, "flush")
1129 and not getattr(self._fp, "closed", False)
1130 ):
1131 return self._fp.flush()
1132
1133 def supports_chunked_reads(self) -> bool:
1134 """
1135 Checks if the underlying file-like object looks like a
1136 :class:`http.client.HTTPResponse` object. We do this by testing for
1137 the fp attribute. If it is present we assume it returns raw chunks as
1138 processed by read_chunked().
1139 """
1140 return hasattr(self._fp, "fp")
1141
1142 def _update_chunk_length(self) -> None:
1143 # First, we'll figure out length of a chunk and then
1144 # we'll try to read it from socket.
1145 if self.chunk_left is not None:
1146 return None
1147 line = self._fp.fp.readline() # type: ignore[union-attr]
1148 line = line.split(b";", 1)[0]
1149 try:
1150 self.chunk_left = int(line, 16)
1151 except ValueError:
1152 self.close()
1153 if line:
1154 # Invalid chunked protocol response, abort.
1155 raise InvalidChunkLength(self, line) from None
1156 else:
1157 # Truncated at start of next chunk
1158 raise ProtocolError("Response ended prematurely") from None
1159
1160 def _handle_chunk(self, amt: int | None) -> bytes:
1161 returned_chunk = None
1162 if amt is None:
1163 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1164 returned_chunk = chunk
1165 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1166 self.chunk_left = None
1167 elif self.chunk_left is not None and amt < self.chunk_left:
1168 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1169 self.chunk_left = self.chunk_left - amt
1170 returned_chunk = value
1171 elif amt == self.chunk_left:
1172 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1173 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1174 self.chunk_left = None
1175 returned_chunk = value
1176 else: # amt > self.chunk_left
1177 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1178 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1179 self.chunk_left = None
1180 return returned_chunk # type: ignore[no-any-return]
1181
    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            If the response lacks ``Transfer-Encoding: chunked``.
        :raises BodyNotHttplibCompatible:
            If the underlying file object doesn't expose raw chunks.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            # Main loop: parse each chunk-size line, read (part of) the
            # chunk, decode it, and yield any decoded output produced.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # A zero-length chunk terminates the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it. This loop also
            # consumes any trailer lines up to the terminating CRLF.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
1258
1259 @property
1260 def url(self) -> str | None:
1261 """
1262 Returns the URL that was the source of this response.
1263 If the request that generated this response redirected, this method
1264 will return the final redirect location.
1265 """
1266 return self._request_url
1267
1268 @url.setter
1269 def url(self, url: str | None) -> None:
1270 self._request_url = url
1271
1272 def __iter__(self) -> typing.Iterator[bytes]:
1273 buffer: list[bytes] = []
1274 for chunk in self.stream(decode_content=True):
1275 if b"\n" in chunk:
1276 chunks = chunk.split(b"\n")
1277 yield b"".join(buffer) + chunks[0] + b"\n"
1278 for x in chunks[1:-1]:
1279 yield x + b"\n"
1280 if chunks[-1]:
1281 buffer = [chunks[-1]]
1282 else:
1283 buffer = []
1284 else:
1285 buffer.append(chunk)
1286 if buffer:
1287 yield b"".join(buffer)