Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/urllib3/response.py: 22%

1from __future__ import annotations

3import collections

4import io

5import json as _json

6import logging

7import re

8import socket

9import sys

10import typing

11import warnings

12import zlib

13from contextlib import contextmanager

14from http.client import HTTPMessage as _HttplibHTTPMessage

15from http.client import HTTPResponse as _HttplibHTTPResponse

16from socket import timeout as SocketTimeout

18if typing.TYPE_CHECKING:

19 from ._base_connection import BaseHTTPConnection

21try:

22 try:

23 import brotlicffi as brotli # type: ignore[import-not-found]

24 except ImportError:

25 import brotli # type: ignore[import-not-found]

26except ImportError:

27 brotli = None

29try:

30 import zstandard as zstd

31except (AttributeError, ImportError, ValueError): # Defensive:

32 HAS_ZSTD = False

33else:

34 # The package 'zstandard' added the 'eof' property starting

35 # in v0.18.0 which we require to ensure a complete and

36 # valid zstd stream was fed into the ZstdDecoder.

37 # See: https://github.com/urllib3/urllib3/pull/2624

38 _zstd_version = tuple(

39 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]

40 )

41 if _zstd_version < (0, 18): # Defensive:

42 HAS_ZSTD = False

43 else:

44 HAS_ZSTD = True

46from . import util

47from ._base_connection import _TYPE_BODY

48from ._collections import HTTPHeaderDict

49from .connection import BaseSSLError, HTTPConnection, HTTPException

50from .exceptions import (

51 BodyNotHttplibCompatible,

52 DecodeError,

53 HTTPError,

54 IncompleteRead,

55 InvalidChunkLength,

56 InvalidHeader,

57 ProtocolError,

58 ReadTimeoutError,

59 ResponseNotChunked,

60 SSLError,

61)

62from .util.response import is_fp_closed, is_response_to_head

63from .util.retry import Retry

65if typing.TYPE_CHECKING:

66 from .connectionpool import HTTPConnectionPool

68log = logging.getLogger(__name__)

class ContentDecoder:
    """Interface implemented by every content decoder in this module.

    Both methods raise ``NotImplementedError`` here; concrete decoders
    override them.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return the resulting bytes."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return whatever output the decoder is still holding."""
        raise NotImplementedError()

class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies that tolerates two framings.

    Input is first fed to a default ``zlib.decompressobj()``.  If that
    raises ``zlib.error`` while the decoder is still probing, everything
    received so far is replayed through a decompressor created with
    ``-zlib.MAX_WBITS`` instead.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj()
        # True until the first attempt has either produced output or failed.
        self._first_try = True
        # Input accumulated while probing, kept so it can be replayed.
        self._data = b""

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data``; empty input is returned unchanged."""
        if not data:
            return data

        # Probing is over: just use whichever decompressor was settled on.
        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            output = self._obj.decompress(data)
        except zlib.error:
            # Switch decompressors and replay all input seen so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if output:
            # The first decompressor works; the replay copy is not needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return output

    def flush(self) -> bytes:
        """Flush the underlying zlib decompressor."""
        return self._obj.flush()

109

110

class GzipDecoderState:
    """Integer states used by ``GzipDecoder`` to track its progress."""

    # FIRST_MEMBER: initial state of a new GzipDecoder.
    # OTHER_MEMBERS: set once data remained after a completed member.
    # SWALLOW_DATA: set after a zlib error; later input is ignored.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)

115

116

class GzipDecoder(ContentDecoder):
    """Decoder for gzip bodies, including several concatenated members."""

    def __init__(self) -> None:
        self._state = GzipDecoderState.FIRST_MEMBER
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return the decompressed bytes.

        A ``zlib.error`` is re-raised unless at least one member had
        already been completed, in which case the output gathered so far is
        returned.  After any error all later input yields ``b""``.
        """
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return b""

        output = bytearray()
        pending = data
        while pending:
            try:
                output += self._obj.decompress(pending)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise

            pending = self._obj.unused_data
            if pending:
                # Bytes are left over after this member: start a fresh
                # decompressor for whatever follows.
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        return bytes(output)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor."""
        return self._obj.flush()

145

146

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` bodies; only defined when brotli imported."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind ``decompress`` on the instance to whichever method the
            # underlying decompressor object provides.
            source_name = "decompress" if hasattr(self._obj, "decompress") else "process"
            setattr(self, "decompress", getattr(self._obj, source_name))

        def flush(self) -> bytes:
            """Flush the decompressor if it can be flushed, else ``b""``."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]

164

165

if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` bodies; only defined when HAS_ZSTD is true."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            """Decode ``data``, continuing into any frames that follow."""
            if not data:
                return b""

            pieces = [self._obj.decompress(data)]
            # When a frame ended and bytes are left over, feed the leftover
            # to a fresh decompress object.
            while self._obj.eof and self._obj.unused_data:
                leftover = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                pieces.append(self._obj.decompress(leftover))
            return b"".join(pieces)

        def flush(self) -> bytes:
            """Flush the decoder.

            :raises DecodeError: if the current frame has not reached its end.
            """
            flushed = self._obj.flush()  # note: this is a no-op
            if self._obj.eof:
                return flushed
            raise DecodeError("Zstandard data is incomplete")

187

188

class MultiDecoder(ContentDecoder):
    """
    Decoder for a comma-separated list of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in header order.
        names = map(str.strip, modes.split(","))
        self._decoders = list(map(_get_decoder, names))

    def flush(self) -> bytes:
        """Flush the decoder of the first listed coding."""
        first = self._decoders[0]
        return first.flush()

    def decompress(self, data: bytes) -> bytes:
        """Run ``data`` through the decoders, last listed coding first."""
        result = data
        for decoder in self._decoders[::-1]:
            result = decoder.decompress(result)
        return result

208

209

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a new decoder for the content coding named by ``mode``.

    A value containing a comma yields a ``MultiDecoder``.  Any value that
    is not otherwise recognised yields a ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    # The optional decoders only exist when their library was importable.
    if brotli is not None:
        if mode == "br":
            return BrotliDecoder()

    if HAS_ZSTD:
        if mode == "zstd":
            return ZstdDecoder()

    return DeflateDecoder()

226

227

class BytesQueueBuffer:
    """Queue of byte chunks that can be read back in arbitrary sizes.

    Chunks are appended with put() and consumed from the front with get()
    or get_all().  ``len()`` reports the number of bytes currently queued.

    Memory use is bounded by the queued chunks themselves plus the data
    assembled for a single get()/get_all() call; with one large chunk a
    partial get() copies both the returned part and the remainder.
    """

    def __init__(self) -> None:
        self._size: int = 0
        self.buffer: typing.Deque[bytes] = collections.deque()

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` to the end of the queue."""
        self._size += len(data)
        self.buffer.append(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        :raises RuntimeError: if ``n`` is non-zero and the queue is empty.
        :raises ValueError: if ``n`` is negative and the queue is not empty.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        pieces = []
        wanted = n
        while wanted > 0 and self.buffer:
            head = self.buffer.popleft()
            head_length = len(head)
            if head_length > wanted:
                # Only part of this chunk is needed; requeue the rest.
                pieces.append(head[:wanted])
                self.buffer.appendleft(head[wanted:])
                self._size -= wanted
                break
            pieces.append(head)
            self._size -= head_length
            wanted -= head_length

        return b"".join(pieces)

    def get_all(self) -> bytes:
        """Remove and return everything that is queued."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""

        if len(chunks) == 1:
            # A lone chunk is handed back as-is, without copying.
            everything = chunks.pop()
        else:
            everything = b"".join(chunks)
            chunks.clear()
        self._size = 0
        return everything

299

300

class BaseHTTPResponse(io.IOBase):
    """Common state and helpers for HTTP response objects.

    The constructor stores the response metadata and detects chunked
    transfer encoding.  The body-reading and connection-handling methods
    raise ``NotImplementedError`` here and are left to subclasses; content
    decoding (``_init_decoder``/``_decode``/``_flush_decoder``) is
    implemented here.
    """

    # Content-Encoding values for which _init_decoder() sets up a decoder.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    # Status codes that get_redirect_location() treats as redirects.
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Decoder exceptions that _decode() converts into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store the response metadata.

        ``headers`` is used as-is when it already is an ``HTTPHeaderDict``
        and wrapped in one otherwise.  ``self.chunked`` is set when the
        Transfer-Encoding header lists ``chunked``.
        """
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the ``retries`` property setter below.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        # Declared only; this constructor does not assign a value.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.

        Nothing happens when a decoder already exists.  Otherwise one is
        created when the Content-Encoding header is a supported coding, or a
        comma-separated list naming at least one supported coding.
        """
        if self._decoder is not None:
            return

        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if content_encoding in self.CONTENT_DECODERS:
            self._decoder = _get_decoder(content_encoding)
        elif "," in content_encoding:
            # The whole header value is handed over as soon as one listed
            # coding is supported.
            if any(
                e.strip() in self.CONTENT_DECODERS
                for e in content_encoding.split(",")
            ):
                self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :param data: Bytes as received.
        :param decode_content: When falsy, ``data`` is returned untouched.
        :param flush_decoder: When true, the decoder's flushed output is
            appended to the result.
        :raises RuntimeError: if decoding is declined after content has
            already been decoded on this response.
        :raises DecodeError: if the decoder raises one of
            ``DECODER_ERROR_CLASSES``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into ``b``.

        :returns: The number of bytes stored in ``b``; 0 when nothing was
            read.
        """
        temp = self.read(len(b))
        # A subclass's read() may hand back None instead of bytes
        # (HTTPResponse does so when it has no file object); report that as
        # "nothing read" rather than failing in len().
        if not temp:
            return 0
        b[: len(temp)] = temp
        return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

542

543

544class HTTPResponse(BaseHTTPResponse):

545 """

546 HTTP Response container.

547

548 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is

549 loaded and decoded on-demand when the ``data`` property is accessed. This

550 class is also compatible with the Python standard library's :mod:`io`

551 module, and can hence be treated as a readable object in the context of that

552 framework.

553

554 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

555

556 :param preload_content:

557 If True, the response's body will be preloaded during construction.

558

559 :param decode_content:

560 If True, will attempt to decode the body based on the

561 'content-encoding' header.

562

563 :param original_response:

564 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`

565 object, it's convenient to include the original for debug purposes. It's

566 otherwise unused.

567

568 :param retries:

569 The retries contains the last :class:`~urllib3.util.retry.Retry` that

570 was used during the request.

571

572 :param enforce_content_length:

573 Enforce content length checking. Body returned by server must match

574 value of Content-Length header, if present. Otherwise, raise error.

575 """

576

577 def __init__(

578 self,

579 body: _TYPE_BODY = "",

580 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,

581 status: int = 0,

582 version: int = 0,

583 version_string: str = "HTTP/?",

584 reason: str | None = None,

585 preload_content: bool = True,

586 decode_content: bool = True,

587 original_response: _HttplibHTTPResponse | None = None,

588 pool: HTTPConnectionPool | None = None,

589 connection: HTTPConnection | None = None,

590 msg: _HttplibHTTPMessage | None = None,

591 retries: Retry | None = None,

592 enforce_content_length: bool = True,

593 request_method: str | None = None,

594 request_url: str | None = None,

595 auto_close: bool = True,

596 sock_shutdown: typing.Callable[[int], None] | None = None,

597 ) -> None:

598 super().__init__(

599 headers=headers,

600 status=status,

601 version=version,

602 version_string=version_string,

603 reason=reason,

604 decode_content=decode_content,

605 request_url=request_url,

606 retries=retries,

607 )

608

609 self.enforce_content_length = enforce_content_length

610 self.auto_close = auto_close

611

612 self._body = None

613 self._fp: _HttplibHTTPResponse | None = None

614 self._original_response = original_response

615 self._fp_bytes_read = 0

616 self.msg = msg

617

618 if body and isinstance(body, (str, bytes)):

619 self._body = body

620

621 self._pool = pool

622 self._connection = connection

623

624 if hasattr(body, "read"):

625 self._fp = body # type: ignore[assignment]

626 self._sock_shutdown = sock_shutdown

627

628 # Are we using the chunked-style of transfer encoding?

629 self.chunk_left: int | None = None

630

631 # Determine length of response

632 self.length_remaining = self._init_length(request_method)

633

634 # Used to return the correct amount of bytes for partial read()s

635 self._decoded_buffer = BytesQueueBuffer()

636

637 # If requested, preload the body.

638 if preload_content and not self._body:

639 self._body = self.read(decode_content=decode_content)

640

641 def release_conn(self) -> None:

642 if not self._pool or not self._connection:

643 return None

644

645 self._pool._put_conn(self._connection)

646 self._connection = None

647

648 def drain_conn(self) -> None:

649 """

650 Read and discard any remaining HTTP response data in the response connection.

651

652 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.

653 """

654 try:

655 self.read()

656 except (HTTPError, OSError, BaseSSLError, HTTPException):

657 pass

658

659 @property

660 def data(self) -> bytes:

661 # For backwards-compat with earlier urllib3 0.4 and earlier.

662 if self._body:

663 return self._body # type: ignore[return-value]

664

665 if self._fp:

666 return self.read(cache_content=True)

667

668 return None # type: ignore[return-value]

669

670 @property

671 def connection(self) -> HTTPConnection | None:

672 return self._connection

673

674 def isclosed(self) -> bool:

675 return is_fp_closed(self._fp)

676

677 def tell(self) -> int:

678 """

679 Obtain the number of bytes pulled over the wire so far. May differ from

680 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``

681 if bytes are encoded on the wire (e.g, compressed).

682 """

683 return self._fp_bytes_read

684

685 def _init_length(self, request_method: str | None) -> int | None:

686 """

687 Set initial length value for Response content if available.

688 """

689 length: int | None

690 content_length: str | None = self.headers.get("content-length")

691

692 if content_length is not None:

693 if self.chunked:

694 # This Response will fail with an IncompleteRead if it can't be

695 # received as chunked. This method falls back to attempt reading

696 # the response before raising an exception.

697 log.warning(

698 "Received response with both Content-Length and "

699 "Transfer-Encoding set. This is expressly forbidden "

700 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "

701 "attempting to process response as Transfer-Encoding: "

702 "chunked."

703 )

704 return None

705

706 try:

707 # RFC 7230 section 3.3.2 specifies multiple content lengths can

708 # be sent in a single Content-Length header

709 # (e.g. Content-Length: 42, 42). This line ensures the values

710 # are all valid ints and that as long as the `set` length is 1,

711 # all values are the same. Otherwise, the header is invalid.

712 lengths = {int(val) for val in content_length.split(",")}

713 if len(lengths) > 1:

714 raise InvalidHeader(

715 "Content-Length contained multiple "

716 "unmatching values (%s)" % content_length

717 )

718 length = lengths.pop()

719 except ValueError:

720 length = None

721 else:

722 if length < 0:

723 length = None

724

725 else: # if content_length is None

726 length = None

727

728 # Convert status to int for comparison

729 # In some cases, httplib returns a status of "_UNKNOWN"

730 try:

731 status = int(self.status)

732 except ValueError:

733 status = 0

734

735 # Check for responses that shouldn't include a body

736 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":

737 length = 0

738

739 return length

740

741 @contextmanager

742 def _error_catcher(self) -> typing.Generator[None]:

743 """

744 Catch low-level python exceptions, instead re-raising urllib3

745 variants, so that low-level exceptions are not leaked in the

746 high-level api.

747

748 On exit, release the connection back to the pool.

749 """

750 clean_exit = False

751

752 try:

753 try:

754 yield

755

756 except SocketTimeout as e:

757 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but

758 # there is yet no clean way to get at it from this context.

759 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]

760

761 except BaseSSLError as e:

762 # FIXME: Is there a better way to differentiate between SSLErrors?

763 if "read operation timed out" not in str(e):

764 # SSL errors related to framing/MAC get wrapped and reraised here

765 raise SSLError(e) from e

766

767 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]

768

769 except IncompleteRead as e:

770 if (

771 e.expected is not None

772 and e.partial is not None

773 and e.expected == -e.partial

774 ):

775 arg = "Response may not contain content."

776 else:

777 arg = f"Connection broken: {e!r}"

778 raise ProtocolError(arg, e) from e

779

780 except (HTTPException, OSError) as e:

781 raise ProtocolError(f"Connection broken: {e!r}", e) from e

782

783 # If no exception is thrown, we should avoid cleaning up

784 # unnecessarily.

785 clean_exit = True

786 finally:

787 # If we didn't terminate cleanly, we need to throw away our

788 # connection.

789 if not clean_exit:

790 # The response may not be closed but we're not going to use it

791 # anymore so close it now to ensure that the connection is

792 # released back to the pool.

793 if self._original_response:

794 self._original_response.close()

795

796 # Closing the response may not actually be sufficient to close

797 # everything, so if we have a hold of the connection close that

798 # too.

799 if self._connection:

800 self._connection.close()

801

802 # If we hold the original response but it's closed now, we should

803 # return the connection back to the pool.

804 if self._original_response and self._original_response.isclosed():

805 self.release_conn()

806

807 def _fp_read(

808 self,

809 amt: int | None = None,

810 *,

811 read1: bool = False,

812 ) -> bytes:

813 """

814 Read a response with the thought that reading the number of bytes

815 larger than can fit in a 32-bit int at a time via SSL in some

816 known cases leads to an overflow error that has to be prevented

817 if `amt` or `self.length_remaining` indicate that a problem may

818 happen.

819

820 The known cases:

821 * CPython < 3.9.7 because of a bug

822 https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.

823 * urllib3 injected with pyOpenSSL-backed SSL-support.

824 * CPython < 3.10 only when `amt` does not fit 32-bit int.

825 """

826 assert self._fp

827 c_int_max = 2**31 - 1

828 if (

829 (amt and amt > c_int_max)

830 or (

831 amt is None

832 and self.length_remaining

833 and self.length_remaining > c_int_max

834 )

835 ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):

836 if read1:

837 return self._fp.read1(c_int_max)

838 buffer = io.BytesIO()

839 # Besides `max_chunk_amt` being a maximum chunk size, it

840 # affects memory overhead of reading a response by this

841 # method in CPython.

842 # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum

843 # chunk size that does not lead to an overflow error, but

844 # 256 MiB is a compromise.

845 max_chunk_amt = 2**28

846 while amt is None or amt != 0:

847 if amt is not None:

848 chunk_amt = min(amt, max_chunk_amt)

849 amt -= chunk_amt

850 else:

851 chunk_amt = max_chunk_amt

852 data = self._fp.read(chunk_amt)

853 if not data:

854 break

855 buffer.write(data)

856 del data # to reduce peak memory usage by `max_chunk_amt`.

857 return buffer.getvalue()

858 elif read1:

859 return self._fp.read1(amt) if amt is not None else self._fp.read1()

860 else:

861 # StringIO doesn't like amt=None

862 return self._fp.read(amt) if amt is not None else self._fp.read()

863

864 def _raw_read(

865 self,

866 amt: int | None = None,

867 *,

868 read1: bool = False,

869 ) -> bytes:

870 """

871 Reads `amt` of bytes from the socket.

872 """

873 if self._fp is None:

874 return None # type: ignore[return-value]

875

876 fp_closed = getattr(self._fp, "closed", False)

877

878 with self._error_catcher():

879 data = self._fp_read(amt, read1=read1) if not fp_closed else b""

880 if amt is not None and amt != 0 and not data:

881 # Platform-specific: Buggy versions of Python.

882 # Close the connection when no data is returned

883 #

884 # This is redundant to what httplib/http.client _should_

885 # already do. However, versions of python released before

886 # December 15, 2012 (http://bugs.python.org/issue16298) do

887 # not properly close the connection in all cases. There is

888 # no harm in redundantly calling close.

889 self._fp.close()

890 if (

891 self.enforce_content_length

892 and self.length_remaining is not None

893 and self.length_remaining != 0

894 ):

895 # This is an edge case that httplib failed to cover due

896 # to concerns of backward compatibility. We're

897 # addressing it here to make sure IncompleteRead is

898 # raised during streaming, so all calls with incorrect

899 # Content-Length are caught.

900 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

901 elif read1 and (

902 (amt != 0 and not data) or self.length_remaining == len(data)

903 ):

904 # All data has been read, but `self._fp.read1` in

905 # CPython 3.12 and older doesn't always close

906 # `http.client.HTTPResponse`, so we close it here.

907 # See https://github.com/python/cpython/issues/113199

908 self._fp.close()

909

910 if data:

911 self._fp_bytes_read += len(data)

912 if self.length_remaining is not None:

913 self.length_remaining -= len(data)

914 return data

915

916 def read(

917 self,

918 amt: int | None = None,

919 decode_content: bool | None = None,

920 cache_content: bool = False,

921 ) -> bytes:

922 """

923 Similar to :meth:`http.client.HTTPResponse.read`, but with two additional

924 parameters: ``decode_content`` and ``cache_content``.

925

926 :param amt:

927 How much of the content to read. If specified, caching is skipped

928 because it doesn't make sense to cache partial content as the full

929 response.

930

931 :param decode_content:

932 If True, will attempt to decode the body based on the

933 'content-encoding' header.

934

935 :param cache_content:

936 If True, will save the returned data such that the same result is

937 returned despite of the state of the underlying file object. This

938 is useful if you want the ``.data`` property to continue working

939 after having ``.read()`` the file object. (Overridden if ``amt`` is

940 set.)

941 """

942 self._init_decoder()

943 if decode_content is None:

944 decode_content = self.decode_content

945

946 if amt and amt < 0:

947 # Negative numbers and `None` should be treated the same.

948 amt = None

949 elif amt is not None:

950 cache_content = False

951

952 if len(self._decoded_buffer) >= amt:

953 return self._decoded_buffer.get(amt)

954

955 data = self._raw_read(amt)

956

957 flush_decoder = amt is None or (amt != 0 and not data)

958

959 if not data and len(self._decoded_buffer) == 0:

960 return data

961

962 if amt is None:

963 data = self._decode(data, decode_content, flush_decoder)

964 if cache_content:

965 self._body = data

966 else:

967 # do not waste memory on buffer when not decoding

968 if not decode_content:

969 if self._has_decoded_content:

970 raise RuntimeError(

971 "Calling read(decode_content=False) is not supported after "

972 "read(decode_content=True) was called."

973 )

974 return data

975

976 decoded_data = self._decode(data, decode_content, flush_decoder)

977 self._decoded_buffer.put(decoded_data)

978

979 while len(self._decoded_buffer) < amt and data:

980 # TODO make sure to initially read enough data to get past the headers

981 # For example, the GZ file header takes 10 bytes, we don't want to read

982 # it one byte at a time

983 data = self._raw_read(amt)

984 decoded_data = self._decode(data, decode_content, flush_decoder)

985 self._decoded_buffer.put(decoded_data)

986 data = self._decoded_buffer.get(amt)

987

988 return data

989

def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Counterpart of ``http.client.HTTPResponse.read1`` (see
    :meth:`io.BufferedReader.read1`) that additionally understands
    ``decode_content``.

    :param amt:
        Upper bound on the number of bytes to return. ``None`` and
        negative values both mean "no bound".

    :param decode_content:
        If True, the body is decoded according to the
        'content-encoding' header. When ``None``, the value stored on
        the response (``self.decode_content``) is used.

    :raises RuntimeError:
        If undecoded data is requested after decoded content has
        already been produced.
    """

    def take_buffered() -> bytes:
        # Hand out whatever is pending in the decoded buffer, capped by amt.
        pending = self._decoded_buffer
        return pending.get_all() if amt is None else pending.get(amt)

    if decode_content is None:
        decode_content = self.decode_content
    if amt and amt < 0:
        # A negative size is just another spelling of "no limit".
        amt = None

    # Serve from the already-decoded buffer first; no network needed.
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        if len(self._decoded_buffer) > 0:
            return take_buffered()
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    raw = self._raw_read(amt, read1=True)
    if raw is None or not decode_content:
        return raw

    self._init_decoder()
    while True:
        # An empty raw read means the stream is exhausted: flush the decoder.
        at_eof = not raw
        decoded = self._decode(raw, decode_content, at_eof)
        self._decoded_buffer.put(decoded)
        if decoded or at_eof:
            break
        # The decoder swallowed the input without producing output yet;
        # feed it more raw bytes.
        raw = self._raw_read(8192, read1=True)

    return take_buffered()

1043

def stream(
    self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Generator built on top of :meth:`read` (or :meth:`read_chunked` for
    chunked responses whose body supports chunked reads).

    :param amt:
        Size passed to each underlying read. An iteration may yield
        fewer bytes than this, but empty results from ``read()`` are
        never yielded.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    # Keep going until the file is closed AND nothing decoded is left over.
    while not (is_fp_closed(self._fp) and len(self._decoded_buffer) == 0):
        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece

1070

1071 # Overrides from io.IOBase

def readable(self) -> bool:
    """Report the stream as readable; this implementation always says yes."""
    return True

1074

def shutdown(self) -> None:
    """
    Invoke the stored socket-shutdown callback with ``socket.SHUT_RD``.

    :raises ValueError: If no shutdown callback is set on this response.
    """
    sock_shutdown = self._sock_shutdown
    if not sock_shutdown:
        raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
    sock_shutdown(socket.SHUT_RD)

1079

def close(self) -> None:
    """
    Close the response: drop the shutdown callback, close the wrapped
    file object and the connection when present, and, when
    ``auto_close`` is off, mark the ``io.IOBase`` side as closed too.
    """
    self._sock_shutdown = None

    if not self.closed:
        fp = self._fp
        if fp:
            fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if not self.auto_close:
        io.IOBase.close(self)

1091

@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    With ``auto_close`` off the ``io.IOBase`` flag decides. Otherwise
    the wrapped file object is consulted: its ``isclosed()`` method if
    it has one, else its ``closed`` attribute. A missing file object,
    or one exposing neither, counts as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True

1104

def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()

1115

def flush(self) -> None:
    """
    Flush the wrapped file object, provided it exists, has a ``flush``
    method and does not report itself as closed. Otherwise do nothing.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush"):
        return None
    if getattr(fp, "closed", False):
        return None
    return fp.flush()

1123

def supports_chunked_reads(self) -> bool:
    """
    Tell whether the wrapped file-like object resembles an
    :class:`http.client.HTTPResponse`. The only thing checked is the
    presence of an ``fp`` attribute; if it is there, the object is
    assumed to hand out raw chunks in the form read_chunked() expects.
    """
    wrapped = self._fp
    return hasattr(wrapped, "fp")

1132

def _update_chunk_length(self) -> None:
    """
    Read the next chunk-size line from the raw stream and store the
    parsed size in ``self.chunk_left``. Does nothing while a chunk is
    still in progress (``chunk_left`` is not ``None``).

    :raises InvalidChunkLength:
        If the size line is non-empty but is not a valid hex number.
    :raises ProtocolError:
        If the size line is empty, i.e. the body stopped where a new
        chunk should have begun.
    """
    if self.chunk_left is not None:
        # Still inside a chunk; its size is already known.
        return None

    # Everything after the first ';' is a chunk extension and is ignored.
    size_field = self._fp.fp.readline().partition(b";")[0]  # type: ignore[union-attr]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        self.close()
        if not size_field:
            # Truncated at start of next chunk
            raise ProtocolError("Response ended prematurely") from None
        # Invalid chunked protocol response, abort.
        raise InvalidChunkLength(self, size_field) from None

1150

def _handle_chunk(self, amt: int | None) -> bytes:
    """
    Read bytes belonging to the current chunk via ``self._fp._safe_read``.

    :param amt:
        Number of bytes wanted. ``None`` means "the rest of the chunk".

    If fewer bytes than remain in the chunk are requested, only those
    are read and ``self.chunk_left`` is reduced accordingly. In every
    other case the remainder of the chunk is read, the two bytes that
    follow it are consumed and discarded, and ``self.chunk_left`` is
    reset to ``None``.
    """
    remaining = self.chunk_left

    if amt is not None and remaining is not None and amt < remaining:
        # Partial read: stay inside the current chunk.
        piece = self._fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left = remaining - amt
        return piece  # type: ignore[no-any-return]

    # Consume what is left of the chunk.
    piece = self._fp._safe_read(remaining)  # type: ignore[union-attr]
    self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return piece  # type: ignore[no-any-return]

1172

def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Generator over the body of a chunked response, in the spirit of
    :meth:`HTTPResponse.read` but with a ``decode_content`` parameter.

    :param amt:
        Maximum number of bytes pulled from a chunk per step. ``None``
        and negative values mean "the remainder of the current chunk".

    :param decode_content:
        Forwarded to the decoder for each piece; when truthy the
        decoder is also flushed once the final chunk has been seen.

    :raises ResponseNotChunked:
        If the response is not flagged as chunked.
    :raises BodyNotHttplibCompatible:
        If the wrapped body has no ``fp`` attribute.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # A HEAD response has no body worth reading.
        original = self._original_response
        if original and is_response_to_head(original):
            original.close()
            return

        # Nothing to do when the response was already read and closed.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            self._update_chunk_length()
            if self.chunk_left == 0:
                # A zero-sized chunk terminates the body.
                break
            raw_piece = self._handle_chunk(amt)
            piece = self._decode(
                raw_piece, decode_content=decode_content, flush_decoder=False
            )
            if piece:
                yield piece

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            leftover = self._flush_decoder()
            if leftover:  # Platform-specific: Jython.
                yield leftover

        # Skip whatever follows the last chunk, up to and including the
        # terminating blank line.
        while self._fp is not None:
            trailer = self._fp.fp.readline()
            # EOF also ends the loop: some sites may not end with '\r\n'.
            if not trailer or trailer == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()

1249

@property
def url(self) -> str | None:
    """
    The URL this response came from. If the originating request was
    redirected, this is the final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str) -> None:
    """Record ``url`` as the URL this response came from."""
    self._request_url = url

1262

def __iter__(self) -> typing.Iterator[bytes]:
    """
    Iterate over the decoded body line by line.

    Data comes from ``self.stream(decode_content=True)``. Each yielded
    item ends with ``b"\\n"``, except possibly the last one when the
    body does not end with a newline.
    """
    pending: list[bytes] = []
    for piece in self.stream(decode_content=True):
        if b"\n" not in piece:
            # No line boundary yet; keep collecting.
            pending.append(piece)
            continue

        head, *complete, tail = piece.split(b"\n")
        # The first fragment finishes the line collected so far.
        yield b"".join(pending) + head + b"\n"
        for line in complete:
            yield line + b"\n"
        # Whatever follows the last newline starts the next line.
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)