Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 20%

1from __future__ import annotations

3import collections

4import io

5import json as _json

6import logging

7import socket

8import sys

9import typing

10import warnings

11import zlib

12from contextlib import contextmanager

13from http.client import HTTPMessage as _HttplibHTTPMessage

14from http.client import HTTPResponse as _HttplibHTTPResponse

15from socket import timeout as SocketTimeout

17if typing.TYPE_CHECKING:

18 from ._base_connection import BaseHTTPConnection

20try:

21 try:

22 import brotlicffi as brotli # type: ignore[import-not-found]

23 except ImportError:

24 import brotli # type: ignore[import-not-found]

25except ImportError:

26 brotli = None

28from . import util

29from ._base_connection import _TYPE_BODY

30from ._collections import HTTPHeaderDict

31from .connection import BaseSSLError, HTTPConnection, HTTPException

32from .exceptions import (

33 BodyNotHttplibCompatible,

34 DecodeError,

35 DependencyWarning,

36 HTTPError,

37 IncompleteRead,

38 InvalidChunkLength,

39 InvalidHeader,

40 ProtocolError,

41 ReadTimeoutError,

42 ResponseNotChunked,

43 SSLError,

44)

45from .util.response import is_fp_closed, is_response_to_head

46from .util.retry import Retry

48if typing.TYPE_CHECKING:

49 from .connectionpool import HTTPConnectionPool

51log = logging.getLogger(__name__)

class ContentDecoder:
    """Interface shared by the content decoders defined in this module.

    Every member here raises ``NotImplementedError``; concrete decoders
    override all three.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data``; ``max_length`` is the output cap honoured by subclasses."""
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        """Report whether the decoder still holds input it has not processed."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any output the decoder is still holding."""
        raise NotImplementedError()

class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies.

    The first attempt uses the default zlib container (RFC 1950). If zlib
    rejects the data, the decoder switches to raw DEFLATE (RFC 1951) and
    replays everything it had been given so far.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj()
        # True until the stream format has been settled.
        self._first_try = True
        # Input seen while the format is undecided, kept for a replay.
        self._first_try_data = b""
        # Input parked by a ``max_length == 0`` call.
        self._unfed_data = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data`` and return at most ``max_length`` bytes.

        A negative ``max_length`` means unlimited. ``max_length == 0`` returns
        ``b""`` and stores the input for the next call.
        """
        pending = self._unfed_data + data
        self._unfed_data = b""
        if not (pending or self._obj.unconsumed_tail):
            return pending

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit, so the data is kept for a later call instead.
            self._unfed_data = pending
            return b""

        # zlib spells "no limit" as 0, this API spells it as a negative value.
        zlib_limit = max_length if max_length > 0 else 0

        if not self._first_try:
            # Format already settled: continue from zlib's unconsumed tail.
            return self._obj.decompress(
                self._obj.unconsumed_tail + pending, max_length=zlib_limit
            )

        # Format still undecided: remember the input and try RFC 1950.
        self._first_try_data += pending
        try:
            output = self._obj.decompress(pending, max_length=zlib_limit)
        except zlib.error:
            # Fall back to RFC 1951 and replay everything seen so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._first_try_data, max_length=max_length)
            finally:
                self._first_try_data = b""

        if output:
            # Output was produced, so the zlib container format is confirmed.
            self._first_try = False
            self._first_try_data = b""
        return output

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if parked input exists, or zlib has a tail once the format is settled."""
        if self._unfed_data:
            return True
        return bool(self._obj.unconsumed_tail) and not self._first_try

    def flush(self) -> bytes:
        """Flush the underlying zlib decompressor."""
        return self._obj.flush()

123

124

class GzipDecoderState:
    """Integer states used by ``GzipDecoder``."""

    # FIRST_MEMBER: still inside the first gzip member.
    # OTHER_MEMBERS: a later member has been started.
    # SWALLOW_DATA: a decode error occurred; further input is ignored.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)

129

130

class GzipDecoder(ContentDecoder):
    """Decoder for ``gzip`` bodies, including several concatenated members."""

    def __init__(self) -> None:
        self._state = GzipDecoderState.FIRST_MEMBER
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        # Input not yet decoded; fed back in on the next call.
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data`` and return at most ``max_length`` bytes.

        A ``zlib.error`` in the first member is re-raised. In a later member
        it is swallowed and the output gathered so far is returned. Either
        way, all subsequent calls return ``b""``.
        """
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit. Keep the data for subsequent calls instead.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        pending = self._unconsumed_tail + data
        if self._obj.eof and not pending:
            return b""

        output = bytearray()
        limited = max_length > 0
        while True:
            # A non-positive budget becomes 0, which zlib reads as "no limit".
            budget = max(max_length - len(output), 0)
            try:
                output += self._obj.decompress(pending, max_length=budget)
            except zlib.error:
                failed_in = self._state
                # Ignore data after the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if failed_in == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients.
                    return bytes(output)
                raise

            pending = self._obj.unconsumed_tail or self._obj.unused_data
            self._unconsumed_tail = pending

            if (limited and len(output) >= max_length) or not pending:
                return bytes(output)

            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    @property
    def has_unconsumed_tail(self) -> bool:
        """True while input is stored that has not been decoded yet."""
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor."""
        return self._obj.flush()

193

194

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` bodies.

        Supports both 'brotlipy' and 'Brotli' packages since they share an
        import name: one exposes ``decompress`` on the decompressor object,
        the other exposes ``process``.
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever entry point this decompressor object offers.
            if hasattr(self._obj, "decompress"):
                entry_point = self._obj.decompress
            else:
                entry_point = self._obj.process
            setattr(self, "_decompress", entry_point)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            """Placeholder; ``__init__`` shadows it with the library's method."""
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decode ``data``, asking the library to cap output when ``max_length > 0``."""
            limit_kwargs = {"output_buffer_limit": max_length} if max_length > 0 else {}
            try:
                return self._decompress(data, **limit_kwargs)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the decompressor reports it cannot accept more data.

            Decompressor objects lacking ``can_accept_more_data`` report ``False``.
            """
            try:
                accepts_more = self._obj.can_accept_more_data()
            except AttributeError:
                return False
            return not accepts_more

        def flush(self) -> bytes:
            """Flush the decompressor if it has ``flush``; otherwise return ``b""``."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]

238

239

try:
    # Use ``compression.zstd`` on Python 3.14+, ``backports.zstd`` otherwise.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    # Neither module could be imported: no ``ZstdDecoder`` is defined.
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` bodies, including several concatenated frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decode ``data`` and return the joined output of the frames read.

            ``max_length`` is passed to the decompressor as the output cap; a
            negative value disables the cap in the loop condition below.
            """
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                # The previous frame ended: start a fresh decompressor and
                # prepend whatever the old one left unused.
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    # Only the remaining output budget is passed on.
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    # No output and the decompressor wants more input: stop.
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True if the decompressor neither needs input nor hit ``eof``,
            or if it holds ``unused_data``."""
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            """Return ``b""``; raise ``DecodeError`` if the current frame is unfinished."""
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

298

299

class MultiDecoder(ContentDecoder):
    """
    Chain of decoders for a comma-separated list of content encodings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore runs the decoders in the reverse of the listed order.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        """Build one decoder per entry of ``modes``.

        Raises ``DecodeError`` when more than ``max_decode_links`` encodings
        are listed.
        """
        names = [name.strip() for name in modes.split(",")]
        count = len(names)
        if count > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{count} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(name) for name in names]

    def flush(self) -> bytes:
        """Flush the decoder for the first listed encoding."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Run ``data`` through the chain, returning at most ``max_length`` bytes.

        With ``max_length <= 0`` every decoder is run once without a limit.
        """
        undo_order = self._decoders[::-1]

        if max_length <= 0:
            for decoder in undo_order:
                data = decoder.decompress(data)
            return data

        collected = bytearray()
        # Each pass goes through all decoders once. A pass may yield nothing
        # because every decoder is capped at the remaining budget, which can
        # be too little input for the next decoder to produce output.
        while True:
            produced = False
            for decoder in undo_order:
                data = decoder.decompress(
                    data, max_length=max_length - len(collected)
                )
                # Do not stop on empty output: later decoders may still
                # produce data even with empty input.
                produced = produced or bool(data)
            collected += data
            if len(collected) >= max_length or not produced:
                return bytes(collected)
            # Later passes only drain what the decoders already hold.
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if any decoder in the chain reports an unconsumed tail."""
        for decoder in self._decoders:
            if decoder.has_unconsumed_tail:
                return True
        return False

354

355

def _get_decoder(mode: str) -> ContentDecoder:
    """Return a new decoder for the content-encoding value ``mode``.

    A value containing a comma yields a ``MultiDecoder``. Any value not
    matched below falls through to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    # Optional codecs are only used when their library was importable.
    if mode == "br" and brotli is not None:
        return BrotliDecoder()
    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()

372

373

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        # Queue of chunks; a partially consumed chunk is stored as a memoryview.
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        # Total number of bytes currently queued.
        self._size: int = 0

    def __len__(self) -> int:
        """Return the number of bytes currently queued."""
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` as a new chunk (empty chunks are queued too)."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        Fewer than ``n`` bytes are returned when the queue runs out.

        :raises ValueError: if ``n`` is negative (checked before anything else).
        :raises RuntimeError: if ``n`` is positive and no chunk is queued.
        """
        # Validate the argument first so an invalid ``n`` is always reported
        # as such, even when the queue happens to be empty.
        if n < 0:
            raise ValueError("n should be > 0")
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")

        # Fast path: the first chunk is a bytes object of exactly the
        # requested size, so it can be handed out without copying.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        fetched = 0
        out = io.BytesIO()
        while fetched < n and self.buffer:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Split without copying the part that stays queued.
                view = memoryview(chunk)
                out.write(view[:remaining])
                self.buffer.appendleft(view[remaining:])
                self._size -= remaining
                break
            out.write(chunk)
            self._size -= chunk_length
            fetched += chunk_length

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything queued as a single ``bytes`` object."""
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
            if isinstance(result, memoryview):
                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result

449

450

class BaseHTTPResponse(io.IOBase):
    """Shared state and content-decoding helpers for HTTP response objects.

    Body access, connection handling and ``url`` raise ``NotImplementedError``
    here and are left to subclasses.
    """

    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    # Each optional codec adds its encoding name and its error class.
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
        DECODER_ERROR_CLASSES += (brotli.error,)
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store response metadata and detect chunked transfer encoding."""
        # Reuse an HTTPHeaderDict as-is; wrap any other mapping.
        self.headers = (
            headers
            if isinstance(headers, HTTPHeaderDict)
            else HTTPHeaderDict(headers)  # type: ignore[arg-type]
        )
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the ``retries`` setter, which may assign ``self.url``.
        self.retries = retries

        transfer_encoding = self.headers.get("transfer-encoding", "").lower()
        # A generator avoids building a throwaway list of tokens.
        self.chunked = "chunked" in (
            token.strip() for token in transfer_encoding.split(",")
        )

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status not in self.REDIRECT_STATUSES:
            return False
        return self.headers.get("location")

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        return _json.loads(self.data.decode("utf-8"))

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        """The ``Retry`` object stored for this response, if any."""
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            latest = retries.history[-1]
            self.url = latest.redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Create ``self._decoder`` from the ``content-encoding`` header if none
        is set yet and at least one listed encoding is supported.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is not None:
            return

        if content_encoding in self.CONTENT_DECODERS:
            self._decoder = _get_decoder(content_encoding)
        elif "," in content_encoding:
            supported = self.CONTENT_DECODERS
            # One supported entry is enough; the whole header value is then
            # handed to ``_get_decoder``.
            if any(part.strip() in supported for part in content_encoding.split(",")):
                self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode ``data`` with the configured decoder and optionally flush it.

        ``data`` is returned untouched when ``decode_content`` is falsy.
        Decoder errors are re-raised as ``DecodeError``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        # No limit applies when none was requested or when flushing.
        if flush_decoder or max_length is None:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e

        if flush_decoder:
            data += self._flush_decoder()
        return data

    def _flush_decoder(self) -> bytes:
        """
        Drain the decoder: decompress with empty input, then flush.
        Returns ``b""`` when no decoder is set.
        """
        if not self._decoder:
            return b""
        return self._decoder.decompress(b"") + self._decoder.flush()

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        """Read up to ``len(b)`` bytes into ``b``; return the number stored."""
        chunk = self.read(len(b))
        count = len(chunk)
        if count:
            b[:count] = chunk
        return count

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

687

688

689class HTTPResponse(BaseHTTPResponse):

690 """

691 HTTP Response container.

692

693 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is

694 loaded and decoded on-demand when the ``data`` property is accessed. This

695 class is also compatible with the Python standard library's :mod:`io`

696 module, and can hence be treated as a readable object in the context of that

697 framework.

698

699 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

700

701 :param preload_content:

702 If True, the response's body will be preloaded during construction.

703

704 :param decode_content:

705 If True, will attempt to decode the body based on the

706 'content-encoding' header.

707

708 :param original_response:

709 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`

710 object, it's convenient to include the original for debug purposes. It's

711 otherwise unused.

712

713 :param retries:

714 The retries contains the last :class:`~urllib3.util.retry.Retry` that

715 was used during the request.

716

717 :param enforce_content_length:

718 Enforce content length checking. Body returned by server must match

719 value of Content-Length header, if present. Otherwise, raise error.

720 """

721

def __init__(
    self,
    body: _TYPE_BODY = "",
    headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
    status: int = 0,
    version: int = 0,
    version_string: str = "HTTP/?",
    reason: str | None = None,
    preload_content: bool = True,
    decode_content: bool = True,
    original_response: _HttplibHTTPResponse | None = None,
    pool: HTTPConnectionPool | None = None,
    connection: HTTPConnection | None = None,
    msg: _HttplibHTTPMessage | None = None,
    retries: Retry | None = None,
    enforce_content_length: bool = True,
    request_method: str | None = None,
    request_url: str | None = None,
    auto_close: bool = True,
    sock_shutdown: typing.Callable[[int], None] | None = None,
) -> None:
    """Initialise the response and, if requested, preload its body.

    A non-empty ``str``/``bytes`` ``body`` is stored directly as the body;
    a ``body`` with a ``read`` attribute becomes the underlying file object.
    """
    super().__init__(
        headers=headers,
        status=status,
        version=version,
        version_string=version_string,
        reason=reason,
        decode_content=decode_content,
        request_url=request_url,
        retries=retries,
    )

    # Plain settings and collaborators.
    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close
    self.msg = msg
    self._original_response = original_response
    self._pool = pool
    self._connection = connection
    self._sock_shutdown = sock_shutdown

    # Read bookkeeping.
    self._uncached_read_occurred = False
    self._fp_bytes_read = 0

    # Body source: an in-memory value and/or a readable object.
    self._body = body if body and isinstance(body, (str, bytes)) else None
    self._fp: _HttplibHTTPResponse | None = None
    if hasattr(body, "read"):
        self._fp = body  # type: ignore[assignment]

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left: int | None = None

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # Used to return the correct amount of bytes for partial read()s
    self._decoded_buffer = BytesQueueBuffer()

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)

786

def release_conn(self) -> None:
    """Hand the connection back to the pool and drop the reference to it.

    Does nothing unless both a pool and a connection are set.
    """
    if self._pool and self._connection:
        self._pool._put_conn(self._connection)
        self._connection = None
    return None

793

def drain_conn(self) -> None:
    """
    Read and discard any remaining HTTP response data in the response connection.

    Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
    """
    try:
        self._raw_read()
    except (HTTPError, OSError, BaseSSLError, HTTPException):
        # Errors from the listed families are deliberately ignored here.
        pass

    if not self._has_decoded_content:
        return

    # `_raw_read` skips decompression, so we should clean up the
    # decoder to avoid keeping unnecessary data in memory.
    self._decoded_buffer = BytesQueueBuffer()
    self._decoder = None

809

@property
def data(self) -> bytes:
    """Return the response body.

    A truthy stored body is returned as-is; otherwise the body is read with
    ``cache_content=True`` if a file object is set. With neither, the result
    is ``None``.
    """
    # Kept for backwards compatibility with urllib3 0.4 and earlier.
    cached = self._body
    if cached:
        return cached  # type: ignore[return-value]

    return self.read(cache_content=True) if self._fp else None  # type: ignore[return-value]

820

@property
def connection(self) -> HTTPConnection | None:
    """The connection currently held by this response, or ``None``."""
    return self._connection

824

def isclosed(self) -> bool:
    """Return what ``is_fp_closed`` reports for the underlying file object."""
    return is_fp_closed(self._fp)

827

def tell(self) -> int:
    """
    Return how many bytes have been pulled over the wire so far.

    This may differ from the amount of content returned by
    :meth:`HTTPResponse.read` if bytes are encoded on the wire
    (e.g, compressed).
    """
    return self._fp_bytes_read

835

def _init_length(self, request_method: str | None) -> int | None:
    """
    Work out the expected body length from the ``content-length`` header.

    :returns: ``0`` for responses that carry no body (status 204, 304, 1xx,
        or a ``HEAD`` request); the parsed non-negative length when the
        header is usable; otherwise ``None``.
    :raises InvalidHeader: if the header lists several different values.
    """
    raw_value: str | None = self.headers.get("content-length")
    length: int | None = None

    if raw_value is not None:
        if self.chunked:
            # This Response will fail with an IncompleteRead if it can't be
            # received as chunked. This method falls back to attempt reading
            # the response before raising an exception.
            log.warning(
                "Received response with both Content-Length and "
                "Transfer-Encoding set. This is expressly forbidden "
                "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                "attempting to process response as Transfer-Encoding: "
                "chunked."
            )
            return None

        try:
            # RFC 7230 section 3.3.2 specifies multiple content lengths can
            # be sent in a single Content-Length header
            # (e.g. Content-Length: 42, 42). Collecting the parsed values in
            # a set checks that each one is an int and that they all agree.
            distinct = {int(part) for part in raw_value.split(",")}
            if len(distinct) > 1:
                raise InvalidHeader(
                    "Content-Length contained multiple "
                    "unmatching values (%s)" % raw_value
                )
            candidate = distinct.pop()
        except ValueError:
            # Unparseable header: the length stays unknown.
            pass
        else:
            if candidate >= 0:
                length = candidate

    # Convert status to int for comparison
    # In some cases, httplib returns a status of "_UNKNOWN"
    try:
        status = int(self.status)
    except ValueError:
        status = 0

    # Check for responses that shouldn't include a body
    bodiless = (
        status in (204, 304) or 100 <= status < 200 or request_method == "HEAD"
    )
    return 0 if bodiless else length

891

@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
    """
    Translate low-level exceptions raised inside the ``with`` body into
    urllib3 exceptions so they do not leak through the high-level API.

    After a failure the original response and the connection are closed.
    In every case, the connection is released back to the pool once the
    original response is closed.
    """
    finished_cleanly = False

    try:
        try:
            yield

        except SocketTimeout as exc:
            # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
            # there is yet no clean way to get at it from this context.
            raise ReadTimeoutError(self._pool, None, "Read timed out.") from exc  # type: ignore[arg-type]

        except BaseSSLError as exc:
            # SSL errors related to framing/MAC get wrapped and reraised here
            raise SSLError(exc) from exc

        except IncompleteRead as exc:
            no_content = (
                exc.expected is not None
                and exc.partial is not None
                and exc.expected == -exc.partial
            )
            message = (
                "Response may not contain content."
                if no_content
                else f"Connection broken: {exc!r}"
            )
            raise ProtocolError(message, exc) from exc

        except (HTTPException, OSError) as exc:
            raise ProtocolError(f"Connection broken: {exc!r}", exc) from exc

        # Reached only when the body raised nothing, so the cleanup below
        # can be skipped.
        finished_cleanly = True
    finally:
        if not finished_cleanly:
            # The response may not be closed but we're not going to use it
            # anymore so close it now to ensure that the connection is
            # released back to the pool.
            if self._original_response:
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection:
                self._connection.close()

        # If we hold the original response but it's closed now, we should
        # return the connection back to the pool.
        if self._original_response and self._original_response.isclosed():
            self.release_conn()

953

def _fp_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Read from the underlying file object, guarding against oversized reads.

    Reading more bytes than fit in a 32-bit int at a time via SSL in some
    known cases leads to an overflow error. When `amt` or
    `self.length_remaining` indicate that this may happen, the read is
    capped (``read1``) or split into smaller chunks (``read``).

    This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
    """
    assert self._fp
    c_int_max = 2**31 - 1

    # Size check first, so the pyOpenSSL flag is only consulted when needed.
    oversized = (amt and amt > c_int_max) or (
        amt is None and self.length_remaining and self.length_remaining > c_int_max
    )
    if oversized and util.IS_PYOPENSSL:
        if read1:
            return self._fp.read1(c_int_max)

        # Besides `max_chunk_amt` being a maximum chunk size, it
        # affects memory overhead of reading a response by this
        # method in CPython.
        # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
        # chunk size that does not lead to an overflow error, but
        # 256 MiB is a compromise.
        max_chunk_amt = 2**28
        collected = io.BytesIO()
        while amt is None or amt != 0:
            if amt is None:
                chunk_amt = max_chunk_amt
            else:
                chunk_amt = min(amt, max_chunk_amt)
                amt -= chunk_amt
            piece = self._fp.read(chunk_amt)
            if not piece:
                break
            collected.write(piece)
            del piece  # to reduce peak memory usage by `max_chunk_amt`.
        return collected.getvalue()

    reader = self._fp.read1 if read1 else self._fp.read
    # StringIO doesn't like amt=None, so call without an argument instead.
    return reader() if amt is None else reader(amt)

1006

def _raw_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Read up to `amt` bytes from the underlying file object without decoding
    and update the byte counters.

    Returns ``None`` when there is no file object. Raises ``IncompleteRead``
    if the stream ends while ``length_remaining`` is still non-zero and
    content-length enforcement is on.
    """
    if self._fp is None:
        return None  # type: ignore[return-value]

    already_closed = getattr(self._fp, "closed", False)

    with self._error_catcher():
        data = b"" if already_closed else self._fp_read(amt, read1=read1)
        if amt is not None and amt != 0 and not data:
            # Platform-specific: Buggy versions of Python.
            # Close the connection when no data is returned
            #
            # This is redundant to what httplib/http.client _should_
            # already do. However, versions of python released before
            # December 15, 2012 (http://bugs.python.org/issue16298) do
            # not properly close the connection in all cases. There is
            # no harm in redundantly calling close.
            self._fp.close()
            body_incomplete = (
                self.enforce_content_length
                and self.length_remaining is not None
                and self.length_remaining != 0
            )
            if body_incomplete:
                # This is an edge case that httplib failed to cover due
                # to concerns of backward compatibility. We're
                # addressing it here to make sure IncompleteRead is
                # raised during streaming, so all calls with incorrect
                # Content-Length are caught.
                raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
        elif read1 and (
            (amt != 0 and not data) or self.length_remaining == len(data)
        ):
            # All data has been read, but `self._fp.read1` in
            # CPython 3.12 and older doesn't always close
            # `http.client.HTTPResponse`, so we close it here.
            # See https://github.com/python/cpython/issues/113199
            self._fp.close()

    if data:
        received = len(data)
        self._fp_bytes_read += received
        if self.length_remaining is not None:
            self.length_remaining -= received
    return data

1058

def read(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
    cache_content: bool = False,
) -> bytes:
    """
    Counterpart of :meth:`http.client.HTTPResponse.read` that takes two extra
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        Upper bound on how much content to return. Negative values are
        treated like ``None`` (read everything). Whenever a non-negative
        amount is given, caching is disabled, since a partial body must not
        be stored as if it were the full response.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` falls back to
        ``self.decode_content``.

    :param cache_content:
        If True, the returned data is stored on the response so that the same
        result stays available regardless of the state of the underlying
        file object. This keeps the ``.data`` property working after the
        file object has been ``.read()``. (Overridden if ``amt`` is set.)

    :raises RuntimeError:
        On a sized read with decoding disabled after content has already been
        decoded on this response.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if amt is not None and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    elif amt is not None:
        cache_content = False

        # The decoder may still hold input it has not expanded yet; use that
        # before asking the network for more.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and len(self._decoded_buffer) < amt
        ):
            pending = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(pending)
        if len(self._decoded_buffer) >= amt:
            return self._decoded_buffer.get(amt)

    raw = self._raw_read(amt)
    if not cache_content:
        self._uncached_read_occurred = True

    flush_decoder = amt is None or (amt != 0 and not raw)

    nothing_pending = (
        not raw
        and len(self._decoded_buffer) == 0
        and not (self._decoder and self._decoder.has_unconsumed_tail)
    )
    if nothing_pending:
        return raw

    if amt is None:
        body = self._decode(raw, decode_content, flush_decoder)
        # An earlier partial read may have left decoded data in the buffer.
        if decode_content and len(self._decoded_buffer) > 0:
            self._decoded_buffer.put(body)
            body = self._decoded_buffer.get_all()

        if cache_content and not self._uncached_read_occurred:
            self._body = body
        return body

    # do not waste memory on buffer when not decoding
    if not decode_content:
        if self._has_decoded_content:
            raise RuntimeError(
                "Calling read(decode_content=False) is not supported after "
                "read(decode_content=True) was called."
            )
        return raw

    self._decoded_buffer.put(
        self._decode(
            raw,
            decode_content,
            flush_decoder,
            max_length=amt - len(self._decoded_buffer),
        )
    )

    while raw and len(self._decoded_buffer) < amt:
        # TODO make sure to initially read enough data to get past the headers
        # For example, the GZ file header takes 10 bytes, we don't want to read
        # it one byte at a time
        raw = self._raw_read(amt)
        self._decoded_buffer.put(
            self._decode(
                raw,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
        )

    return self._decoded_buffer.get(amt)

1166

def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Counterpart of ``http.client.HTTPResponse.read1`` (documented in
    :meth:`io.BufferedReader.read1`) that takes one extra parameter:
    ``decode_content``.

    :param amt:
        How much of the content to read. Negative values are treated like
        ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` falls back to
        ``self.decode_content``.

    :raises RuntimeError:
        If decoding is disabled after content has already been decoded on
        this response.
    """
    if decode_content is None:
        decode_content = self.decode_content
    if amt is not None and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None

    def take_buffered() -> bytes:
        # Hand out at most `amt` decoded bytes, or everything when unbounded.
        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

    # try and respond without going to the network
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        ):
            if amt is None:
                budget = None
            else:
                budget = amt - len(self._decoded_buffer)
            self._decoded_buffer.put(
                self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=budget,
                )
            )
        if len(self._decoded_buffer) > 0:
            return take_buffered()
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    raw = self._raw_read(amt, read1=True)
    self._uncached_read_occurred = True
    if not decode_content or raw is None:
        return raw

    self._init_decoder()
    while True:
        # An empty raw read means the stream ended, so flush the decoder.
        at_eof = not raw
        decoded = self._decode(raw, decode_content, at_eof, max_length=amt)
        self._decoded_buffer.put(decoded)
        if decoded or at_eof:
            break
        # The input produced no output yet (e.g. only header bytes were
        # consumed); pull more raw data and try again.
        raw = self._raw_read(8192, read1=True)

    return take_buffered()

1237

def stream(
    self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Generator wrapper around the read() method. Each step blocks until
    ``amt`` bytes have been read from the connection or until the
    connection is closed.

    :param amt:
        How much of the content to read. The generator yields up to this
        much data per iteration, but may yield less. This is particularly
        likely when using compressed data. However, the empty string will
        never be yielded. ``0`` yields nothing at all.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if amt == 0:
        return

    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    # Keep going while the file is open, or while decoded / not yet decoded
    # data is still held locally.
    while (
        not is_fp_closed(self._fp)
        or len(self._decoded_buffer) > 0
        or (self._decoder and self._decoder.has_unconsumed_tail)
    ):
        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece

1271

1272 # Overrides from io.IOBase

def readable(self) -> bool:
    """Report that this object can be read from; always ``True``."""
    return True

1275

def shutdown(self) -> None:
    """
    Shut down the read side of the socket via the stored ``_sock_shutdown``
    callable.

    :raises ValueError:
        If ``_sock_shutdown`` is not set.

    :raises RuntimeError:
        If ``_connection`` is ``None``, i.e. the connection has already been
        released to the pool.
    """
    sock_shutdown = self._sock_shutdown
    if not sock_shutdown:
        raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
    if self._connection is None:
        raise RuntimeError(
            "Cannot shutdown as connection has already been released to the pool"
        )
    sock_shutdown(socket.SHUT_RD)

1284

def close(self) -> None:
    """
    Close the response.

    Clears ``_sock_shutdown``, closes the wrapped file object if the response
    is not already closed, closes the connection if there is one, and, when
    ``auto_close`` is off, also marks the ``io.IOBase`` side as closed.
    """
    self._sock_shutdown = None

    if not self.closed:
        fp = self._fp
        if fp:
            fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if not self.auto_close:
        io.IOBase.close(self)

1296

@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    With ``auto_close`` off, the ``io.IOBase`` closed flag is reported.
    Otherwise the state is taken from the wrapped file object: its
    ``isclosed()`` method if present, else its ``closed`` attribute. A
    missing file object, or one exposing neither, counts as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True

1309

def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()

1320

def flush(self) -> None:
    """
    Flush the wrapped file object and return whatever its ``flush()`` returns.

    Nothing happens when there is no file object, when it has no ``flush``
    attribute, or when its ``closed`` attribute is truthy.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush") or getattr(fp, "closed", False):
        return None
    return fp.flush()

1328

def supports_chunked_reads(self) -> bool:
    """
    Tell whether the wrapped file-like object looks like a
    :class:`http.client.HTTPResponse`.

    The only test is the presence of an ``fp`` attribute; when it exists the
    object is assumed to hand out raw chunks in the form read_chunked()
    processes.
    """
    looks_like_httplib = hasattr(self._fp, "fp")
    return looks_like_httplib

1337

def _update_chunk_length(self) -> None:
    """
    Parse the next chunk-size line into ``self.chunk_left``.

    Does nothing while ``chunk_left`` is already set. Otherwise one line is
    read from the raw socket file, anything after the first ``;`` is
    dropped, and the rest is parsed as a hexadecimal integer.

    :raises InvalidChunkLength:
        If the size field is non-empty but is not valid hexadecimal.

    :raises ProtocolError:
        If the size field is empty, i.e. the stream was cut off where the
        next chunk should have started.
    """
    # The size of the current chunk is still known; nothing to parse.
    if self.chunk_left is not None:
        return None

    raw_line = self._fp.fp.readline()  # type: ignore[union-attr]
    size_field = raw_line.partition(b";")[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        self.close()
        if not size_field:
            # Truncated at start of next chunk
            raise ProtocolError("Response ended prematurely") from None
        # Invalid chunked protocol response, abort.
        raise InvalidChunkLength(self, size_field) from None

1355

def _handle_chunk(self, amt: int | None) -> bytes:
    """
    Read data belonging to the current chunk and update ``self.chunk_left``.

    :param amt:
        Maximum number of bytes wanted. ``None`` means the whole remainder
        of the current chunk.

    :returns:
        ``amt`` bytes when that is less than what is left of the chunk (the
        chunk then stays open with ``chunk_left`` reduced accordingly);
        otherwise everything left of the chunk, after which the trailing
        CRLF is consumed and ``chunk_left`` is reset to ``None``.
    """
    remaining = self.chunk_left

    if amt is not None and remaining is not None and amt < remaining:
        # Partial read: the chunk is not finished yet.
        piece = self._fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left = remaining - amt
        return piece  # type: ignore[no-any-return]

    # amt is None, or amt >= what is left: finish the chunk.
    piece = self._fp._safe_read(remaining)  # type: ignore[union-attr]
    self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return piece  # type: ignore[no-any-return]

1377

def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Generator over the body of a ``transfer-encoding: chunked`` response.
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``.

    :param amt:
        Upper bound on how much to read from a chunk per step. ``0`` yields
        nothing; negative values are treated like ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response is not chunked.

    :raises BodyNotHttplibCompatible:
        If the wrapped body does not pass ``supports_chunked_reads()``.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # A HEAD response carries no body, so there is nothing to read.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return None

        # The response was already read and closed earlier: nothing to do.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt == 0:
            return
        if amt is not None and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            if self._decoder and self._decoder.has_unconsumed_tail:
                # The decoder still holds buffered input; drain it before
                # reading another chunk.
                raw_chunk = b""
            else:
                self._update_chunk_length()
                self._uncached_read_occurred = True
                if self.chunk_left == 0:
                    # A zero-length chunk terminates the body.
                    break
                raw_chunk = self._handle_chunk(amt)
            piece = self._decode(
                raw_chunk,
                decode_content=decode_content,
                flush_decoder=False,
                max_length=amt,
            )
            if piece:
                yield piece

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            leftover = self._flush_decoder()
            if leftover:  # Platform-specific: Jython.
                yield leftover

        # Skip whatever follows the last chunk, up to and including the
        # terminating blank line.
        while self._fp is not None:
            trailer = self._fp.fp.readline()
            # An empty read covers sites that do not end with '\r\n'.
            if not trailer or trailer == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()

1464

@property
def url(self) -> str | None:
    """
    The URL this response came from.

    If the request that generated this response was redirected, this is the
    final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str | None) -> None:
    # Plain assignment; the value is stored as given.
    self._request_url = url

1477

def __iter__(self) -> typing.Iterator[bytes]:
    """
    Iterate over the decoded body line by line.

    Data comes from ``self.stream(decode_content=True)``. Every yielded item
    ends with ``b"\\n"``, except possibly the last one, which carries
    whatever followed the final newline.
    """
    # Pieces of the line currently being assembled across stream chunks.
    pending: list[bytes] = []
    for chunk in self.stream(decode_content=True):
        head, newline, rest = chunk.partition(b"\n")
        if not newline:
            pending.append(chunk)
            continue

        # The first newline completes the line carried over so far.
        yield b"".join(pending) + head + b"\n"

        *complete, tail = rest.split(b"\n")
        for line in complete:
            yield line + b"\n"
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)