Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 20%

1from __future__ import annotations

3import collections

4import io

5import json as _json

6import logging

7import socket

8import sys

9import typing

10import warnings

11import zlib

12from contextlib import contextmanager

13from http.client import HTTPMessage as _HttplibHTTPMessage

14from http.client import HTTPResponse as _HttplibHTTPResponse

15from socket import timeout as SocketTimeout

17if typing.TYPE_CHECKING:

18 from ._base_connection import BaseHTTPConnection

20try:

21 try:

22 import brotlicffi as brotli # type: ignore[import-not-found]

23 except ImportError:

24 import brotli # type: ignore[import-not-found]

25except ImportError:

26 brotli = None

28from . import util

29from ._base_connection import _TYPE_BODY

30from ._collections import HTTPHeaderDict

31from .connection import BaseSSLError, HTTPConnection, HTTPException

32from .exceptions import (

33 BodyNotHttplibCompatible,

34 DecodeError,

35 DependencyWarning,

36 HTTPError,

37 IncompleteRead,

38 InvalidChunkLength,

39 InvalidHeader,

40 ProtocolError,

41 ReadTimeoutError,

42 ResponseNotChunked,

43 SSLError,

44)

45from .util.response import is_fp_closed, is_response_to_head

46from .util.retry import Retry

48if typing.TYPE_CHECKING:

49 from .connectionpool import HTTPConnectionPool

# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Size of a single read in bytes: 64 KiB.
_READ_CHUNK_SIZE = 1 << 16

class ContentDecoder:
    """Interface implemented by the content decoders in this module.

    Every member raises ``NotImplementedError`` here; concrete decoders
    override all three.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data``.

        ``max_length`` defaults to ``-1``; the concrete decoders in this
        module treat a positive value as a cap on the returned output.
        """
        raise NotImplementedError

    @property
    def has_unconsumed_tail(self) -> bool:
        """Whether the decoder still holds input it has not processed."""
        raise NotImplementedError

    def flush(self) -> bytes:
        """Return whatever output the decoder can still emit."""
        raise NotImplementedError

class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies.

    The first attempt assumes the RFC 1950 zlib container; if zlib rejects
    the input, the data seen so far is replayed through a raw RFC 1951
    DEFLATE decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        # Input accumulated during the first attempt, replayed on fallback.
        self._first_try_data = b""
        # Input parked by a call made with ``max_length == 0``.
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data`` plus any parked input.

        A negative ``max_length`` means no limit. A ``max_length`` of ``0``
        stores the input for a later call and returns ``b""``.
        """
        pending = self._unfed_data + data
        self._unfed_data = b""
        if not pending and not self._obj.unconsumed_tail:
            return pending

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit. Data should be stored for subsequent calls.
            self._unfed_data = pending
            return b""

        # zlib spells "no limit" as 0, callers of this class as a negative.
        zlib_limit = 0 if max_length < 0 else max_length

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + pending, max_length=zlib_limit
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += pending
        try:
            decompressed = self._obj.decompress(pending, max_length=zlib_limit)
        except zlib.error:
            # On failure, fall back to RFC 1951 DEFLATE format and replay
            # everything collected during the first attempt.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._first_try_data, max_length=max_length)
            finally:
                self._first_try_data = b""

        if decompressed:
            # Output was produced, so the zlib container guess was right.
            self._first_try = False
            self._first_try_data = b""
        return decompressed

    @property
    def has_unconsumed_tail(self) -> bool:
        """True when parked input or a zlib unconsumed tail is waiting."""
        if self._unfed_data:
            return True
        return bool(self._obj.unconsumed_tail) and not self._first_try

    def flush(self) -> bytes:
        """Flush the underlying zlib decompressor."""
        return self._obj.flush()

126

127

class GzipDecoderState:
    """Integer states used by ``GzipDecoder`` to track its progress."""

    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = 0, 1, 2

132

133

class GzipDecoder(ContentDecoder):
    """Decoder for ``gzip`` bodies, including multi-member streams."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        # Input that has been received but not yet decompressed.
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data`` plus any input kept from earlier calls.

        A positive ``max_length`` caps the returned output. A ``max_length``
        of ``0`` stores the input for a later call and returns ``b""``.
        A ``zlib.error`` raised while still in the first member propagates;
        in a later member it ends decoding and the output so far is returned.
        """
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit. Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        pending = self._unconsumed_tail + data
        if not pending and self._obj.eof:
            return b""

        output = bytearray()
        while True:
            try:
                output += self._obj.decompress(
                    pending, max_length=max(max_length - len(output), 0)
                )
            except zlib.error:
                in_later_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if in_later_member:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise

            pending = self._obj.unconsumed_tail or self._obj.unused_data
            self._unconsumed_tail = pending

            limit_reached = max_length > 0 and len(output) >= max_length
            if limit_reached or not pending:
                return bytes(output)

            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    @property
    def has_unconsumed_tail(self) -> bool:
        """True when stored input is waiting to be decompressed."""
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor."""
        return self._obj.flush()

196

197

if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` bodies, defined only when a brotli module imported."""

        # Supports both 'brotlipy' and 'Brotli' packages since they share
        # an import name. Per the original note, the ``decompress`` method
        # is the 'brotlipy' spelling and ``process`` the 'Brotli' one.
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            backend_method = (
                "decompress" if hasattr(self._obj, "decompress") else "process"
            )
            # The instance attribute shadows the placeholder method below.
            setattr(self, "_decompress", getattr(self._obj, backend_method))

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data``, capping output only for a positive ``max_length``.

            If the backend raises ``TypeError``, a ``DependencyWarning`` is
            emitted and the call is retried without a limit.
            """
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the backend reports it cannot accept more data.

            Backends without ``can_accept_more_data`` are treated as having
            no tail.
            """
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            """Flush the backend if it offers ``flush``; otherwise return ``b""``."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]

241

242

try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` bodies that may contain several frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data``; a positive ``max_length`` caps the output."""
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                # The previous frame is finished: carry its leftover bytes
                # over to a fresh decompressor.
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()

            first_piece = self._obj.decompress(data, max_length=max_length)
            produced = len(first_piece)
            pieces = [first_piece]

            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or produced < max_length)
            ):
                leftover = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                budget = (max_length - produced) if max_length > 0 else -1
                piece = self._obj.decompress(leftover, max_length=budget)
                if piece:
                    pieces.append(piece)
                    produced += len(piece)
                elif self._obj.needs_input:
                    break
            return b"".join(pieces)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when buffered input or unused trailing data remains."""
            mid_frame = not (self._obj.needs_input or self._obj.eof)
            return mid_frame or bool(self._obj.unused_data)

        def flush(self) -> bytes:
            """Return ``b""``; raise ``DecodeError`` if the frame is unfinished."""
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

301

302

class MultiDecoder(ContentDecoder):
    """
    Chain of decoders for a comma-separated Content-Encoding value.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore runs through the listed encodings in reverse order.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        names = [part.strip() for part in modes.split(",")]
        if len(names) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(names)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(name) for name in names]

    def flush(self) -> bytes:
        """Flush the decoder of the first listed encoding only."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Run ``data`` through every decoder, last listed encoding first.

        With ``max_length <= 0`` each decoder is called once with no limit.
        Otherwise the chain is driven repeatedly until ``max_length`` bytes
        were collected or a full pass produced no data at any stage.
        """
        if max_length <= 0:
            for decoder in reversed(self._decoders):
                data = decoder.decompress(data)
            return data

        collected = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            produced_anything = False
            for decoder in reversed(self._decoders):
                data = decoder.decompress(
                    data, max_length=max_length - len(collected)
                )
                # We should not break when no data is returned because
                # next decoders may produce data even with empty input.
                produced_anything = produced_anything or bool(data)
            collected += data
            if len(collected) >= max_length or not produced_anything:
                return bytes(collected)
            # Later passes only drain what the decoders already hold.
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True when any decoder in the chain reports an unconsumed tail."""
        for decoder in self._decoders:
            if decoder.has_unconsumed_tail:
                return True
        return False

357

358

def _get_decoder(mode: str) -> ContentDecoder:
    """Build the decoder matching a Content-Encoding value.

    A value containing a comma yields a ``MultiDecoder``. Any value that is
    not recognised, or whose optional backend is unavailable, yields a
    ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)
    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    elif mode in ("gzip", "x-gzip"):
        return GzipDecoder()
    elif brotli is not None and mode == "br":
        return BrotliDecoder()
    elif HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()
    else:
        return DeflateDecoder()

375

376

class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        # Total number of bytes currently held across all queued chunks.
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` as a new chunk at the end of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        Fewer than ``n`` bytes are returned when the queue runs out.
        Raises ``RuntimeError`` if the queue is empty and ``ValueError`` if
        ``n`` is negative; ``n == 0`` always returns ``b""``.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the first chunk is exactly what was asked for.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        out = io.BytesIO()
        wanted = n
        while wanted > 0 and self.buffer:
            piece = self.buffer.popleft()
            piece_length = len(piece)
            if wanted < piece_length:
                # Split without copying: the unread part goes back as a view.
                view = memoryview(piece)
                out.write(view[:wanted])
                self.buffer.appendleft(view[wanted:])
                self._size -= wanted
                break
            out.write(piece)
            self._size -= piece_length
            wanted -= piece_length

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything in the queue as one ``bytes`` object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""

        if len(chunks) == 1:
            only = chunks.pop()
            joined = only.tobytes() if isinstance(only, memoryview) else only
        else:
            sink = io.BytesIO()
            while chunks:
                sink.write(chunks.popleft())
            joined = sink.getvalue()

        self._size = 0
        return joined

452

453

class BaseHTTPResponse(io.IOBase):
    """Shared state and content-decoding helpers for HTTP responses.

    The body-reading and connection-management members raise
    ``NotImplementedError`` here and are meant to be overridden.
    """

    # Content-Encoding values that can be decoded with the available modules.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions from decoders that ``_decode`` converts into ``DecodeError``.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Reuse an HTTPHeaderDict as-is; wrap anything else.
        self.headers = (
            headers
            if isinstance(headers, HTTPHeaderDict)
            else HTTPHeaderDict(headers)  # type: ignore[arg-type]
        )
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        # Assigned before ``retries`` because the retries setter may
        # assign ``self.url``.
        self._request_url: str | None = request_url
        self.retries = retries

        transfer_encoding = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        self.chunked = "chunked" in (
            enc.strip() for enc in transfer_encoding.split(",")
        )

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status not in self.REDIRECT_STATUSES:
            return False
        return self.headers.get("location")

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        return _json.loads(self.data.decode("utf-8"))

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = _READ_CHUNK_SIZE, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.

        A decoder is created when the content-encoding header is a supported
        encoding, or a comma-separated list naming at least one supported
        encoding. An existing decoder is left untouched.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is not None:
            return

        supported = self.CONTENT_DECODERS
        if content_encoding in supported:
            self._decoder = _get_decoder(content_encoding)
        elif "," in content_encoding and any(
            name.strip() in supported for name in content_encoding.split(",")
        ):
            self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        With a falsy ``decode_content`` the data is returned unchanged, unless
        content was already decoded earlier, which raises ``RuntimeError``.
        Decoder errors listed in ``DECODER_ERROR_CLASSES`` are re-raised as
        ``DecodeError``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        # No limit when none was given or when the decoder is being flushed.
        limit = -1 if (max_length is None or flush_decoder) else max_length

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=limit)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e

        if flush_decoder:
            data += self._flush_decoder()
        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if not self._decoder:
            return b""
        return self._decoder.decompress(b"") + self._decoder.flush()

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        """Read up to ``len(b)`` bytes into ``b`` and return the count read."""
        chunk = self.read(len(b))
        count = len(chunk)
        if count:
            b[:count] = chunk
        return count

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url

690

691

692class HTTPResponse(BaseHTTPResponse):

693 """

694 HTTP Response container.

695

696 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is

697 loaded and decoded on-demand when the ``data`` property is accessed. This

698 class is also compatible with the Python standard library's :mod:`io`

699 module, and can hence be treated as a readable object in the context of that

700 framework.

701

702 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

703

704 :param preload_content:

705 If True, the response's body will be preloaded during construction.

706

707 :param decode_content:

708 If True, will attempt to decode the body based on the

709 'content-encoding' header.

710

711 :param original_response:

712 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`

713 object, it's convenient to include the original for debug purposes. It's

714 otherwise unused.

715

716 :param retries:

717 The retries contains the last :class:`~urllib3.util.retry.Retry` that

718 was used during the request.

719

720 :param enforce_content_length:

721 Enforce content length checking. Body returned by server must match

722 value of Content-Length header, if present. Otherwise, raise error.

723 """

724

def __init__(
    self,
    body: _TYPE_BODY = "",
    headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
    status: int = 0,
    version: int = 0,
    version_string: str = "HTTP/?",
    reason: str | None = None,
    preload_content: bool = True,
    decode_content: bool = True,
    original_response: _HttplibHTTPResponse | None = None,
    pool: HTTPConnectionPool | None = None,
    connection: HTTPConnection | None = None,
    msg: _HttplibHTTPMessage | None = None,
    retries: Retry | None = None,
    enforce_content_length: bool = True,
    request_method: str | None = None,
    request_url: str | None = None,
    auto_close: bool = True,
    sock_shutdown: typing.Callable[[int], None] | None = None,
) -> None:
    """Initialise the response and, if requested, preload its body."""
    super().__init__(
        headers=headers,
        status=status,
        version=version,
        version_string=version_string,
        reason=reason,
        decode_content=decode_content,
        request_url=request_url,
        retries=retries,
    )

    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close

    self._uncached_read_occurred = False
    self._original_response = original_response
    self._fp_bytes_read = 0
    self.msg = msg
    self._pool = pool
    self._connection = connection
    self._sock_shutdown = sock_shutdown

    # A non-empty str/bytes body is kept directly as the cached body.
    self._body = body if (body and isinstance(body, (str, bytes))) else None

    # Anything exposing ``read`` is used as the underlying file object.
    self._fp: _HttplibHTTPResponse | None = (
        body if hasattr(body, "read") else None  # type: ignore[assignment]
    )

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left: int | None = None

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # Used to return the correct amount of bytes for partial read()s
    self._decoded_buffer = BytesQueueBuffer()

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)

789

def release_conn(self) -> None:
    """Hand the connection back to the pool, if both are set.

    Afterwards the response no longer references the connection.
    """
    if self._pool and self._connection:
        self._pool._put_conn(self._connection)
        self._connection = None
    return None

796

def drain_conn(self) -> None:
    """
    Read and discard any remaining HTTP response data in the response connection.

    Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
    """
    try:
        while True:
            if not self._raw_read(_READ_CHUNK_SIZE):
                break
    except (HTTPError, OSError, BaseSSLError, HTTPException):
        # Draining is best-effort: read failures are deliberately ignored.
        pass

    if self._has_decoded_content:
        # `_raw_read` skips decompression, so we should clean up the
        # decoder to avoid keeping unnecessary data in memory.
        self._decoded_buffer = BytesQueueBuffer()
        self._decoder = None

813

@property
def data(self) -> bytes:
    """Return the response body.

    A truthy cached body is returned as-is. Otherwise, if a file object is
    attached, the body is read with ``cache_content=True``. With neither,
    the result is ``None``.
    """
    # For backwards-compat with earlier urllib3 0.4 and earlier.
    cached = self._body
    if cached:
        return cached  # type: ignore[return-value]

    if not self._fp:
        return None  # type: ignore[return-value]

    return self.read(cache_content=True)

824

@property
def connection(self) -> HTTPConnection | None:
    """The connection currently held by this response, or ``None``."""
    held = self._connection
    return held

828

def isclosed(self) -> bool:
    """Report whether the underlying file object counts as closed."""
    fp = self._fp
    return is_fp_closed(fp)

831

def tell(self) -> int:
    """
    Obtain the number of bytes pulled over the wire so far. May differ from
    the amount of content returned by :meth:`HTTPResponse.read`
    if bytes are encoded on the wire (e.g, compressed).
    """
    bytes_read = self._fp_bytes_read
    return bytes_read

839

def _init_length(self, request_method: str | None) -> int | None:
    """
    Set initial length value for Response content if available.

    :returns: The Content-Length as an int, ``0`` for responses that must
        not carry a body, or ``None`` when the length is unknown, invalid,
        negative, or superseded by chunked transfer encoding.
    :raises InvalidHeader: If Content-Length lists differing values.
    """
    length: int | None = None
    content_length: str | None = self.headers.get("content-length")

    if content_length is not None:
        if self.chunked:
            # This Response will fail with an IncompleteRead if it can't be
            # received as chunked. This method falls back to attempt reading
            # the response before raising an exception.
            log.warning(
                "Received response with both Content-Length and "
                "Transfer-Encoding set. This is expressly forbidden "
                "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                "attempting to process response as Transfer-Encoding: "
                "chunked."
            )
            return None

        try:
            # RFC 7230 section 3.3.2 specifies multiple content lengths can
            # be sent in a single Content-Length header
            # (e.g. Content-Length: 42, 42). Collapsing the values into a
            # set checks both that each is a valid int and that they agree.
            distinct = {int(val) for val in content_length.split(",")}
            if len(distinct) > 1:
                raise InvalidHeader(
                    "Content-Length contained multiple "
                    "unmatching values (%s)" % content_length
                )
            length = distinct.pop()
        except ValueError:
            length = None
        else:
            if length < 0:
                length = None

    # Convert status to int for comparison
    # In some cases, httplib returns a status of "_UNKNOWN"
    try:
        status = int(self.status)
    except ValueError:
        status = 0

    # Check for responses that shouldn't include a body
    bodiless = (
        status in (204, 304) or 100 <= status < 200 or request_method == "HEAD"
    )
    return 0 if bodiless else length

895

@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
    """
    Catch low-level python exceptions, instead re-raising urllib3
    variants, so that low-level exceptions are not leaked in the
    high-level api.

    On exit, release the connection back to the pool.
    """
    failed = True

    try:
        try:
            yield

        except SocketTimeout as e:
            # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
            # there is yet no clean way to get at it from this context.
            raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

        except BaseSSLError as e:
            # SSL errors related to framing/MAC get wrapped and reraised here
            raise SSLError(e) from e

        except IncompleteRead as e:
            no_content_expected = (
                e.expected is not None
                and e.partial is not None
                and e.expected == -e.partial
            )
            if no_content_expected:
                arg = "Response may not contain content."
            else:
                arg = f"Connection broken: {e!r}"
            raise ProtocolError(arg, e) from e

        except (HTTPException, OSError) as e:
            raise ProtocolError(f"Connection broken: {e!r}", e) from e

        # If no exception is thrown, we should avoid cleaning up
        # unnecessarily.
        failed = False
    finally:
        # If we didn't terminate cleanly, we need to throw away our
        # connection.
        if failed:
            # The response may not be closed but we're not going to use it
            # anymore so close it now to ensure that the connection is
            # released back to the pool.
            if self._original_response:
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection:
                self._connection.close()

        # If we hold the original response but it's closed now, we should
        # return the connection back to the pool.
        if self._original_response and self._original_response.isclosed():
            self.release_conn()

957

def _fp_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Read a response with the thought that reading the number of bytes
    larger than can fit in a 32-bit int at a time via SSL in some
    known cases leads to an overflow error that has to be prevented
    if `amt` or `self.length_remaining` indicate that a problem may
    happen.

    This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
    """
    assert self._fp
    c_int_max = 2**31 - 1
    # The size test comes first so that `util.IS_PYOPENSSL` is only
    # consulted for oversized reads.
    oversized = (amt and amt > c_int_max) or (
        amt is None
        and self.length_remaining
        and self.length_remaining > c_int_max
    )
    if oversized and util.IS_PYOPENSSL:
        if read1:
            return self._fp.read1(c_int_max)
        buffer = io.BytesIO()
        # Besides `max_chunk_amt` being a maximum chunk size, it
        # affects memory overhead of reading a response by this
        # method in CPython.
        # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
        # chunk size that does not lead to an overflow error, but
        # 256 MiB is a compromise.
        max_chunk_amt = 2**28
        remaining = amt
        while remaining is None or remaining != 0:
            if remaining is None:
                request = max_chunk_amt
            else:
                request = min(remaining, max_chunk_amt)
                remaining -= request
            piece = self._fp.read(request)
            if not piece:
                break
            buffer.write(piece)
            del piece  # to reduce peak memory usage by `max_chunk_amt`.
        return buffer.getvalue()

    if read1:
        return self._fp.read1(amt) if amt is not None else self._fp.read1()

    # StringIO doesn't like amt=None
    return self._fp.read(amt) if amt is not None else self._fp.read()

1010

def _raw_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Reads `amt` of bytes from the socket.

    Returns ``None`` when no file object is attached. Updates the byte
    counter and the remaining length by the amount read. Raises
    ``IncompleteRead`` when a sized read returns nothing while content
    length is enforced and bytes are still expected.
    """
    if self._fp is None:
        return None  # type: ignore[return-value]

    already_closed = getattr(self._fp, "closed", False)

    with self._error_catcher():
        data = b"" if already_closed else self._fp_read(amt, read1=read1)
        sized_read = amt is not None and amt != 0
        if sized_read and not data:
            # Platform-specific: Buggy versions of Python.
            # Close the connection when no data is returned
            #
            # This is redundant to what httplib/http.client _should_
            # already do. However, versions of python released before
            # December 15, 2012 (http://bugs.python.org/issue16298) do
            # not properly close the connection in all cases. There is
            # no harm in redundantly calling close.
            self._fp.close()
            if (
                self.enforce_content_length
                and self.length_remaining is not None
                and self.length_remaining != 0
            ):
                # This is an edge case that httplib failed to cover due
                # to concerns of backward compatibility. We're
                # addressing it here to make sure IncompleteRead is
                # raised during streaming, so all calls with incorrect
                # Content-Length are caught.
                raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
        elif read1 and (
            (amt != 0 and not data) or self.length_remaining == len(data)
        ):
            # All data has been read, but `self._fp.read1` in
            # CPython 3.12 and older doesn't always close
            # `http.client.HTTPResponse`, so we close it here.
            # See https://github.com/python/cpython/issues/113199
            self._fp.close()

    if data:
        received = len(data)
        self._fp_bytes_read += received
        if self.length_remaining is not None:
            self.length_remaining -= received
    return data

1062

def read(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
    cache_content: bool = False,
) -> bytes:
    """
    Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        How much of the content to read. A negative value is treated like
        ``None``. If specified, caching is skipped because it doesn't make
        sense to cache partial content as the full response.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. Defaults to ``self.decode_content``.

    :param cache_content:
        If True, will save the returned data such that the same result is
        returned despite of the state of the underlying file object. This
        is useful if you want the ``.data`` property to continue working
        after having ``.read()`` the file object. (Overridden if ``amt`` is
        set.)

    :raises RuntimeError:
        If a sized read with ``decode_content=False`` is requested after
        content has already been decoded.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    elif amt is not None:
        cache_content = False

        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and len(self._decoded_buffer) < amt
        ):
            # The decoder reports an unconsumed tail: pull more output
            # from it (feeding no new input) before touching the socket.
            leftover = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(leftover)
        if len(self._decoded_buffer) >= amt:
            # Enough decoded data is buffered already; no raw read needed.
            return self._decoded_buffer.get(amt)

    raw = self._raw_read(amt)
    if not cache_content:
        self._uncached_read_occurred = True

    flush_decoder = amt is None or (amt != 0 and not raw)

    nothing_pending = (
        not raw
        and len(self._decoded_buffer) == 0
        and not (self._decoder and self._decoder.has_unconsumed_tail)
    )
    if nothing_pending:
        return raw

    if amt is None:
        body = self._decode(raw, decode_content, flush_decoder)
        # It's possible that there is buffered decoded data after a
        # partial read.
        if decode_content and len(self._decoded_buffer) > 0:
            self._decoded_buffer.put(body)
            body = self._decoded_buffer.get_all()

        if cache_content and not self._uncached_read_occurred:
            self._body = body
        return body

    # Sized read from here on.
    if not decode_content:
        # do not waste memory on buffer when not decoding
        if self._has_decoded_content:
            raise RuntimeError(
                "Calling read(decode_content=False) is not supported after "
                "read(decode_content=True) was called."
            )
        return raw

    fresh = self._decode(
        raw,
        decode_content,
        flush_decoder,
        max_length=amt - len(self._decoded_buffer),
    )
    self._decoded_buffer.put(fresh)

    while len(self._decoded_buffer) < amt and raw:
        # TODO make sure to initially read enough data to get past the headers
        # For example, the GZ file header takes 10 bytes, we don't want to read
        # it one byte at a time
        raw = self._raw_read(amt)
        fresh = self._decode(
            raw,
            decode_content,
            flush_decoder,
            max_length=amt - len(self._decoded_buffer),
        )
        self._decoded_buffer.put(fresh)

    return self._decoded_buffer.get(amt)

1170

def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Similar to ``http.client.HTTPResponse.read1`` and documented
    in :meth:`io.BufferedReader.read1`, but with an additional parameter:
    ``decode_content``.

    :param amt:
        How much of the content to read. A negative value is treated
        like ``None``; ``0`` returns ``b""`` unless decoded data is
        already buffered.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. Defaults to ``self.decode_content``.

    :raises RuntimeError:
        If ``decode_content`` is false after content has already been
        decoded.
    """
    if decode_content is None:
        decode_content = self.decode_content
    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None

    # try and respond without going to the network
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        tail_pending = (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        )
        if tail_pending:
            room = None if amt is None else amt - len(self._decoded_buffer)
            leftover = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=room,
            )
            self._decoded_buffer.put(leftover)
        if len(self._decoded_buffer) > 0:
            return (
                self._decoded_buffer.get_all()
                if amt is None
                else self._decoded_buffer.get(amt)
            )
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    raw = self._raw_read(amt, read1=True)
    self._uncached_read_occurred = True
    if not decode_content or raw is None:
        return raw

    self._init_decoder()
    while True:
        # An empty raw read is the signal to flush the decoder and stop.
        at_eof = not raw
        decoded = self._decode(raw, decode_content, at_eof, max_length=amt)
        self._decoded_buffer.put(decoded)
        if decoded or at_eof:
            break
        # The decoder produced nothing yet: feed it another raw read.
        raw = self._raw_read(8192, read1=True)

    if amt is None:
        return self._decoded_buffer.get_all()
    return self._decoded_buffer.get(amt)

1241

def stream(
    self, amt: int | None = _READ_CHUNK_SIZE, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    A generator wrapper for the read() method (or for read_chunked() when
    the response is chunked and the body supports chunked reads). A call
    will block until ``amt`` bytes have been read from the connection or
    until the connection is closed.

    :param amt:
        How much of the content to read. The generator will return up to
        that much data per iteration, but may return less. This is
        particularly likely when using compressed data. However, the empty
        string will never be returned. ``0`` yields nothing at all.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if amt == 0:
        return

    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    while True:
        # Keep going while the file object is open, decoded bytes are
        # still buffered, or the decoder reports an unconsumed tail.
        more_to_deliver = (
            not is_fp_closed(self._fp)
            or len(self._decoded_buffer) > 0
            or (self._decoder and self._decoder.has_unconsumed_tail)
        )
        if not more_to_deliver:
            break

        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece

1275

1276 # Overrides from io.IOBase

def readable(self) -> bool:
    """Report that this object can be read from; always ``True``."""
    return True

1279

def shutdown(self) -> None:
    """
    Shut down the read side of the socket via the stored
    ``_sock_shutdown`` callable.

    :raises ValueError: if ``_sock_shutdown`` is not set.
    :raises RuntimeError: if ``_connection`` is ``None``.
    """
    shutdown_socket = self._sock_shutdown
    if not shutdown_socket:
        raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")

    if self._connection is None:
        raise RuntimeError(
            "Cannot shutdown as connection has already been released to the pool"
        )

    shutdown_socket(socket.SHUT_RD)

1288

def close(self) -> None:
    """
    Close the response.

    Clears ``_sock_shutdown``, closes the underlying file object if the
    response is not already closed, closes the connection if one is
    attached, and, when ``auto_close`` is off, also runs
    ``io.IOBase.close`` on this object.
    """
    self._sock_shutdown = None

    if not self.closed and self._fp:
        self._fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if self.auto_close:
        return
    io.IOBase.close(self)

1300

@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    With ``auto_close`` off, the ``io.IOBase`` closed flag of this object
    is returned. Otherwise the answer comes from the underlying file
    object: its ``isclosed()`` method if present, else its ``closed``
    attribute; a missing file object, or one offering neither, counts as
    closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True

1313

def fileno(self) -> int:
    """
    Return the file descriptor of the underlying file object.

    :raises OSError:
        If there is no underlying file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()

1324

def flush(self) -> None:
    """
    Flush the underlying file object.

    Does nothing when there is no file object, when it has no ``flush``
    attribute, or when its ``closed`` attribute is truthy; otherwise the
    result of its ``flush()`` call is returned.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush") or getattr(fp, "closed", False):
        return None
    return fp.flush()

1332

def supports_chunked_reads(self) -> bool:
    """
    Checks if the underlying file-like object looks like a
    :class:`http.client.HTTPResponse` object. The only test made is
    whether it has an ``fp`` attribute; if so, it is assumed to return
    raw chunks as processed by read_chunked().
    """
    wrapped = self._fp
    try:
        wrapped.fp
    except AttributeError:
        return False
    return True

1341

def _update_chunk_length(self) -> None:
    """
    Read the size line of the next chunk and store it in ``chunk_left``.

    Does nothing when ``chunk_left`` is already set, i.e. a chunk is
    still being consumed. Otherwise one line is read from the raw file
    object, anything after the first ``;`` (chunk extensions) is
    dropped, and the remainder is parsed as a hexadecimal integer.

    :raises InvalidChunkLength:
        If the size line is present but is not a valid, non-negative
        hexadecimal number. The response is closed first.

    :raises ProtocolError:
        If the size line is empty, i.e. the body was truncated at the
        start of a chunk. The response is closed first.
    """
    # First, we'll figure out length of a chunk and then
    # we'll try to read it from socket.
    if self.chunk_left is not None:
        return None
    line = self._fp.fp.readline()  # type: ignore[union-attr]
    line = line.split(b";", 1)[0]
    try:
        chunk_length = int(line, 16)
        if chunk_length < 0:
            # ``int`` accepts a leading sign, but a chunk size is made
            # of hex digits only; a negative size would otherwise be
            # handed to ``_safe_read`` as a read length.
            raise ValueError("negative chunk length")
    except ValueError:
        self.close()
        if line:
            # Invalid chunked protocol response, abort.
            raise InvalidChunkLength(self, line) from None
        else:
            # Truncated at start of next chunk
            raise ProtocolError("Response ended prematurely") from None
    self.chunk_left = chunk_length

1359

def _handle_chunk(self, amt: int | None) -> bytes:
    """
    Read data belonging to the current chunk and update ``chunk_left``.

    :param amt:
        Maximum number of bytes wanted, or ``None`` for the rest of the
        current chunk.

    :returns:
        ``amt`` bytes when that is less than what is left of the chunk
        (``chunk_left`` is reduced accordingly); otherwise everything
        left of the chunk, after which the two bytes following it are
        read and discarded and ``chunk_left`` is reset to ``None``.
    """
    remaining = self.chunk_left

    if amt is not None and remaining is not None and amt < remaining:
        # Only part of the chunk was asked for; stay inside it.
        payload = self._fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left = remaining - amt
        return payload  # type: ignore[no-any-return]

    # The request covers the rest of the chunk (amt is None, equal to,
    # or larger than what remains).
    payload = self._fp._safe_read(remaining)  # type: ignore[union-attr]
    self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return payload  # type: ignore[no-any-return]

1381

def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``. Yields the body of a chunked response
    piece by piece.

    :param amt:
        How much of the content to read per piece. ``0`` yields nothing;
        a negative value is treated like ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response is not chunked.

    :raises BodyNotHttplibCompatible:
        If the body does not support chunked reads.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt == 0:
            return
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            # First, check if any data is left in the decoder's buffer.
            if self._decoder and self._decoder.has_unconsumed_tail:
                raw_piece = b""
            else:
                self._update_chunk_length()
                self._uncached_read_occurred = True
                if self.chunk_left == 0:
                    # A zero-length chunk ends the body.
                    break
                raw_piece = self._handle_chunk(amt)
            piece = self._decode(
                raw_piece,
                decode_content=decode_content,
                flush_decoder=False,
                max_length=amt,
            )
            if piece:
                yield piece

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            flushed = self._flush_decoder()
            if flushed:  # Platform-specific: Jython.
                yield flushed

        # Chunk content ends with \r\n: discard it.
        while self._fp is not None:
            line = self._fp.fp.readline()
            # Some sites may not end with '\r\n', so an empty read also
            # ends the loop.
            if not line or line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()

1468

@property
def url(self) -> str | None:
    """
    The URL this response was obtained from.

    If the request that generated this response redirected, this is the
    final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str | None) -> None:
    # Stored under ``_request_url``, which is what the getter reads back.
    self._request_url = url

1481

def __iter__(self) -> typing.Iterator[bytes]:
    """
    Iterate over the decoded body line by line.

    Data comes from ``self.stream(decode_content=True)``. Every yielded
    item ends with ``b"\\n"``, except possibly the last one when the body
    does not end with a newline.
    """
    pending: list[bytes] = []
    for chunk in self.stream(decode_content=True):
        if b"\n" not in chunk:
            # No line ends inside this chunk; keep collecting.
            pending.append(chunk)
            continue

        first, *middle, tail = chunk.split(b"\n")
        # Whatever was collected so far is the start of the first line.
        yield b"".join(pending) + first + b"\n"
        for piece in middle:
            yield piece + b"\n"
        # Text after the last newline begins the next line.
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)