Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 35%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 DependencyWarning,
36 HTTPError,
37 IncompleteRead,
38 InvalidChunkLength,
39 InvalidHeader,
40 ProtocolError,
41 ReadTimeoutError,
42 ResponseNotChunked,
43 SSLError,
44)
45from .util.response import is_fp_closed, is_response_to_head
46from .util.retry import Retry
48if typing.TYPE_CHECKING:
49 from .connectionpool import HTTPConnectionPool
51log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming Content-Encoding decoders.

    Concrete implementations in this module (deflate, gzip, brotli, zstd,
    and the chained MultiDecoder) decode incrementally and honor an
    optional cap on the amount of output produced per call.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        # Return up to `max_length` decoded bytes (unlimited when negative).
        # Implementations buffer input they cannot emit yet.
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        # True when buffered input can yield more output without new data.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Return any remaining decoded bytes at end of stream.
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    First attempts RFC 1950 (zlib-wrapped) decoding; on a ``zlib.error``
    it replays the accumulated input as raw RFC 1951 DEFLATE, which some
    servers send despite the header.
    """

    def __init__(self) -> None:
        # True until the zlib-vs-raw-deflate question has been settled.
        self._first_try = True
        # Input accumulated during the first (zlib-format) attempt so it
        # can be replayed through a raw-deflate decompressor on failure.
        self._first_try_data = b""
        # Input held back when a caller passed max_length == 0.
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        # Prepend anything held back by a previous max_length == 0 call.
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        # More output is available without new input either when data was
        # held back (max_length == 0) or when zlib kept an unconsumed tail
        # after the stream format was settled.
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """Plain-int state values for GzipDecoder's member/error handling."""

    FIRST_MEMBER = 0  # decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding subsequent members of a multi-member stream
    SWALLOW_DATA = 2  # an error occurred; all further input is discarded
class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and its ``x-gzip`` alias).

    Supports multi-member gzip streams: when one member ends, a fresh
    zlib decompressor is created for the remaining (or future) data.
    """

    def __init__(self) -> None:
        # 16 + MAX_WBITS selects gzip-header decoding in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
        # After a decode error, all subsequent input is silently dropped.
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: br`` (only defined when a brotli
        library was importable)."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the instance-level `_decompress` to whichever streaming
            # method the installed library exposes.
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            # Placeholder overwritten per-instance in __init__.
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                # Older libraries lack can_accept_more_data(); assume no tail.
                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
try:
    # Python 3.14+ ships zstd in the stdlib `compression` package;
    # earlier versions rely on the `backports.zstd` package.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: zstd`` (only defined when a zstd
        module was importable). Handles multi-frame streams."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            if not data and not self.has_unconsumed_tail:
                return b""
            # A finished decompressor cannot be reused; start a new one on
            # the leftover bytes of the previous frame plus the new input.
            if self._obj.eof:
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            # Output is pending mid-frame, or a finished frame left unused
            # (possibly next-frame) bytes behind.
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        # `modes` is the raw comma-separated Content-Encoding value, in
        # the order the encodings were applied.
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        # _decoders[0] is the first-applied encoding, i.e. the last decoder
        # in decode order, so its flush yields the final plaintext tail.
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        if max_length <= 0:
            # Unlimited: run the data through the chain in reverse order.
            for d in reversed(self._decoders):
                data = d.decompress(data)
            return data

        ret = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            any_data = False
            for d in reversed(self._decoders):
                data = d.decompress(data, max_length=max_length - len(ret))
                if data:
                    any_data = True
            # We should not break when no data is returned because
            # next decoders may produce data even with empty input.
            ret += data
            if not any_data or len(ret) >= max_length:
                return bytes(ret)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return any(d.has_unconsumed_tail for d in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for one Content-Encoding token or a chain."""
    # A comma means several encodings were applied; delegate to MultiDecoder.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Everything else is treated as (possibly raw) deflate.
    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    ``put()`` appends chunks; ``get(n)`` pops up to ``n`` bytes from the
    front, splitting a chunk with a zero-copy memoryview when necessary.
    Peak memory usage is bounded by the stored data plus the largest
    chunk copied out in ``get()``.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk at the tail."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Pop and return up to ``n`` bytes from the head.

        :raises RuntimeError: if the buffer is empty and ``n`` != 0.
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the head chunk is a bytes object of exactly n bytes.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        out = io.BytesIO()
        copied = 0
        while copied < n:
            wanted = n - copied
            piece = self.buffer.popleft()
            if len(piece) > wanted:
                # Split without copying the leftover part.
                view = memoryview(piece)
                out.write(view[:wanted])
                self.buffer.appendleft(view[wanted:])
                self._size -= wanted
                break
            out.write(piece)
            self._size -= len(piece)
            copied += len(piece)
            # Return a short result if the buffer ran dry first.
            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Pop and return everything currently buffered as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            only = chunks.pop()
            result = only.tobytes() if isinstance(only, memoryview) else only
        else:
            sink = io.BytesIO()
            sink.writelines(chunks.popleft() for _ in range(len(chunks)))
            result = sink.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """Shared behaviour for urllib3 response objects: header storage,
    redirect detection, and transparent content decoding. Subclasses
    implement the actual body reading (``read``/``read1``/``stream``/...).
    """

    # Content-Encoding tokens this process can decode; extended below
    # when the optional brotli/zstd libraries were importable.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() wraps into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded bytes have been handed out; mixing raw reads
        # after that is rejected in _decode().
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full response body; provided by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained encodings: only build a decoder when at least one
                # token in the chain is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        # A flush implies "decode everything", so the cap is lifted.
        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
689class HTTPResponse(BaseHTTPResponse):
690 """
691 HTTP Response container.
693 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
694 loaded and decoded on-demand when the ``data`` property is accessed. This
695 class is also compatible with the Python standard library's :mod:`io`
696 module, and can hence be treated as a readable object in the context of that
697 framework.
699 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
701 :param preload_content:
702 If True, the response's body will be preloaded during construction.
704 :param decode_content:
705 If True, will attempt to decode the body based on the
706 'content-encoding' header.
708 :param original_response:
709 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
710 object, it's convenient to include the original for debug purposes. It's
711 otherwise unused.
713 :param retries:
714 The retries contains the last :class:`~urllib3.util.retry.Retry` that
715 was used during the request.
717 :param enforce_content_length:
718 Enforce content length checking. Body returned by server must match
719 value of Content-Length header, if present. Otherwise, raise error.
720 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Wrap a response body (str/bytes or a file-like object); see the
        class docstring for parameter descriptions."""
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the read source.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
786 def release_conn(self) -> None:
787 if not self._pool or not self._connection:
788 return None
790 self._pool._put_conn(self._connection)
791 self._connection = None
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        # Best-effort: read errors while draining are deliberately ignored.
        try:
            self.read(
                # Do not spend resources decoding the content unless
                # decoding has already been initiated.
                decode_content=self._has_decoded_content,
            )
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass
808 @property
809 def data(self) -> bytes:
810 # For backwards-compat with earlier urllib3 0.4 and earlier.
811 if self._body:
812 return self._body # type: ignore[return-value]
814 if self._fp:
815 return self.read(cache_content=True)
817 return None # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        # The HTTPConnection this response reads from, if still attached.
        return self._connection
    def isclosed(self) -> bool:
        """Whether the underlying file-like body object is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`HTTPResponse.read`
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: Method of the originating request; ``"HEAD"``
            forces a zero-length body.
        :returns: Expected body length in bytes, or ``None`` when unknown
            (absent/invalid Content-Length, or chunked transfer encoding).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # SSL errors related to framing/MAC get wrapped and reraised here
                raise SSLError(e) from e

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and util.IS_PYOPENSSL:
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        # Track raw (wire) bytes consumed so tell() and length accounting work.
        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            # A partial read can't be cached: ``.data`` must always reflect
            # the complete body.
            cache_content = False

            # Serve as much as possible from bytes the decoder is already
            # holding before touching the network again.
            if self._decoder and self._decoder.has_unconsumed_tail:
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full-body read, or once the raw stream is
        # exhausted (a non-zero ``amt`` read that returned no data).
        flush_decoder = amt is None or (amt != 0 and not data)

        # Nothing raw, nothing buffered, and nothing pending inside the
        # decoder: the response is fully consumed.
        if (
            not data
            and len(self._decoded_buffer) == 0
            and not (self._decoder and self._decoder.has_unconsumed_tail)
        ):
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    # Mixing raw reads with earlier decoded reads is rejected
                    # outright (see the error message below).
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw data until ``amt`` decoded bytes are buffered
            # or the raw stream runs dry.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(
                    data,
                    decode_content,
                    flush_decoder,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                # Switching to raw reads after decoded reads already happened
                # is rejected outright (see the error message below).
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
        # Drain bytes the decoder is still holding before reading more from
        # the socket.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        ):
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=(
                    amt - len(self._decoded_buffer) if amt is not None else None
                ),
            )
            self._decoded_buffer.put(decoded_data)
        if len(self._decoded_buffer) > 0:
            if amt is None:
                return self._decoded_buffer.get_all()
            return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            # Only flush once the raw stream is exhausted.
            flush_decoder = not data
            decoded_data = self._decode(
                data, decode_content, flush_decoder, max_length=amt
            )
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            # The last raw block decoded to nothing (e.g. still inside a
            # compression header); read more so this call can make progress.
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1223 def stream(
1224 self, amt: int | None = 2**16, decode_content: bool | None = None
1225 ) -> typing.Generator[bytes]:
1226 """
1227 A generator wrapper for the read() method. A call will block until
1228 ``amt`` bytes have been read from the connection or until the
1229 connection is closed.
1231 :param amt:
1232 How much of the content to read. The generator will return up to
1233 much data per iteration, but may return less. This is particularly
1234 likely when using compressed data. However, the empty string will
1235 never be returned.
1237 :param decode_content:
1238 If True, will attempt to decode the body based on the
1239 'content-encoding' header.
1240 """
1241 if self.chunked and self.supports_chunked_reads():
1242 yield from self.read_chunked(amt, decode_content=decode_content)
1243 else:
1244 while (
1245 not is_fp_closed(self._fp)
1246 or len(self._decoded_buffer) > 0
1247 or (self._decoder and self._decoder.has_unconsumed_tail)
1248 ):
1249 data = self.read(amt=amt, decode_content=decode_content)
1251 if data:
1252 yield data
1254 # Overrides from io.IOBase
1255 def readable(self) -> bool:
1256 return True
1258 def shutdown(self) -> None:
1259 if not self._sock_shutdown:
1260 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1261 if self._connection is None:
1262 raise RuntimeError(
1263 "Cannot shutdown as connection has already been released to the pool"
1264 )
1265 self._sock_shutdown(socket.SHUT_RD)
1267 def close(self) -> None:
1268 self._sock_shutdown = None
1270 if not self.closed and self._fp:
1271 self._fp.close()
1273 if self._connection:
1274 self._connection.close()
1276 if not self.auto_close:
1277 io.IOBase.close(self)
1279 @property
1280 def closed(self) -> bool:
1281 if not self.auto_close:
1282 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1283 elif self._fp is None:
1284 return True
1285 elif hasattr(self._fp, "isclosed"):
1286 return self._fp.isclosed()
1287 elif hasattr(self._fp, "closed"):
1288 return self._fp.closed
1289 else:
1290 return True
1292 def fileno(self) -> int:
1293 if self._fp is None:
1294 raise OSError("HTTPResponse has no file to get a fileno from")
1295 elif hasattr(self._fp, "fileno"):
1296 return self._fp.fileno()
1297 else:
1298 raise OSError(
1299 "The file-like object this HTTPResponse is wrapped "
1300 "around has no file descriptor"
1301 )
1303 def flush(self) -> None:
1304 if (
1305 self._fp is not None
1306 and hasattr(self._fp, "flush")
1307 and not getattr(self._fp, "closed", False)
1308 ):
1309 return self._fp.flush()
1311 def supports_chunked_reads(self) -> bool:
1312 """
1313 Checks if the underlying file-like object looks like a
1314 :class:`http.client.HTTPResponse` object. We do this by testing for
1315 the fp attribute. If it is present we assume it returns raw chunks as
1316 processed by read_chunked().
1317 """
1318 return hasattr(self._fp, "fp")
1320 def _update_chunk_length(self) -> None:
1321 # First, we'll figure out length of a chunk and then
1322 # we'll try to read it from socket.
1323 if self.chunk_left is not None:
1324 return None
1325 line = self._fp.fp.readline() # type: ignore[union-attr]
1326 line = line.split(b";", 1)[0]
1327 try:
1328 self.chunk_left = int(line, 16)
1329 except ValueError:
1330 self.close()
1331 if line:
1332 # Invalid chunked protocol response, abort.
1333 raise InvalidChunkLength(self, line) from None
1334 else:
1335 # Truncated at start of next chunk
1336 raise ProtocolError("Response ended prematurely") from None
1338 def _handle_chunk(self, amt: int | None) -> bytes:
1339 returned_chunk = None
1340 if amt is None:
1341 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1342 returned_chunk = chunk
1343 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1344 self.chunk_left = None
1345 elif self.chunk_left is not None and amt < self.chunk_left:
1346 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1347 self.chunk_left = self.chunk_left - amt
1348 returned_chunk = value
1349 elif amt == self.chunk_left:
1350 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1351 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1352 self.chunk_left = None
1353 returned_chunk = value
1354 else: # amt > self.chunk_left
1355 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1356 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1357 self.chunk_left = None
1358 return returned_chunk # type: ignore[no-any-return]
1360 def read_chunked(
1361 self, amt: int | None = None, decode_content: bool | None = None
1362 ) -> typing.Generator[bytes]:
1363 """
1364 Similar to :meth:`HTTPResponse.read`, but with an additional
1365 parameter: ``decode_content``.
1367 :param amt:
1368 How much of the content to read. If specified, caching is skipped
1369 because it doesn't make sense to cache partial content as the full
1370 response.
1372 :param decode_content:
1373 If True, will attempt to decode the body based on the
1374 'content-encoding' header.
1375 """
1376 self._init_decoder()
1377 # FIXME: Rewrite this method and make it a class with a better structured logic.
1378 if not self.chunked:
1379 raise ResponseNotChunked(
1380 "Response is not chunked. "
1381 "Header 'transfer-encoding: chunked' is missing."
1382 )
1383 if not self.supports_chunked_reads():
1384 raise BodyNotHttplibCompatible(
1385 "Body should be http.client.HTTPResponse like. "
1386 "It should have have an fp attribute which returns raw chunks."
1387 )
1389 with self._error_catcher():
1390 # Don't bother reading the body of a HEAD request.
1391 if self._original_response and is_response_to_head(self._original_response):
1392 self._original_response.close()
1393 return None
1395 # If a response is already read and closed
1396 # then return immediately.
1397 if self._fp.fp is None: # type: ignore[union-attr]
1398 return None
1400 if amt and amt < 0:
1401 # Negative numbers and `None` should be treated the same,
1402 # but httplib handles only `None` correctly.
1403 amt = None
1405 while True:
1406 # First, check if any data is left in the decoder's buffer.
1407 if self._decoder and self._decoder.has_unconsumed_tail:
1408 chunk = b""
1409 else:
1410 self._update_chunk_length()
1411 if self.chunk_left == 0:
1412 break
1413 chunk = self._handle_chunk(amt)
1414 decoded = self._decode(
1415 chunk,
1416 decode_content=decode_content,
1417 flush_decoder=False,
1418 max_length=amt,
1419 )
1420 if decoded:
1421 yield decoded
1423 if decode_content:
1424 # On CPython and PyPy, we should never need to flush the
1425 # decoder. However, on Jython we *might* need to, so
1426 # lets defensively do it anyway.
1427 decoded = self._flush_decoder()
1428 if decoded: # Platform-specific: Jython.
1429 yield decoded
1431 # Chunk content ends with \r\n: discard it.
1432 while self._fp is not None:
1433 line = self._fp.fp.readline()
1434 if not line:
1435 # Some sites may not end with '\r\n'.
1436 break
1437 if line == b"\r\n":
1438 break
1440 # We read everything; close the "file".
1441 if self._original_response:
1442 self._original_response.close()
1444 @property
1445 def url(self) -> str | None:
1446 """
1447 Returns the URL that was the source of this response.
1448 If the request that generated this response redirected, this method
1449 will return the final redirect location.
1450 """
1451 return self._request_url
1453 @url.setter
1454 def url(self, url: str | None) -> None:
1455 self._request_url = url
1457 def __iter__(self) -> typing.Iterator[bytes]:
1458 buffer: list[bytes] = []
1459 for chunk in self.stream(decode_content=True):
1460 if b"\n" in chunk:
1461 chunks = chunk.split(b"\n")
1462 yield b"".join(buffer) + chunks[0] + b"\n"
1463 for x in chunks[1:-1]:
1464 yield x + b"\n"
1465 if chunks[-1]:
1466 buffer = [chunks[-1]]
1467 else:
1468 buffer = []
1469 else:
1470 buffer.append(chunk)
1471 if buffer:
1472 yield b"".join(buffer)