from __future__ import annotations

import collections
import io
import json as _json
import logging
import socket
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    DependencyWarning,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._first_try_data = b""
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        return self._obj.flush()
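

# A hedged usage sketch of the format fallback above (illustrative only, not
# part of this module): a raw RFC 1951 DEFLATE body decodes on the fallback
# path after the first RFC 1950 zlib attempt raises zlib.error.
#
#   compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)  # raw DEFLATE
#   raw = compressor.compress(b"hello") + compressor.flush()
#   assert DeflateDecoder().decompress(raw) == b"hello"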


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        if max_length <= 0:
            for d in reversed(self._decoders):
                data = d.decompress(data)
            return data

        ret = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            any_data = False
            for d in reversed(self._decoders):
                data = d.decompress(data, max_length=max_length - len(ret))
                if data:
                    any_data = True
                # We should not break when no data is returned because
                # next decoders may produce data even with empty input.
            ret += data
            if not any_data or len(ret) >= max_length:
                return bytes(ret)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return any(d.has_unconsumed_tail for d in self._decoders)


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
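

# A hedged mapping sketch (illustrative only): Content-Encoding values map to
# decoders as follows; chained encodings are decoded by MultiDecoder in the
# reverse of the order in which they were applied.
#
#   _get_decoder("gzip")      -> GzipDecoder
#   _get_decoder("br")        -> BrotliDecoder (when a brotli package imports)
#   _get_decoder("gzip, br")  -> MultiDecoder, decoding "br" first, then "gzip"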


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
            self._size -= n
            return self.buffer.popleft()

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                chunk = memoryview(chunk)
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
            if isinstance(result, memoryview):
                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result
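

# A minimal usage sketch (illustrative only): the deque holds whole chunks and
# get() copies only when a chunk must be split.
#
#   buf = BytesQueueBuffer()
#   buf.put(b"hello ")
#   buf.put(b"world")
#   assert buf.get(5) == b"hello"
#   assert buf.get_all() == b" world"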


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)
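
    # A hedged usage sketch for json() (the response object is hypothetical):
    #
    #   resp = urllib3.request("GET", "https://example.com/api")
    #   payload = resp.json()  # Python object parsed from the UTF-8 JSON body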

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
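

# A hedged io-compat sketch (illustrative only, `resp` is hypothetical):
# because BaseHTTPResponse implements read1() and the io.IOBase interface,
# a response can back an io.TextIOWrapper directly.
#
#   import io
#   text = io.TextIOWrapper(resp, encoding="utf-8").read()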


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
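
    # For example (hedged, illustrative): a header of "Content-Length: 42, 42"
    # parses to the single-element set {42} above and yields 42, while
    # "Content-Length: 42, 13" raises InvalidHeader.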

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response. Reading more bytes than fit in a 32-bit int at a
        time via SSL leads, in some known cases, to an overflow error that
        has to be prevented if `amt` or `self.length_remaining` indicate
        that a problem may happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            if self._decoder and self._decoder.has_unconsumed_tail:
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
                if len(self._decoded_buffer) >= amt:
                    return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if (
            not data
            and len(self._decoded_buffer) == 0
            and not (self._decoder and self._decoder.has_unconsumed_tail)
        ):
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(
                    data,
                    decode_content,
                    flush_decoder,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if (
                self._decoder
                and self._decoder.has_unconsumed_tail
                and (amt is None or len(self._decoded_buffer) < amt)
            ):
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=(
                        amt - len(self._decoded_buffer) if amt is not None else None
                    ),
                )
                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(
                data, decode_content, flush_decoder, max_length=amt
            )
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` bytes of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the empty
            string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while (
                not is_fp_closed(self._fp)
                or len(self._decoded_buffer) > 0
                or (self._decoder and self._decoder.has_unconsumed_tail)
            ):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data
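
    # A hedged streaming sketch (names are illustrative, not part of this
    # module): with preload_content=False the body is consumed incrementally.
    #
    #   resp = urllib3.request("GET", url, preload_content=False)
    #   for chunk in resp.stream(2**16, decode_content=True):
    #       sink.write(chunk)
    #   resp.release_conn()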

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def shutdown(self) -> None:
        if not self._sock_shutdown:
            raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
        if self._connection is None:
            raise RuntimeError(
                "Cannot shutdown as connection has already been released to the pool"
            )
        self._sock_shutdown(socket.SHUT_RD)

    def close(self) -> None:
        self._sock_shutdown = None

        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk,
                    decode_content=decode_content,
                    flush_decoder=False,
                    max_length=amt,
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # let's defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str | None) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
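

# A hedged line-iteration sketch (illustrative only, `resp` is hypothetical):
# iterating an HTTPResponse yields decoded lines via stream(); every item but
# possibly the last ends with b"\n".
#
#   resp = urllib3.request("GET", url, preload_content=False)
#   for line in resp:
#       handle(line)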