Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 21%
Shortcuts on this page:
r m x — toggle line displays
j k — next/previous highlighted chunk
0 (zero) — top of page
1 (one) — first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import zlib
11from contextlib import contextmanager
12from http.client import HTTPMessage as _HttplibHTTPMessage
13from http.client import HTTPResponse as _HttplibHTTPResponse
14from socket import timeout as SocketTimeout
16if typing.TYPE_CHECKING:
17 from ._base_connection import BaseHTTPConnection
19try:
20 try:
21 import brotlicffi as brotli # type: ignore[import-not-found]
22 except ImportError:
23 import brotli # type: ignore[import-not-found]
24except ImportError:
25 brotli = None
27from . import util
28from ._base_connection import _TYPE_BODY
29from ._collections import HTTPHeaderDict
30from .connection import BaseSSLError, HTTPConnection, HTTPException
31from .exceptions import (
32 BodyNotHttplibCompatible,
33 DecodeError,
34 HTTPError,
35 IncompleteRead,
36 InvalidChunkLength,
37 InvalidHeader,
38 ProtocolError,
39 ReadTimeoutError,
40 ResponseNotChunked,
41 SSLError,
42)
43from .util.response import is_fp_closed, is_response_to_head
44from .util.retry import Retry
46if typing.TYPE_CHECKING:
47 from .connectionpool import HTTPConnectionPool
49log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for incremental Content-Encoding decoders."""

    def decompress(self, data: bytes) -> bytes:
        # Feed a chunk of encoded bytes; return whatever decoded output
        # is available so far.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Emit any remaining buffered output at end of stream.
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    Some servers send a raw DEFLATE stream instead of the zlib-wrapped
    format the HTTP spec requires.  The first bytes decide which variant
    we have: if zlib-wrapped decoding fails on the initial data, every
    byte seen so far is replayed through a raw (negative window bits)
    decompressor.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing: accumulate everything so it can be replayed
        # through a raw-deflate decompressor if the zlib guess fails.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not zlib-wrapped: switch to raw deflate and replay.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if out:
            # Produced output, so the zlib-wrapped guess was right;
            # drop the replay buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """States used by GzipDecoder for multi-member gzip streams."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # at least one member completed; more may follow
    SWALLOW_DATA = 2  # error after first member; ignore remaining input
class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and ``x-gzip``).

    Supports streams made of several concatenated gzip members and, once
    the first member has fully decoded, tolerates trailing garbage the
    way other gzip clients do.
    """

    def __init__(self) -> None:
        # 16 + MAX_WBITS selects gzip header/trailer handling in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                past_first_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore everything that arrives after the first error.
                self._state = GzipDecoderState.SWALLOW_DATA
                if past_first_member:
                    # Trailing garbage after a complete member is
                    # acceptable, matching other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: restart with a fresh object.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the right entry point as an instance attribute, which
            # shadows ContentDecoder.decompress: 'brotlipy' exposes
            # Decompressor.decompress, 'Brotli' exposes .process.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only 'brotlipy' has an explicit flush; 'Brotli' needs none.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
try:
    # Python 3.14+ ships zstd in the stdlib 'compression' package;
    # earlier versions can use the 'backports' distribution instead.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A zstd stream may contain several frames; when one frame
            # ends (eof) with bytes left over, start a fresh
            # decompressor on the unused data.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # A frame still in progress at end of body means the
            # response was truncated or corrupt.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse of the order they were applied.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for *mode*, a Content-Encoding token.

    A comma-separated list of codings yields a MultiDecoder; any single
    coding we don't recognize falls back to DeflateDecoder.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        # Chunks are stored whole; get() splits only the chunk that
        # straddles the requested boundary.
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Pop up to *n* bytes from the front of the buffer."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = bytearray()
        while self.buffer and len(out) < n:
            chunk = self.buffer.popleft()
            wanted = n - len(out)
            if len(chunk) > wanted:
                # Split the chunk: keep the tail for the next call.
                out += chunk[:wanted]
                self.buffer.appendleft(chunk[wanted:])
                self._size -= wanted
            else:
                out += chunk
                self._size -= len(chunk)
        return bytes(out)

    def get_all(self) -> bytes:
        """Pop the entire buffered contents as one bytes object."""
        if not self.buffer:
            assert self._size == 0
            return b""
        chunks = list(self.buffer)
        self.buffer.clear()
        self._size = 0
        # Avoid a copy when there is only a single chunk.
        return chunks[0] if len(chunks) == 1 else b"".join(chunks)
class BaseHTTPResponse(io.IOBase):
    """Common base for urllib3 response objects.

    Stores status, headers and request metadata, detects chunked
    transfer-encoding, and implements transparent decoding of compressed
    bodies via the ContentDecoder helpers.  The actual reading
    (``read``, ``read1``, ``stream``, ...) is left to subclasses.
    """

    # Content-codings we can decode; extended when the optional
    # brotli/zstd support was importable at module load.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; _decode wraps them in DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Note: assigning through the property; may update self.url.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily created by _init_decoder from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Abstract: subclasses return the full (possibly cached) body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        # Abstract: the URL this response was fetched from.
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        # Abstract: the connection this response is attached to, if any.
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        # Abstract: yield the body in chunks of up to ``amt`` bytes.
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        # Abstract: read up to ``amt`` bytes of (optionally decoded) body.
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        # Abstract: read with at most one underlying read call.
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        # Abstract: iterate over raw transfer-encoding chunks.
        raise NotImplementedError()

    def release_conn(self) -> None:
        # Abstract: give the connection back to its pool.
        raise NotImplementedError()

    def drain_conn(self) -> None:
        # Abstract: consume and discard any unread body data.
        raise NotImplementedError()

    def shutdown(self) -> None:
        # Abstract: stop further reads on the underlying socket.
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a MultiDecoder if at least
                # one listed coding is one we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream state.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
513class HTTPResponse(BaseHTTPResponse):
514 """
515 HTTP Response container.
517 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
518 loaded and decoded on-demand when the ``data`` property is accessed. This
519 class is also compatible with the Python standard library's :mod:`io`
520 module, and can hence be treated as a readable object in the context of that
521 framework.
523 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
525 :param preload_content:
526 If True, the response's body will be preloaded during construction.
528 :param decode_content:
529 If True, will attempt to decode the body based on the
530 'content-encoding' header.
532 :param original_response:
533 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
534 object, it's convenient to include the original for debug purposes. It's
535 otherwise unused.
537 :param retries:
538 The retries contains the last :class:`~urllib3.util.retry.Retry` that
539 was used during the request.
541 :param enforce_content_length:
542 Enforce content length checking. Body returned by server must match
543 value of Content-Length header, if present. Otherwise, raise error.
544 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Initialize the response; see the class docstring for parameter
        descriptions of the urllib3-specific arguments."""
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the underlying fp to read from.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
610 def release_conn(self) -> None:
611 if not self._pool or not self._connection:
612 return None
614 self._pool._put_conn(self._connection)
615 self._connection = None
617 def drain_conn(self) -> None:
618 """
619 Read and discard any remaining HTTP response data in the response connection.
621 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
622 """
623 try:
624 self.read()
625 except (HTTPError, OSError, BaseSSLError, HTTPException):
626 pass
    @property
    def data(self) -> bytes:
        """The full response body, read (and cached) on first access."""
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        """The connection this response is still attached to, if any."""
        return self._connection
    def isclosed(self) -> bool:
        """Whether the underlying file-like body is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Raw (wire) byte count, maintained by _raw_read.
        return self._fp_bytes_read
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: the HTTP method of the originating request;
            a "HEAD" request forces the length to 0.
        :returns: the expected body length in bytes, or ``None`` if it
            cannot be determined (e.g. chunked or invalid header).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.

        :param amt: number of bytes to read, or ``None`` for "read all".
        :param read1: use the underlying ``read1`` (single read call).
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates ``_fp_bytes_read`` / ``length_remaining`` accounting
        and enforces Content-Length when ``enforce_content_length`` is set.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a partial read straight from the decoded buffer when
            # enough bytes are already available.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw bytes until enough decoded bytes exist to
            # satisfy the request, or the stream is exhausted.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            # Stop once the decoder produced output (or was flushed);
            # otherwise keep feeding it more raw bytes.
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1013 def stream(
1014 self, amt: int | None = 2**16, decode_content: bool | None = None
1015 ) -> typing.Generator[bytes]:
1016 """
1017 A generator wrapper for the read() method. A call will block until
1018 ``amt`` bytes have been read from the connection or until the
1019 connection is closed.
1021 :param amt:
1022 How much of the content to read. The generator will return up to
1023 much data per iteration, but may return less. This is particularly
1024 likely when using compressed data. However, the empty string will
1025 never be returned.
1027 :param decode_content:
1028 If True, will attempt to decode the body based on the
1029 'content-encoding' header.
1030 """
1031 if self.chunked and self.supports_chunked_reads():
1032 yield from self.read_chunked(amt, decode_content=decode_content)
1033 else:
1034 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1035 data = self.read(amt=amt, decode_content=decode_content)
1037 if data:
1038 yield data
    # Overrides from io.IOBase
    def readable(self) -> bool:
        """Always readable, per the :mod:`io` stream interface."""
        return True
1044 def shutdown(self) -> None:
1045 if not self._sock_shutdown:
1046 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1047 if self._connection is None:
1048 raise RuntimeError(
1049 "Cannot shutdown as connection has already been released to the pool"
1050 )
1051 self._sock_shutdown(socket.SHUT_RD)
1053 def close(self) -> None:
1054 self._sock_shutdown = None
1056 if not self.closed and self._fp:
1057 self._fp.close()
1059 if self._connection:
1060 self._connection.close()
1062 if not self.auto_close:
1063 io.IOBase.close(self)
    @property
    def closed(self) -> bool:
        """Whether this response is closed (see :attr:`io.IOBase.closed`)."""
        if not self.auto_close:
            # Manual-close mode: defer to io.IOBase's own closed flag.
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True
1078 def fileno(self) -> int:
1079 if self._fp is None:
1080 raise OSError("HTTPResponse has no file to get a fileno from")
1081 elif hasattr(self._fp, "fileno"):
1082 return self._fp.fileno()
1083 else:
1084 raise OSError(
1085 "The file-like object this HTTPResponse is wrapped "
1086 "around has no file descriptor"
1087 )
1089 def flush(self) -> None:
1090 if (
1091 self._fp is not None
1092 and hasattr(self._fp, "flush")
1093 and not getattr(self._fp, "closed", False)
1094 ):
1095 return self._fp.flush()
1097 def supports_chunked_reads(self) -> bool:
1098 """
1099 Checks if the underlying file-like object looks like a
1100 :class:`http.client.HTTPResponse` object. We do this by testing for
1101 the fp attribute. If it is present we assume it returns raw chunks as
1102 processed by read_chunked().
1103 """
1104 return hasattr(self._fp, "fp")
1106 def _update_chunk_length(self) -> None:
1107 # First, we'll figure out length of a chunk and then
1108 # we'll try to read it from socket.
1109 if self.chunk_left is not None:
1110 return None
1111 line = self._fp.fp.readline() # type: ignore[union-attr]
1112 line = line.split(b";", 1)[0]
1113 try:
1114 self.chunk_left = int(line, 16)
1115 except ValueError:
1116 self.close()
1117 if line:
1118 # Invalid chunked protocol response, abort.
1119 raise InvalidChunkLength(self, line) from None
1120 else:
1121 # Truncated at start of next chunk
1122 raise ProtocolError("Response ended prematurely") from None
1124 def _handle_chunk(self, amt: int | None) -> bytes:
1125 returned_chunk = None
1126 if amt is None:
1127 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1128 returned_chunk = chunk
1129 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1130 self.chunk_left = None
1131 elif self.chunk_left is not None and amt < self.chunk_left:
1132 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1133 self.chunk_left = self.chunk_left - amt
1134 returned_chunk = value
1135 elif amt == self.chunk_left:
1136 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1137 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1138 self.chunk_left = None
1139 returned_chunk = value
1140 else: # amt > self.chunk_left
1141 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1142 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1143 self.chunk_left = None
1144 return returned_chunk # type: ignore[no-any-return]
1146 def read_chunked(
1147 self, amt: int | None = None, decode_content: bool | None = None
1148 ) -> typing.Generator[bytes]:
1149 """
1150 Similar to :meth:`HTTPResponse.read`, but with an additional
1151 parameter: ``decode_content``.
1153 :param amt:
1154 How much of the content to read. If specified, caching is skipped
1155 because it doesn't make sense to cache partial content as the full
1156 response.
1158 :param decode_content:
1159 If True, will attempt to decode the body based on the
1160 'content-encoding' header.
1161 """
1162 self._init_decoder()
1163 # FIXME: Rewrite this method and make it a class with a better structured logic.
1164 if not self.chunked:
1165 raise ResponseNotChunked(
1166 "Response is not chunked. "
1167 "Header 'transfer-encoding: chunked' is missing."
1168 )
1169 if not self.supports_chunked_reads():
1170 raise BodyNotHttplibCompatible(
1171 "Body should be http.client.HTTPResponse like. "
1172 "It should have have an fp attribute which returns raw chunks."
1173 )
1175 with self._error_catcher():
1176 # Don't bother reading the body of a HEAD request.
1177 if self._original_response and is_response_to_head(self._original_response):
1178 self._original_response.close()
1179 return None
1181 # If a response is already read and closed
1182 # then return immediately.
1183 if self._fp.fp is None: # type: ignore[union-attr]
1184 return None
1186 if amt and amt < 0:
1187 # Negative numbers and `None` should be treated the same,
1188 # but httplib handles only `None` correctly.
1189 amt = None
1191 while True:
1192 self._update_chunk_length()
1193 if self.chunk_left == 0:
1194 break
1195 chunk = self._handle_chunk(amt)
1196 decoded = self._decode(
1197 chunk, decode_content=decode_content, flush_decoder=False
1198 )
1199 if decoded:
1200 yield decoded
1202 if decode_content:
1203 # On CPython and PyPy, we should never need to flush the
1204 # decoder. However, on Jython we *might* need to, so
1205 # lets defensively do it anyway.
1206 decoded = self._flush_decoder()
1207 if decoded: # Platform-specific: Jython.
1208 yield decoded
1210 # Chunk content ends with \r\n: discard it.
1211 while self._fp is not None:
1212 line = self._fp.fp.readline()
1213 if not line:
1214 # Some sites may not end with '\r\n'.
1215 break
1216 if line == b"\r\n":
1217 break
1219 # We read everything; close the "file".
1220 if self._original_response:
1221 self._original_response.close()
    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str | None) -> None:
        # Record the (possibly redirected) request URL for later retrieval.
        self._request_url = url
1236 def __iter__(self) -> typing.Iterator[bytes]:
1237 buffer: list[bytes] = []
1238 for chunk in self.stream(decode_content=True):
1239 if b"\n" in chunk:
1240 chunks = chunk.split(b"\n")
1241 yield b"".join(buffer) + chunks[0] + b"\n"
1242 for x in chunks[1:-1]:
1243 yield x + b"\n"
1244 if chunks[-1]:
1245 buffer = [chunks[-1]]
1246 else:
1247 buffer = []
1248 else:
1249 buffer.append(chunk)
1250 if buffer:
1251 yield b"".join(buffer)