Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/urllib3/response.py: 22%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import socket
9import sys
10import typing
11import warnings
12import zlib
13from contextlib import contextmanager
14from http.client import HTTPMessage as _HttplibHTTPMessage
15from http.client import HTTPResponse as _HttplibHTTPResponse
16from socket import timeout as SocketTimeout
18if typing.TYPE_CHECKING:
19 from ._base_connection import BaseHTTPConnection
21try:
22 try:
23 import brotlicffi as brotli # type: ignore[import-not-found]
24 except ImportError:
25 import brotli # type: ignore[import-not-found]
26except ImportError:
27 brotli = None
29try:
30 import zstandard as zstd
31except (AttributeError, ImportError, ValueError): # Defensive:
32 HAS_ZSTD = False
33else:
34 # The package 'zstandard' added the 'eof' property starting
35 # in v0.18.0 which we require to ensure a complete and
36 # valid zstd stream was fed into the ZstdDecoder.
37 # See: https://github.com/urllib3/urllib3/pull/2624
38 _zstd_version = tuple(
39 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
40 )
41 if _zstd_version < (0, 18): # Defensive:
42 HAS_ZSTD = False
43 else:
44 HAS_ZSTD = True
46from . import util
47from ._base_connection import _TYPE_BODY
48from ._collections import HTTPHeaderDict
49from .connection import BaseSSLError, HTTPConnection, HTTPException
50from .exceptions import (
51 BodyNotHttplibCompatible,
52 DecodeError,
53 HTTPError,
54 IncompleteRead,
55 InvalidChunkLength,
56 InvalidHeader,
57 ProtocolError,
58 ReadTimeoutError,
59 ResponseNotChunked,
60 SSLError,
61)
62from .util.response import is_fp_closed, is_response_to_head
63from .util.retry import Retry
65if typing.TYPE_CHECKING:
66 from .connectionpool import HTTPConnectionPool
68log = logging.getLogger(__name__)
class ContentDecoder:
    """Interface shared by the incremental content decoders in this module.

    Both methods raise ``NotImplementedError`` here; concrete decoders
    override them.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return the decoded bytes."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return whatever decoded bytes the decoder is still holding."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` content that tolerates header-less streams.

    Input is first fed to a default ``zlib.decompressobj()``. If that raises
    ``zlib.error`` before any output has been produced, all input seen so far
    is replayed through a decompressor created with ``-zlib.MAX_WBITS``
    (raw deflate).
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj()
        # Raw input received while the stream format is still undecided.
        self._data = b""
        self._first_try = True

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data``; empty input is returned unchanged."""
        if not data:
            return data

        # Format already settled: plain pass-through to the decompressor.
        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            output = self._obj.decompress(data)
        except zlib.error:
            # Switch to a raw-deflate decompressor and replay the buffered
            # input through it.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if output:
            # First real output: the format is confirmed, drop the replay buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return output

    def flush(self) -> bytes:
        """Flush the underlying zlib decompressor."""
        return self._obj.flush()
class GzipDecoderState:
    """Integer states tracked by ``GzipDecoder`` while decoding a body."""

    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)
class GzipDecoder(ContentDecoder):
    """Decoder for gzip content made of one or more concatenated members.

    A ``zlib.error`` in the first member propagates to the caller. A
    ``zlib.error`` in a later member is treated as trailing garbage: the
    output decoded so far is returned. After any error, all further input
    is ignored.
    """

    def __init__(self) -> None:
        self._state = GzipDecoderState.FIRST_MEMBER
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data``, following member boundaries as they appear."""
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        output = bytearray()
        pending = data
        while pending:
            try:
                output += self._obj.decompress(pending)
            except zlib.error:
                in_later_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if not in_later_member:
                    raise
                # Allow trailing garbage acceptable in other gzip clients
                break

            # Bytes left over after a member ended belong to the next member.
            pending = self._obj.unused_data
            if pending:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        return bytes(output)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor."""
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder backed by whichever brotli module was imported."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The decompressor exposes
        # either ``decompress`` ('brotlipy') or ``process`` ('Brotli').
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            method_name = (
                "decompress" if hasattr(self._obj, "decompress") else "process"
            )
            # Bind the backend's method directly as this instance's decompress().
            setattr(self, "decompress", getattr(self._obj, method_name))

        def flush(self) -> bytes:
            """Flush the backend if it offers ``flush``; otherwise return ``b""``."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]
if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for Zstandard content, including back-to-back frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            """Decode ``data``; empty input yields ``b""``."""
            if not data:
                return b""

            pieces = [self._obj.decompress(data)]
            # When a frame ended and bytes are left over, decode them with a
            # fresh decompressor object.
            while self._obj.eof and self._obj.unused_data:
                leftover = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                pieces.append(self._obj.decompress(leftover))
            return b"".join(pieces)

        def flush(self) -> bytes:
            """Flush the decoder.

            :raises DecodeError: if the current decompressor has not reached
                the end of its frame.
            """
            flushed = self._obj.flush()  # note: this is a no-op
            if self._obj.eof:
                return flushed
            raise DecodeError("Zstandard data is incomplete")
class MultiDecoder(ContentDecoder):
    """
    Decoder for a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore applies the individual decoders in reverse order.
    """

    # Maximum number of chained codings accepted in one header value. The
    # header comes from the remote server, and every extra link multiplies
    # the CPU and memory a small response body can consume.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        """
        :param modes: Comma-separated content codings, in the order applied.
        :raises DecodeError: if more than ``max_decode_links`` codings are listed.
        """
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        """Flush the decoder of the first listed coding (the last decoding stage)."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        """Run ``data`` through every decoder, last-applied coding first."""
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a new decoder for the content coding ``mode``.

    A value containing a comma yields a ``MultiDecoder``. Any value that is
    not otherwise recognised falls back to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    # Optional codecs are only offered when their backing module was imported.
    if mode == "br" and brotli is not None:
        return BrotliDecoder()
    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    A chunk that is only partly consumed is kept as a ``memoryview`` over the
    original bytes, so repeated small reads from one large chunk do not copy
    its remainder each time. A read that matches the first chunk exactly
    returns that chunk without copying.
    """

    def __init__(self) -> None:
        # Queued chunks; the leftmost entry may be a memoryview left behind by
        # a partial get().
        self.buffer: typing.Deque[bytes | memoryview] = collections.deque()
        # Total number of bytes currently queued.
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` to the end of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        :raises RuntimeError: if ``n`` is non-zero and the queue holds no chunks.
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the request matches the first chunk exactly, so hand the
        # stored bytes object back without copying.
        first = self.buffer[0]
        if len(first) == n and isinstance(first, bytes):
            self._size -= n
            return self.buffer.popleft()  # type: ignore[return-value]

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Slice through a memoryview so the unread tail is not copied.
                view = memoryview(chunk)
                ret.write(view[:remaining])
                self.buffer.appendleft(view[remaining:])
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything queued as a single ``bytes`` object."""
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
            if isinstance(result, memoryview):
                # Leftover from a partial get(); callers expect real bytes.
                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """Common state and content-decoding helpers for HTTP responses.

    The body-access, URL and connection members raise ``NotImplementedError``
    here and are meant to be supplied by subclasses.
    """

    # Content codings that can be decoded; optional codecs are appended only
    # when their backing module was imported.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS.append("br")
    if HAS_ZSTD:
        CONTENT_DECODERS.append("zstd")
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions from the decoders that _decode() converts into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES = DECODER_ERROR_CLASSES + (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES = DECODER_ERROR_CLASSES + (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store the response metadata and detect chunked transfer encoding."""
        self.headers = (
            headers
            if isinstance(headers, HTTPHeaderDict)
            else HTTPHeaderDict(headers)  # type: ignore[arg-type]
        )
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the ``retries`` property setter, which may assign ``url``.
        self.retries = retries

        transfer_encoding = self.headers.get("transfer-encoding", "").lower()
        self.chunked = any(
            token.strip() == "chunked" for token in transfer_encoding.split(",")
        )

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status not in self.REDIRECT_STATUSES:
            return False
        return self.headers.get("location")

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        return _json.loads(self.data.decode("utf-8"))

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Create ``self._decoder`` from the content-encoding header, unless a
        decoder already exists.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is not None:
            return

        supported = self.CONTENT_DECODERS
        if content_encoding in supported:
            self._decoder = _get_decoder(content_encoding)
        elif "," in content_encoding and any(
            part.strip() in supported for part in content_encoding.split(",")
        ):
            # A chain is decoded as soon as at least one listed coding is supported.
            self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode ``data`` with the active decoder and optionally flush it.

        :raises RuntimeError: if undecoded data is requested after decoded
            data has already been produced.
        :raises DecodeError: if the decoder raises one of ``DECODER_ERROR_CLASSES``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            encoding_name = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % encoding_name,
                e,
            ) from e

        if flush_decoder:
            data += self._flush_decoder()
        return data

    def _flush_decoder(self) -> bytes:
        """
        Flush the decoder and return its remaining output, or ``b""`` when
        there is no decoder.
        """
        if not self._decoder:
            return b""
        return self._decoder.decompress(b"") + self._decoder.flush()

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into ``b`` and return the number stored."""
        chunk = self.read(len(b))
        count = len(chunk)
        if count:
            b[:count] = chunk
        return count

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        """Deprecated: return ``self.headers`` after emitting a DeprecationWarning."""
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed in "
            "urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        """Deprecated: look up one header after emitting a DeprecationWarning."""
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed in "
            "urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
544class HTTPResponse(BaseHTTPResponse):
545 """
546 HTTP Response container.
548 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
549 loaded and decoded on-demand when the ``data`` property is accessed. This
550 class is also compatible with the Python standard library's :mod:`io`
551 module, and can hence be treated as a readable object in the context of that
552 framework.
554 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
556 :param preload_content:
557 If True, the response's body will be preloaded during construction.
559 :param decode_content:
560 If True, will attempt to decode the body based on the
561 'content-encoding' header.
563 :param original_response:
564 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
565 object, it's convenient to include the original for debug purposes. It's
566 otherwise unused.
568 :param retries:
569 The retries contains the last :class:`~urllib3.util.retry.Retry` that
570 was used during the request.
572 :param enforce_content_length:
573 Enforce content length checking. Body returned by server must match
574 value of Content-Length header, if present. Otherwise, raise error.
575 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """
        Initialise the response state and, if requested, preload the body.

        A non-empty ``str``/``bytes`` ``body`` is stored directly as the body;
        a ``body`` that has a ``read`` attribute becomes the underlying file
        object. ``request_method`` is only used to compute the expected body
        length. The remaining parameters are described in the class docstring
        or stored on the instance as-is.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        # Running count of raw bytes read from the file object (see tell()).
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        # This must stay last: read() relies on the attributes set above.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
641 def release_conn(self) -> None:
642 if not self._pool or not self._connection:
643 return None
645 self._pool._put_conn(self._connection)
646 self._connection = None
648 def drain_conn(self) -> None:
649 """
650 Read and discard any remaining HTTP response data in the response connection.
652 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
653 """
654 try:
655 self.read()
656 except (HTTPError, OSError, BaseSSLError, HTTPException):
657 pass
659 @property
660 def data(self) -> bytes:
661 # For backwards-compat with earlier urllib3 0.4 and earlier.
662 if self._body:
663 return self._body # type: ignore[return-value]
665 if self._fp:
666 return self.read(cache_content=True)
668 return None # type: ignore[return-value]
670 @property
671 def connection(self) -> HTTPConnection | None:
672 return self._connection
674 def isclosed(self) -> bool:
675 return is_fp_closed(self._fp)
677 def tell(self) -> int:
678 """
679 Obtain the number of bytes pulled over the wire so far. May differ from
680 the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
681 if bytes are encoded on the wire (e.g, compressed).
682 """
683 return self._fp_bytes_read
685 def _init_length(self, request_method: str | None) -> int | None:
686 """
687 Set initial length value for Response content if available.
688 """
689 length: int | None
690 content_length: str | None = self.headers.get("content-length")
692 if content_length is not None:
693 if self.chunked:
694 # This Response will fail with an IncompleteRead if it can't be
695 # received as chunked. This method falls back to attempt reading
696 # the response before raising an exception.
697 log.warning(
698 "Received response with both Content-Length and "
699 "Transfer-Encoding set. This is expressly forbidden "
700 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
701 "attempting to process response as Transfer-Encoding: "
702 "chunked."
703 )
704 return None
706 try:
707 # RFC 7230 section 3.3.2 specifies multiple content lengths can
708 # be sent in a single Content-Length header
709 # (e.g. Content-Length: 42, 42). This line ensures the values
710 # are all valid ints and that as long as the `set` length is 1,
711 # all values are the same. Otherwise, the header is invalid.
712 lengths = {int(val) for val in content_length.split(",")}
713 if len(lengths) > 1:
714 raise InvalidHeader(
715 "Content-Length contained multiple "
716 "unmatching values (%s)" % content_length
717 )
718 length = lengths.pop()
719 except ValueError:
720 length = None
721 else:
722 if length < 0:
723 length = None
725 else: # if content_length is None
726 length = None
728 # Convert status to int for comparison
729 # In some cases, httplib returns a status of "_UNKNOWN"
730 try:
731 status = int(self.status)
732 except ValueError:
733 status = 0
735 # Check for responses that shouldn't include a body
736 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
737 length = 0
739 return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        Translation performed by this context manager:

        * socket timeout -> ``ReadTimeoutError``
        * ``BaseSSLError`` -> ``ReadTimeoutError`` when its text contains
          "read operation timed out", otherwise ``SSLError``
        * ``IncompleteRead`` -> ``ProtocolError``
        * other ``HTTPException`` / ``OSError`` -> ``ProtocolError``

        On exit, release the connection back to the pool if the original
        response is closed. If the body did not finish cleanly, the original
        response and the connection are closed first.
        """
        # Only flipped to True when the wrapped block finishes without raising.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                # expected == -partial gets its own message; every other
                # incomplete read is reported as a broken connection.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.

        :param amt: Number of bytes to read; ``None`` reads without a limit.
        :param read1: Use the file object's ``read1`` instead of ``read``.
        :returns: The bytes read from ``self._fp``.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Take the chunked workaround path only when the request could exceed
        # a 32-bit int AND the runtime is one of the known affected cases.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            # Loop until `amt` bytes were requested or the file object
            # returns no more data.
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Updates ``self._fp_bytes_read`` and ``self.length_remaining`` with the
        number of bytes obtained.

        :param amt: Number of bytes to read; ``None`` for no limit.
        :param read1: Read via ``read1`` instead of ``read``.
        :returns: The raw bytes read, or ``None`` when there is no file object.
        :raises IncompleteRead: (inside the error catcher, which re-raises it
            as ``ProtocolError``) when no data came back although content
            length enforcement is on and bytes are still expected.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        # Sampled before reading so an already-closed file object yields b"".
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do.  However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)

        :raises RuntimeError:
            If undecoded data is requested after decoded data has already
            been produced.
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve the request from already-decoded data when there is enough.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush on a full read, or when a sized read hit end of data.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading until enough decoded bytes are buffered or the raw
            # read returns nothing.
            # NOTE(review): `flush_decoder` is computed once above and is not
            # recomputed inside this loop, so the empty read that ends the
            # loop does not flush the decoder — confirm this is intended.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises RuntimeError:
            If undecoded data is requested after decoded data has already
            been produced.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # Keep feeding the decoder until it yields output or the raw data is
        # exhausted (in which case the decoder is flushed).
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
def stream(
    self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Lazily iterate over the response body.

    Chunked responses whose underlying file object supports chunked reads
    are delegated to :meth:`read_chunked`; everything else is served by
    calling :meth:`read` repeatedly. A call may block until ``amt`` bytes
    have been read from the connection or until the connection is closed.

    :param amt:
        How much of the content to read. The generator will yield up to
        this much data per iteration, but may yield less. This is
        particularly likely when using compressed data. However, an empty
        byte string will never be yielded.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    # Keep going while the file is still open, or while previously decoded
    # bytes are still waiting in the buffer.
    while not (is_fp_closed(self._fp) and len(self._decoded_buffer) == 0):
        piece = self.read(amt=amt, decode_content=decode_content)

        if piece:
            yield piece
1071 # Overrides from io.IOBase
def readable(self) -> bool:
    """Report that this object can be read from; always ``True``."""
    return True
def shutdown(self) -> None:
    """
    Invoke the stored ``_sock_shutdown`` callable with ``socket.SHUT_RD``.

    :raises ValueError:
        If ``_sock_shutdown`` is unset (falsy), e.g. after :meth:`close`.
    """
    sock_shutdown = self._sock_shutdown
    if sock_shutdown:
        sock_shutdown(socket.SHUT_RD)
        return
    raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
def close(self) -> None:
    """
    Release the resources held by this response.

    Clears the stored socket-shutdown callable, closes the wrapped file
    object if the response is not already closed, closes the connection if
    one is attached, and, when ``auto_close`` is off, marks the
    :class:`io.IOBase` side of the object as closed.
    """
    self._sock_shutdown = None

    if not self.closed:
        fp = self._fp
        if fp:
            fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if not self.auto_close:
        io.IOBase.close(self)
@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    With ``auto_close`` off, the answer comes from :class:`io.IOBase`'s own
    ``closed`` flag. Otherwise it mirrors the wrapped file object: ``True``
    when there is none, else its ``isclosed()`` result if it has that
    method, else its ``closed`` attribute if it has one, else ``True``.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True
def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()
def flush(self) -> None:
    """
    Flush the wrapped file object.

    Does nothing when there is no wrapped file object, when it lacks a
    ``flush`` method, or when its ``closed`` attribute is truthy.
    Otherwise the result of the wrapped object's ``flush()`` is returned.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush") or getattr(fp, "closed", False):
        return None
    return fp.flush()
def supports_chunked_reads(self) -> bool:
    """
    Tell whether the wrapped file-like object looks like a
    :class:`http.client.HTTPResponse`.

    The only test is the presence of an ``fp`` attribute; if it exists the
    object is assumed to hand back raw chunks in the form that
    read_chunked() processes.
    """
    wrapped = self._fp
    return hasattr(wrapped, "fp")
def _update_chunk_length(self) -> None:
    """
    Read the next chunk-size line and store its value in ``chunk_left``.

    Nothing is read while ``chunk_left`` is already set, i.e. while a
    chunk is still being consumed. Anything after the first ``;`` on the
    size line is ignored and the remainder is parsed as hexadecimal.

    :raises InvalidChunkLength:
        If the size line is non-empty but is not a valid hexadecimal number.
    :raises ProtocolError:
        If the size line is empty, meaning the body was cut off where the
        next chunk should have started.

    The response is closed before either error is raised.
    """
    if self.chunk_left is not None:
        return None

    size_line = self._fp.fp.readline()  # type: ignore[union-attr]
    # Keep only the text before the first ";" on the size line.
    size_field = size_line.partition(b";")[0]
    try:
        self.chunk_left = int(size_field, 16)
    except ValueError:
        self.close()
        if not size_field:
            # Truncated at start of next chunk
            raise ProtocolError("Response ended prematurely") from None
        # Invalid chunked protocol response, abort.
        raise InvalidChunkLength(self, size_field) from None
def _handle_chunk(self, amt: int | None) -> bytes:
    """
    Read up to ``amt`` bytes of the current chunk from the wrapped file.

    :param amt:
        Maximum number of bytes wanted. ``None`` means the whole remainder
        of the current chunk.

    :return:
        The bytes read. When ``amt`` is smaller than what is left of the
        chunk, ``chunk_left`` is reduced by ``amt``. Otherwise the rest of
        the chunk is read, its trailing CRLF is consumed, and
        ``chunk_left`` is reset to ``None``.
    """
    fp = self._fp
    remaining = self.chunk_left

    if amt is not None and remaining is not None and amt < remaining:
        # Partial read: stay inside the current chunk.
        partial = fp._safe_read(amt)  # type: ignore[union-attr]
        self.chunk_left = remaining - amt
        return partial  # type: ignore[no-any-return]

    # amt is None, equal to, or larger than what is left: finish the chunk.
    rest = fp._safe_read(remaining)  # type: ignore[union-attr]
    fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
    self.chunk_left = None
    return rest  # type: ignore[no-any-return]
def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Iterate over a ``Transfer-Encoding: chunked`` body, yielding every
    non-empty piece after it has been passed through ``_decode()``.

    :param amt:
        Upper bound handed to ``_handle_chunk()`` for each read. ``None``
        reads each chunk whole; negative values are treated like ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If the response is not marked as chunked.
    :raises BodyNotHttplibCompatible:
        If the wrapped file object does not support chunked reads.

    Because this is a generator, both errors surface when iteration
    starts rather than when the method is called.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        httplib_response = self._original_response
        if httplib_response and is_response_to_head(httplib_response):
            httplib_response.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            self._update_chunk_length()
            if self.chunk_left == 0:
                # A zero-length chunk marks the end of the body.
                break
            piece = self._decode(
                self._handle_chunk(amt),
                decode_content=decode_content,
                flush_decoder=False,
            )
            if piece:
                yield piece

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            leftover = self._flush_decoder()
            if leftover:  # Platform-specific: Jython.
                yield leftover

        # Chunk content ends with \r\n: discard it.
        while self._fp is not None:
            trailer_line = self._fp.fp.readline()
            # An empty read means the stream ended without the final
            # '\r\n' (some sites do that); a bare '\r\n' ends the trailer.
            if not trailer_line or trailer_line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()
@property
def url(self) -> str | None:
    """
    The URL this response was retrieved from.

    If the request that generated this response was redirected, this is
    the final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str) -> None:
    """Replace the stored request URL with ``url``."""
    self._request_url = url
def __iter__(self) -> typing.Iterator[bytes]:
    """
    Iterate over the decoded body one line at a time.

    Data comes from ``stream(decode_content=True)``. Every yielded item
    except possibly the last ends with ``b"\\n"``; a final item without a
    newline is yielded only if the body ends with unterminated data.
    """
    pending: list[bytes] = []
    for chunk in self.stream(decode_content=True):
        if b"\n" not in chunk:
            # No line ending yet: keep accumulating.
            pending.append(chunk)
            continue

        # At least one newline, so the split always has two or more parts.
        first, *complete_lines, tail = chunk.split(b"\n")
        yield b"".join(pending) + first + b"\n"
        for line in complete_lines:
            yield line + b"\n"
        # Whatever follows the last newline starts the next line.
        pending = [tail] if tail else []

    if pending:
        yield b"".join(pending)