Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 22%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28try:
29 import zstandard as zstd
30except (AttributeError, ImportError, ValueError): # Defensive:
31 HAS_ZSTD = False
32else:
33 # The package 'zstandard' added the 'eof' property starting
34 # in v0.18.0 which we require to ensure a complete and
35 # valid zstd stream was fed into the ZstdDecoder.
36 # See: https://github.com/urllib3/urllib3/pull/2624
37 _zstd_version = tuple(
38 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
39 )
40 if _zstd_version < (0, 18): # Defensive:
41 HAS_ZSTD = False
42 else:
43 HAS_ZSTD = True
45from . import util
46from ._base_connection import _TYPE_BODY
47from ._collections import HTTPHeaderDict
48from .connection import BaseSSLError, HTTPConnection, HTTPException
49from .exceptions import (
50 BodyNotHttplibCompatible,
51 DecodeError,
52 HTTPError,
53 IncompleteRead,
54 InvalidChunkLength,
55 InvalidHeader,
56 ProtocolError,
57 ReadTimeoutError,
58 ResponseNotChunked,
59 SSLError,
60)
61from .util.response import is_fp_closed, is_response_to_head
62from .util.retry import Retry
64if typing.TYPE_CHECKING:
65 from .connectionpool import HTTPConnectionPool
67log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming Content-Encoding decoders."""

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return whatever output is ready; may buffer."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output at end of stream."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies that accepts both zlib-wrapped and
    raw DEFLATE streams, since servers in the wild send either form.
    """

    def __init__(self) -> None:
        self._first_try = True  # still probing: zlib-wrapped vs. raw deflate
        self._data = b""  # replay buffer, kept only while probing
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data
        if not self._first_try:
            return self._obj.decompress(data)

        # Remember everything seen so far so it can be replayed if the
        # zlib-wrapped attempt turns out to be wrong.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Not a zlib stream: switch to raw DEFLATE and replay the
            # accumulated input through the new decompressor.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if out:
            # zlib-wrapped stream confirmed; the replay buffer is no
            # longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """State constants for GzipDecoder: decoding the first gzip member,
    decoding subsequent members, or swallowing trailing garbage."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2
class GzipDecoder(ContentDecoder):
    """Decoder for ``gzip`` bodies, including multi-member streams, that
    tolerates trailing garbage once at least one member has been decoded.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                in_later_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # From here on, discard whatever else arrives.
                self._state = GzipDecoderState.SWALLOW_DATA
                if in_later_member:
                    # Trailing garbage after a complete member is accepted,
                    # matching the leniency of other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: restart with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` content; only defined when a brotli
        implementation is importable."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the correct per-chunk decode method as this instance's
            # decompress(), picked by which API the installed package has.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one of the two supported packages exposes flush().
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` content, handling bodies made of several
        concatenated zstd frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # When one frame ends (eof) with input left over, feed the
            # remainder to a fresh decompressor for the next frame.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18, checked at import) tells us whether
            # a complete, valid frame was consumed.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret
class MultiDecoder(ContentDecoder):
    """Decoder for a comma-separated ``Content-Encoding`` chain.

    RFC 7231 requires the sender to list content codings in the order in
    which they were applied, so decoding walks the list in reverse.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # decompress() applies the decoders in reverse, so the first-listed
        # decoder runs last and is the one that may hold buffered output.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for ``mode``, a Content-Encoding value."""
    if "," in mode:
        return MultiDecoder(mode)

    # RFC 9110 section 8.4.1.3: recipients should treat x-gzip as
    # equivalent to gzip.
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO queue of byte chunks.

    read() must return exactly the requested number of bytes to follow the
    BufferedIOBase API, so decoded chunks are queued here via put() and
    sliced back out via get().

    Peak memory usage is the queued data plus the largest chunk copied out
    by get(); with a single huge chunk that amounts to one full copy.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the back of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        fetched = 0
        while fetched < n:
            chunk = self.buffer.popleft()
            still_needed = n - fetched
            if len(chunk) > still_needed:
                # Split: emit the front of the chunk, requeue the rest.
                out.write(chunk[:still_needed])
                self.buffer.appendleft(chunk[still_needed:])
                self._size -= still_needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            fetched += len(chunk)

            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire queue contents as one bytes object."""
        if not self.buffer:
            assert self._size == 0
            return b""
        if len(self.buffer) == 1:
            # Fast path: hand back the single chunk without copying.
            result = self.buffer.pop()
        else:
            out = io.BytesIO()
            out.writelines(self.buffer.popleft() for _ in range(len(self.buffer)))
            result = out.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """
    Shared base class for urllib3 responses.

    Handles header storage, redirect/status bookkeeping, and transparent
    content decoding; subclasses implement the actual read machinery.
    """

    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Low-level decoder exceptions that _decode() converts into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the 'retries' property setter below, which may also
        # update the response URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        """Full response body; provided by subclasses."""
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        """URL the response was fetched from; provided by subclasses."""
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained encodings: only build a MultiDecoder when at least
                # one listed coding is something we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Mixing decoded and raw reads would corrupt the stream.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read into ``b`` and return the number of bytes written (io API)."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
540class HTTPResponse(BaseHTTPResponse):
541 """
542 HTTP Response container.
544 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
545 loaded and decoded on-demand when the ``data`` property is accessed. This
546 class is also compatible with the Python standard library's :mod:`io`
547 module, and can hence be treated as a readable object in the context of that
548 framework.
550 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
552 :param preload_content:
553 If True, the response's body will be preloaded during construction.
555 :param decode_content:
556 If True, will attempt to decode the body based on the
557 'content-encoding' header.
559 :param original_response:
560 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
561 object, it's convenient to include the original for debug purposes. It's
562 otherwise unused.
564 :param retries:
565 The retries contains the last :class:`~urllib3.util.retry.Retry` that
566 was used during the request.
568 :param enforce_content_length:
569 Enforce content length checking. Body returned by server must match
570 value of Content-Length header, if present. Otherwise, raise error.
571 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # str/bytes bodies are stored directly; file-like bodies are wrapped
        # as the read source below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
635 def release_conn(self) -> None:
636 if not self._pool or not self._connection:
637 return None
639 self._pool._put_conn(self._connection)
640 self._connection = None
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        # Best-effort: any failure while draining just means there is
        # nothing usable left to read.
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass
    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Read the whole body and cache it so later accesses still work.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        """The wrapped connection, or ``None`` once it has been released."""
        return self._connection
    def isclosed(self) -> bool:
        """Return True if the underlying file-like body object is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        # Incremented in _raw_read() as raw (pre-decoding) bytes arrive.
        return self._fp_bytes_read
679 def _init_length(self, request_method: str | None) -> int | None:
680 """
681 Set initial length value for Response content if available.
682 """
683 length: int | None
684 content_length: str | None = self.headers.get("content-length")
686 if content_length is not None:
687 if self.chunked:
688 # This Response will fail with an IncompleteRead if it can't be
689 # received as chunked. This method falls back to attempt reading
690 # the response before raising an exception.
691 log.warning(
692 "Received response with both Content-Length and "
693 "Transfer-Encoding set. This is expressly forbidden "
694 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
695 "attempting to process response as Transfer-Encoding: "
696 "chunked."
697 )
698 return None
700 try:
701 # RFC 7230 section 3.3.2 specifies multiple content lengths can
702 # be sent in a single Content-Length header
703 # (e.g. Content-Length: 42, 42). This line ensures the values
704 # are all valid ints and that as long as the `set` length is 1,
705 # all values are the same. Otherwise, the header is invalid.
706 lengths = {int(val) for val in content_length.split(",")}
707 if len(lengths) > 1:
708 raise InvalidHeader(
709 "Content-Length contained multiple "
710 "unmatching values (%s)" % content_length
711 )
712 length = lengths.pop()
713 except ValueError:
714 length = None
715 else:
716 if length < 0:
717 length = None
719 else: # if content_length is None
720 length = None
722 # Convert status to int for comparison
723 # In some cases, httplib returns a status of "_UNKNOWN"
724 try:
725 status = int(self.status)
726 except ValueError:
727 status = 0
729 # Check for responses that shouldn't include a body
730 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
731 length = 0
733 return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                # NOTE(review): expected == -partial appears to identify a
                # zero-byte read against a known remaining length — confirm.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked-read path when an overflow is actually
        # possible: a >2 GiB request on an affected SSL implementation.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and ((amt != 0 and not data) or self.length_remaining == len(data)):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        # Account for raw (pre-decoding) bytes for tell() and for
        # Content-Length bookkeeping.
        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)

        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve a partial read entirely from already-decoded data
            # when possible, without touching the network.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush when the body is exhausted: a read-everything call, or a
        # non-zero partial read that returned no data.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # Keep reading until the decoder produces output (or the stream
        # ends), so a successful read1 never returns an empty chunk early.
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1038 def stream(
1039 self, amt: int | None = 2**16, decode_content: bool | None = None
1040 ) -> typing.Generator[bytes, None, None]:
1041 """
1042 A generator wrapper for the read() method. A call will block until
1043 ``amt`` bytes have been read from the connection or until the
1044 connection is closed.
1046 :param amt:
1047 How much of the content to read. The generator will return up to
1048 much data per iteration, but may return less. This is particularly
1049 likely when using compressed data. However, the empty string will
1050 never be returned.
1052 :param decode_content:
1053 If True, will attempt to decode the body based on the
1054 'content-encoding' header.
1055 """
1056 if self.chunked and self.supports_chunked_reads():
1057 yield from self.read_chunked(amt, decode_content=decode_content)
1058 else:
1059 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1060 data = self.read(amt=amt, decode_content=decode_content)
1062 if data:
1063 yield data
1065 # Overrides from io.IOBase
1066 def readable(self) -> bool:
1067 return True
1069 def close(self) -> None:
1070 if not self.closed and self._fp:
1071 self._fp.close()
1073 if self._connection:
1074 self._connection.close()
1076 if not self.auto_close:
1077 io.IOBase.close(self)
1079 @property
1080 def closed(self) -> bool:
1081 if not self.auto_close:
1082 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1083 elif self._fp is None:
1084 return True
1085 elif hasattr(self._fp, "isclosed"):
1086 return self._fp.isclosed()
1087 elif hasattr(self._fp, "closed"):
1088 return self._fp.closed
1089 else:
1090 return True
1092 def fileno(self) -> int:
1093 if self._fp is None:
1094 raise OSError("HTTPResponse has no file to get a fileno from")
1095 elif hasattr(self._fp, "fileno"):
1096 return self._fp.fileno()
1097 else:
1098 raise OSError(
1099 "The file-like object this HTTPResponse is wrapped "
1100 "around has no file descriptor"
1101 )
1103 def flush(self) -> None:
1104 if (
1105 self._fp is not None
1106 and hasattr(self._fp, "flush")
1107 and not getattr(self._fp, "closed", False)
1108 ):
1109 return self._fp.flush()
1111 def supports_chunked_reads(self) -> bool:
1112 """
1113 Checks if the underlying file-like object looks like a
1114 :class:`http.client.HTTPResponse` object. We do this by testing for
1115 the fp attribute. If it is present we assume it returns raw chunks as
1116 processed by read_chunked().
1117 """
1118 return hasattr(self._fp, "fp")
1120 def _update_chunk_length(self) -> None:
1121 # First, we'll figure out length of a chunk and then
1122 # we'll try to read it from socket.
1123 if self.chunk_left is not None:
1124 return None
1125 line = self._fp.fp.readline() # type: ignore[union-attr]
1126 line = line.split(b";", 1)[0]
1127 try:
1128 self.chunk_left = int(line, 16)
1129 except ValueError:
1130 self.close()
1131 if line:
1132 # Invalid chunked protocol response, abort.
1133 raise InvalidChunkLength(self, line) from None
1134 else:
1135 # Truncated at start of next chunk
1136 raise ProtocolError("Response ended prematurely") from None
1138 def _handle_chunk(self, amt: int | None) -> bytes:
1139 returned_chunk = None
1140 if amt is None:
1141 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1142 returned_chunk = chunk
1143 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1144 self.chunk_left = None
1145 elif self.chunk_left is not None and amt < self.chunk_left:
1146 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1147 self.chunk_left = self.chunk_left - amt
1148 returned_chunk = value
1149 elif amt == self.chunk_left:
1150 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1151 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1152 self.chunk_left = None
1153 returned_chunk = value
1154 else: # amt > self.chunk_left
1155 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1156 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1157 self.chunk_left = None
1158 return returned_chunk # type: ignore[no-any-return]
1160 def read_chunked(
1161 self, amt: int | None = None, decode_content: bool | None = None
1162 ) -> typing.Generator[bytes, None, None]:
1163 """
1164 Similar to :meth:`HTTPResponse.read`, but with an additional
1165 parameter: ``decode_content``.
1167 :param amt:
1168 How much of the content to read. If specified, caching is skipped
1169 because it doesn't make sense to cache partial content as the full
1170 response.
1172 :param decode_content:
1173 If True, will attempt to decode the body based on the
1174 'content-encoding' header.
1175 """
1176 self._init_decoder()
1177 # FIXME: Rewrite this method and make it a class with a better structured logic.
1178 if not self.chunked:
1179 raise ResponseNotChunked(
1180 "Response is not chunked. "
1181 "Header 'transfer-encoding: chunked' is missing."
1182 )
1183 if not self.supports_chunked_reads():
1184 raise BodyNotHttplibCompatible(
1185 "Body should be http.client.HTTPResponse like. "
1186 "It should have have an fp attribute which returns raw chunks."
1187 )
1189 with self._error_catcher():
1190 # Don't bother reading the body of a HEAD request.
1191 if self._original_response and is_response_to_head(self._original_response):
1192 self._original_response.close()
1193 return None
1195 # If a response is already read and closed
1196 # then return immediately.
1197 if self._fp.fp is None: # type: ignore[union-attr]
1198 return None
1200 if amt and amt < 0:
1201 # Negative numbers and `None` should be treated the same,
1202 # but httplib handles only `None` correctly.
1203 amt = None
1205 while True:
1206 self._update_chunk_length()
1207 if self.chunk_left == 0:
1208 break
1209 chunk = self._handle_chunk(amt)
1210 decoded = self._decode(
1211 chunk, decode_content=decode_content, flush_decoder=False
1212 )
1213 if decoded:
1214 yield decoded
1216 if decode_content:
1217 # On CPython and PyPy, we should never need to flush the
1218 # decoder. However, on Jython we *might* need to, so
1219 # lets defensively do it anyway.
1220 decoded = self._flush_decoder()
1221 if decoded: # Platform-specific: Jython.
1222 yield decoded
1224 # Chunk content ends with \r\n: discard it.
1225 while self._fp is not None:
1226 line = self._fp.fp.readline()
1227 if not line:
1228 # Some sites may not end with '\r\n'.
1229 break
1230 if line == b"\r\n":
1231 break
1233 # We read everything; close the "file".
1234 if self._original_response:
1235 self._original_response.close()
1237 @property
1238 def url(self) -> str | None:
1239 """
1240 Returns the URL that was the source of this response.
1241 If the request that generated this response redirected, this method
1242 will return the final redirect location.
1243 """
1244 return self._request_url
1246 @url.setter
1247 def url(self, url: str) -> None:
1248 self._request_url = url
1250 def __iter__(self) -> typing.Iterator[bytes]:
1251 buffer: list[bytes] = []
1252 for chunk in self.stream(decode_content=True):
1253 if b"\n" in chunk:
1254 chunks = chunk.split(b"\n")
1255 yield b"".join(buffer) + chunks[0] + b"\n"
1256 for x in chunks[1:-1]:
1257 yield x + b"\n"
1258 if chunks[-1]:
1259 buffer = [chunks[-1]]
1260 else:
1261 buffer = []
1262 else:
1263 buffer.append(chunk)
1264 if buffer:
1265 yield b"".join(buffer)