Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%
564 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:32 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:32 +0000
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17try:
18 try:
19 import brotlicffi as brotli # type: ignore[import]
20 except ImportError:
21 import brotli # type: ignore[import]
22except ImportError:
23 brotli = None
25try:
26 import zstandard as zstd # type: ignore[import]
28 # The package 'zstandard' added the 'eof' property starting
29 # in v0.18.0 which we require to ensure a complete and
30 # valid zstd stream was fed into the ZstdDecoder.
31 # See: https://github.com/urllib3/urllib3/pull/2624
32 _zstd_version = _zstd_version = tuple(
33 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
34 )
35 if _zstd_version < (0, 18): # Defensive:
36 zstd = None
38except (AttributeError, ImportError, ValueError): # Defensive:
39 zstd = None
41from . import util
42from ._base_connection import _TYPE_BODY
43from ._collections import HTTPHeaderDict
44from .connection import BaseSSLError, HTTPConnection, HTTPException
45from .exceptions import (
46 BodyNotHttplibCompatible,
47 DecodeError,
48 HTTPError,
49 IncompleteRead,
50 InvalidChunkLength,
51 InvalidHeader,
52 ProtocolError,
53 ReadTimeoutError,
54 ResponseNotChunked,
55 SSLError,
56)
57from .util.response import is_fp_closed, is_response_to_head
58from .util.retry import Retry
60if typing.TYPE_CHECKING:
61 from typing_extensions import Literal
63 from .connectionpool import HTTPConnectionPool
65log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming content decoders (gzip, deflate, ...).

    Implementations accept raw bytes via :meth:`decompress` and return any
    remaining buffered output from :meth:`flush` once the stream ends.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode *data* and return whatever decoded output is available."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any decoded bytes still buffered at end of stream."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for the 'deflate' content-encoding.

    Some servers send raw DEFLATE data instead of the zlib-wrapped format.
    We first attempt the zlib-wrapped format and, if that fails on the
    initial input, transparently retry the buffered bytes as a raw stream.
    """

    def __init__(self) -> None:
        self._first_try = True  # still probing which deflate flavor this is
        self._data = b""  # input buffered while probing
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already settled; feed the stream straight through.
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # zlib-wrapped parse failed: restart as a raw DEFLATE stream,
            # replaying everything buffered so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if decompressed:
            # Output was produced, so the format guess held; drop the
            # probe buffer.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decompressed

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """States for :class:`GzipDecoder`'s multi-member stream handling."""

    FIRST_MEMBER: int = 0  # decoding the first gzip member
    OTHER_MEMBERS: int = 1  # decoding a subsequent concatenated member
    SWALLOW_DATA: int = 2  # an error occurred; discard all further input
class GzipDecoder(ContentDecoder):
    """Decoder for the 'gzip' content-encoding, including streams made of
    multiple concatenated gzip members."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                was_other_member = self._state == GzipDecoderState.OTHER_MEMBERS
                # From here on, ignore whatever else arrives.
                self._state = GzipDecoderState.SWALLOW_DATA
                if was_other_member:
                    # Trailing garbage after a complete member is tolerated,
                    # matching other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Another gzip member follows: start a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the decode entry point once per instance: 'brotlipy'
            # exposes Decompressor.decompress(), 'Brotli' exposes .process().
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # 'brotlipy' provides flush(); 'Brotli' has no buffered output.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            """Decode *data*, handling concatenated zstd frames."""
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            while self._obj.eof and self._obj.unused_data:
                # A frame ended with input left over: start a fresh
                # decompressor for the next concatenated frame.
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                # 'eof' requires zstandard >= 0.18.0 (checked at import time
                # above); a False value here means the stream was truncated.
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per comma-separated coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # Only the first-applied (innermost) coding can hold buffered output.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse: last applied is decoded first.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for the given content-coding token(s)."""
    if "," in mode:
        # Several codings were stacked; build a decoder chain.
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()
    if mode == "br" and brotli is not None:
        return BrotliDecoder()
    if mode == "zstd" and zstd is not None:
        return ZstdDecoder()
    # Everything else falls back to deflate handling.
    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    read() must hand back exactly the requested number of decoded bytes to
    follow the BufferedIOBase API, so decoded chunks are queued here via
    put() and sliced back out with get().

    Peak memory usage is the sum of the queued data plus the largest chunk
    copied out in get(); the worst case (one huge chunk) costs a full copy
    of the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk to the tail of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the head of the queue.

        :raises RuntimeError: if the buffer is empty (and ``n != 0``).
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        result = io.BytesIO()
        remaining = n
        while remaining > 0 and self.buffer:
            chunk = self.buffer.popleft()
            if len(chunk) > remaining:
                # Split the chunk: emit the prefix, requeue the remainder.
                result.write(chunk[:remaining])
                self.buffer.appendleft(chunk[remaining:])
                self._size -= remaining
                remaining = 0
            else:
                result.write(chunk)
                self._size -= len(chunk)
                remaining -= len(chunk)
        return result.getvalue()
class BaseHTTPResponse(io.IOBase):
    """
    Abstract base for HTTP responses.

    Provides header storage, redirect detection, transparent content
    decoding, and the :mod:`io` / :class:`http.client.HTTPResponse`
    compatibility shims.  Subclasses implement the body-reading methods
    (``read``, ``stream``, ``read_chunked``, ...).
    """

    # Content-codings we can decode transparently; extended when the
    # optional brotli / zstandard packages imported successfully.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; wrapped into DecodeError by _decode().
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded data has been returned; mixing decoded and raw
        # reads on the same response is rejected in _decode().
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Lazily initialised from Content-Encoding by _init_decoder().
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        """The full response body as bytes (implemented by subclasses)."""
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Stacked codings: only build a decoder chain when at
                # least one of the listed codings is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :raises RuntimeError: if raw reads are requested after decoded data
            was already returned.
        :raises DecodeError: when the decoder fails on the response body.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        """Read up to ``len(b)`` bytes into *b*; return the count read."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; a file-like body is wrapped
        # as self._fp below and read on demand.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        """Return the underlying connection to its pool, if both exist."""
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort: failure to drain just means the connection
            # won't be reused.
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        """True if the underlying file-like object is closed."""
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        Returns ``None`` when the body length cannot be determined
        (chunked, missing or invalid Content-Length).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve the request entirely from previously-decoded bytes
            # when the buffer already holds enough.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        # Strip any chunk extensions before parsing the hex size.
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        """Read up to *amt* bytes (or all, if None) of the current chunk."""
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # Zero-length chunk terminates the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        # Iterate the decoded body line-by-line (split on b"\n"),
        # buffering partial lines across stream() chunks.
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)