Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%
560 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:05 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:05 +0000
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17try:
18 try:
19 import brotlicffi as brotli # type: ignore[import]
20 except ImportError:
21 import brotli # type: ignore[import]
22except ImportError:
23 brotli = None
try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    # NOTE: the original line contained a duplicated assignment
    # (`_zstd_version = _zstd_version = tuple(...)`); assign once.
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None
41from . import util
42from ._base_connection import _TYPE_BODY
43from ._collections import HTTPHeaderDict
44from .connection import BaseSSLError, HTTPConnection, HTTPException
45from .exceptions import (
46 BodyNotHttplibCompatible,
47 DecodeError,
48 HTTPError,
49 IncompleteRead,
50 InvalidChunkLength,
51 InvalidHeader,
52 ProtocolError,
53 ReadTimeoutError,
54 ResponseNotChunked,
55 SSLError,
56)
57from .util.response import is_fp_closed, is_response_to_head
58from .util.retry import Retry
# Imports needed only by static type checkers; guarded so they are not
# executed (and cannot create import cycles) at runtime.
if typing.TYPE_CHECKING:
    from typing import Literal

    from .connectionpool import HTTPConnectionPool

# Module-level logger for this module.
log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract base for streaming content decoders.

    Subclasses implement one HTTP content-coding each and must override
    both methods below.
    """

    def flush(self) -> bytes:
        """Return any bytes remaining in the decoder once input ends."""
        raise NotImplementedError()

    def decompress(self, data: bytes) -> bytes:
        """Decode one chunk of encoded body data and return the result."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for 'deflate' bodies.

    Tolerates both RFC 1950 zlib-wrapped streams and the raw RFC 1951
    deflate streams some servers send, by retrying with a raw-deflate
    decompressor on the first failure.
    """

    def __init__(self) -> None:
        # Until the first successful decompression we accumulate all input
        # in _data so it can be replayed through a raw-deflate object.
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            # Format already settled; just feed the decompressor.
            return self._obj.decompress(data)

        # First attempt: assume a zlib-wrapped (RFC 1950) stream.
        self._data += data
        try:
            out = self._obj.decompress(data)
        except zlib.error:
            # Looks like raw deflate (RFC 1951): switch to a negative
            # window size and replay everything received so far.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

        if out:
            # Produced output, so the zlib-wrapped guess was right;
            # the replay buffer is no longer needed.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return out

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """State values tracking GzipDecoder's progress through a stream."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # at least one member decoded completely
    SWALLOW_DATA = 2  # an error occurred; discard all further input
class GzipDecoder(ContentDecoder):
    """Streaming decoder for 'gzip' bodies, including multi-member streams."""

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS tells zlib to expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            # Either a previous error told us to discard input,
            # or there is nothing to do.
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            # Bytes remain after a complete member: start decoding the
            # next gzip member with a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind a per-instance 'decompress' to whichever method the
            # installed implementation provides.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only 'brotlipy' exposes flush(); 'Brotli' has nothing to flush.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A body may contain several concatenated zstd frames; when one
            # frame ends (eof) restart a decompressor on the leftover bytes.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            if not self._obj.eof:
                # Input ended mid-frame: the stream was truncated.
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
class MultiDecoder(ContentDecoder):
    """Chains decoders for responses with several content-codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in header (application) order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings in reverse: last-applied coding comes off first.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data

    def flush(self) -> bytes:
        # Only the innermost (first-listed) decoder is flushed, matching
        # the order in which decompress() finishes.
        return self._decoders[0].flush()
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for the given content-coding token(s)."""
    # A comma means multiple codings were applied in sequence.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and zstd is not None:
        return ZstdDecoder()

    # Anything else is treated as deflate.
    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    To return decoded data in read() and still follow the BufferedIOBase
    API, we need a buffer that can hand back an exact number of bytes.
    Fill it with put(); drain it with get().

    Peak memory usage is the queued data itself plus, in the worst case of
    one giant chunk, a single full copy made inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the back of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue."""
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        copied = 0
        while copied < n and self.buffer:
            chunk = self.buffer.popleft()
            needed = n - copied
            if len(chunk) > needed:
                # Chunk is larger than what remains to copy: keep the tail
                # queued for a later read.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                break
            out.write(chunk)
            self._size -= len(chunk)
            copied += len(chunk)

        return out.getvalue()
class BaseHTTPResponse(io.IOBase):
    """Shared base for HTTP response objects.

    Handles header bookkeeping, redirect detection, and transparent
    content-decoding; subclasses implement the actual body reading.
    """

    # Content-encodings this class can transparently decode. The optional
    # brotli/zstd entries are advertised only when those packages imported.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types decoders may raise; wrapped into DecodeError by _decode().
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Normalize any mapping into a case-insensitive HTTPHeaderDict.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Set once any decoded bytes have been returned; guards against
        # mixing decoded and raw reads on the same response.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the Content-Encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses must provide access to the (possibly cached) body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings listed: only build a decoder chain when at
                # least one of the listed codings is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # A raw read after decoded bytes were already returned would
            # hand the caller an inconsistent byte stream.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; file-like bodies become _fp.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        """Return the underlying connection to its pool, when both are known."""
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain; a failure simply means the connection
            # will be discarded instead of reused.
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        # True when the underlying file-like object has been closed.
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (self.length_remaining and self.length_remaining > c_int_max)
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            # Read in bounded chunks to sidestep the 32-bit overflow cases.
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            # Track wire-level progress for tell() and length enforcement.
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve a partial read entirely from previously decoded bytes
            # when enough are already buffered.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            # Keep reading until the raw stream closes AND the decoded
            # buffer is fully drained.
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        # Return up to `amt` bytes of the current chunk (all of it if amt is
        # None), consuming the trailing CRLF when the chunk is exhausted.
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        # Iterate the decoded body line-by-line, buffering partial lines
        # across stream() chunks.
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)