from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

try:
    try:
        import brotlicffi as brotli  # type: ignore[import]
    except ImportError:
        import brotli  # type: ignore[import]
except ImportError:
    brotli = None

try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from typing_extensions import Literal

    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
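

# Illustrative sketch (editor's addition, not part of urllib3): the two-pass
# strategy above accepts both zlib-wrapped and raw-deflate payloads, which
# servers send interchangeably for "Content-Encoding: deflate". The _demo_
# helper name is hypothetical and never called by the library.
def _demo_deflate_decoder() -> None:
    decoder = DeflateDecoder()
    # Strip the 2-byte zlib header and 4-byte Adler-32 trailer to get a raw
    # deflate stream; the first zlib attempt fails and the decoder retries
    # with -zlib.MAX_WBITS.
    raw_deflate = zlib.compress(b"hello world")[2:-4]
    assert decoder.decompress(raw_deflate) + decoder.flush() == b"hello world"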


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
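

# Illustrative sketch (editor's addition, not part of urllib3): the loop above
# restarts the zlib object at each member boundary, so concatenated gzip
# members decode as one logical stream. The _demo_ helper name is hypothetical.
def _demo_gzip_decoder() -> None:
    import gzip

    decoder = GzipDecoder()
    payload = gzip.compress(b"hello ") + gzip.compress(b"world")
    assert decoder.decompress(payload) == b"hello world"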


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
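
    # Illustrative sketch (editor's addition, not part of urllib3), guarded by
    # the same brotli availability check: both bindings round-trip through the
    # shared ``decompress`` attribute installed in __init__ above. The _demo_
    # helper name is hypothetical.
    def _demo_brotli_decoder() -> None:
        decoder = BrotliDecoder()
        compressed = brotli.compress(b"hello world")
        assert decoder.decompress(compressed) + decoder.flush() == b"hello world"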


if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            return self._obj.decompress(data)  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            ret = self._obj.flush()
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret  # type: ignore[no-any-return]
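
    # Illustrative sketch (editor's addition, not part of urllib3), guarded by
    # the same zstandard availability check: a complete frame round-trips, and
    # flush() enforces the v0.18.0+ ``eof`` completeness check noted above.
    # The _demo_ helper name is hypothetical.
    def _demo_zstd_decoder() -> None:
        decoder = ZstdDecoder()
        compressed = zstd.ZstdCompressor().compress(b"hello world")
        assert decoder.decompress(compressed) + decoder.flush() == b"hello world"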


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if zstd is not None and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
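

# Illustrative sketch (editor's addition, not part of urllib3): a plain
# Content-Encoding token maps to a single decoder, while a comma-separated
# value yields a MultiDecoder that undoes the codings in reverse order of
# application. The _demo_ helper name is hypothetical.
def _demo_get_decoder() -> None:
    import gzip

    assert isinstance(_get_decoder("gzip"), GzipDecoder)
    assert isinstance(_get_decoder("deflate"), DeflateDecoder)

    # "Content-Encoding: deflate, gzip" means deflate was applied first and
    # gzip second, so decoding runs gzip first, then deflate.
    chained = _get_decoder("deflate, gzip")
    body = gzip.compress(zlib.compress(b"payload"))
    assert chained.decompress(body) == b"payload"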


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put().

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be >= 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
                fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()
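

# Illustrative sketch (editor's addition, not part of urllib3): chunks go in
# via put() and come back out in exact byte counts via get(); a partially
# consumed chunk is pushed back onto the left of the deque. The _demo_ helper
# name is hypothetical.
def _demo_bytes_queue_buffer() -> None:
    buffer = BytesQueueBuffer()
    buffer.put(b"hello ")
    buffer.put(b"world")
    assert len(buffer) == 11
    assert buffer.get(4) == b"hell"
    assert buffer.get(7) == b"o world"
    assert len(buffer) == 0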


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`. (An illustrative sketch,
        ``_demo_redirect_and_json``, follows this class definition.)
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
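

# Illustrative sketches (editor's additions, not part of urllib3). They
# exercise BaseHTTPResponse behaviour documented above through the concrete
# HTTPResponse subclass defined below; the _demo_ names are hypothetical and
# never called by the library itself.
def _demo_redirect_and_json() -> None:
    # get_redirect_location(): a string for a redirect with a Location
    # header, None for a redirect without one, False for a non-redirect.
    moved = HTTPResponse(status=302, headers={"location": "/new"})
    assert moved.get_redirect_location() == "/new"
    assert HTTPResponse(status=301).get_redirect_location() is None
    assert HTTPResponse(status=200).get_redirect_location() is False

    # json() decodes the raw body as UTF-8; a custom decoder can be fed the
    # same bytes through the .data property instead.
    response = HTTPResponse(body=b'{"origin": "127.0.0.1"}')
    assert response.json() == {"origin": "127.0.0.1"}
    assert _json.loads(response.data.decode("utf-8")) == response.json()


def _demo_readinto() -> None:
    # readinto() fills a caller-supplied buffer, which is what lets this
    # class participate in the io.BufferedIOBase reading protocol.
    response = HTTPResponse(body=io.BytesIO(b"abcdef"), preload_content=False)
    scratch = bytearray(4)
    assert response.readinto(scratch) == 4
    assert bytes(scratch) == b"abcd"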


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The last :class:`~urllib3.util.retry.Retry` that was used during
        the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "non-matching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response, guarding against a known overflow: on some platforms,
        asking SSL to read more bytes than fit in a 32-bit int at once raises
        an overflow error, so we chunk the read when `amt` or
        `self.length_remaining` indicate that this may happen.

        The known cases:
         * 3.8 <= CPython < 3.9.7 because of a bug
           https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
         * urllib3 injected with pyOpenSSL-backed SSL-support.
         * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)

        (An illustrative sketch of partial reads, ``_demo_partial_reads``,
        follows this class definition.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` bytes of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, an empty
            bytestring will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # let's defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
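

# Illustrative sketches (editor's additions, not part of urllib3) for the
# HTTPResponse machinery above; the _demo_ names are hypothetical and never
# called by the library itself.
def _demo_content_length_parsing() -> None:
    # _init_length() accepts repeated-but-identical Content-Length values
    # (RFC 7230 section 3.3.2) and rejects mismatched ones.
    response = HTTPResponse(
        body=io.BytesIO(b"x" * 5),
        headers={"content-length": "5, 5"},
        preload_content=False,
    )
    assert response.length_remaining == 5

    try:
        HTTPResponse(
            body=io.BytesIO(b""),
            headers={"content-length": "5, 6"},
            preload_content=False,
        )
    except InvalidHeader:
        pass  # mismatched values are rejected
    else:
        raise AssertionError("expected InvalidHeader")


def _demo_partial_reads() -> None:
    # read(amt) over a compressed body returns exact counts of *decoded*
    # bytes, with any overshoot parked in the BytesQueueBuffer.
    import gzip

    response = HTTPResponse(
        body=io.BytesIO(gzip.compress(b"hello world")),
        headers={"content-encoding": "gzip"},
        preload_content=False,
    )
    assert response.read(5) == b"hello"
    assert response.read(6) == b" world"


def _demo_stream_and_iter() -> None:
    # stream() yields decoded chunks until the body is exhausted; iterating
    # the response instead yields newline-delimited lines reassembled across
    # chunk boundaries.
    response = HTTPResponse(body=io.BytesIO(b"a" * 100), preload_content=False)
    assert list(response.stream(amt=64)) == [b"a" * 64, b"a" * 36]

    lines = HTTPResponse(body=io.BytesIO(b"one\ntwo\nthree"), preload_content=False)
    assert list(lines) == [b"one\n", b"two\n", b"three"]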