Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/urllib3/response.py: 23%
557 statements
« prev ^ index » next coverage.py v7.2.0, created at 2023-02-23 06:30 +0000
« prev ^ index » next coverage.py v7.2.0, created at 2023-02-23 06:30 +0000
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import re
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17try:
18 try:
19 import brotlicffi as brotli # type: ignore[import]
20 except ImportError:
21 import brotli # type: ignore[import]
22except ImportError:
23 brotli = None
try:
    import zstandard as zstd  # type: ignore[import]

    # The package 'zstandard' added the 'eof' property starting
    # in v0.18.0 which we require to ensure a complete and
    # valid zstd stream was fed into the ZstdDecoder.
    # See: https://github.com/urllib3/urllib3/pull/2624
    _zstd_version = tuple(
        map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
    )
    if _zstd_version < (0, 18):  # Defensive:
        zstd = None

except (AttributeError, ImportError, ValueError):  # Defensive:
    zstd = None
41from . import util
42from ._base_connection import _TYPE_BODY
43from ._collections import HTTPHeaderDict
44from .connection import BaseSSLError, HTTPConnection, HTTPException
45from .exceptions import (
46 BodyNotHttplibCompatible,
47 DecodeError,
48 HTTPError,
49 IncompleteRead,
50 InvalidChunkLength,
51 InvalidHeader,
52 ProtocolError,
53 ReadTimeoutError,
54 ResponseNotChunked,
55 SSLError,
56)
57from .util.response import is_fp_closed, is_response_to_head
58from .util.retry import Retry
60if typing.TYPE_CHECKING:
61 from typing_extensions import Literal
63 from .connectionpool import HTTPConnectionPool
65log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming content decoders.

    Subclasses implement :meth:`decompress` for incremental input chunks
    and :meth:`flush` to emit any output still buffered at end of stream.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode *data* and return whatever output is currently available."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Finalize decoding and return any trailing output."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for the 'deflate' content-encoding.

    Servers disagree on whether "deflate" means zlib-wrapped (RFC 1950) or
    raw DEFLATE (RFC 1951) data.  The zlib-wrapped form is tried first; if
    the very first decode attempt fails, the accumulated input is replayed
    through a raw-DEFLATE decompressor instead.
    """

    def __init__(self) -> None:
        self._first_try = True
        # Raw input accumulated so it can be replayed on fallback.
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            output = self._obj.decompress(data)
        except zlib.error:
            # Wrong guess: retry everything seen so far as raw DEFLATE.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        # Once real output appears, the format guess is confirmed and the
        # replay buffer is no longer needed.
        if output:
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return output

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """State labels for :class:`GzipDecoder` multi-member handling."""

    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2
class GzipDecoder(ContentDecoder):
    """Decoder for the 'gzip' content-encoding.

    Handles bodies made of several concatenated gzip members; once at least
    one member decoded successfully, trailing garbage is silently swallowed
    (matching the tolerance of common gzip clients).
    """

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS selects gzip-header mode in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        out = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(out)
        while True:
            try:
                out += self._obj.decompress(data)
            except zlib.error:
                prior_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if prior_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(out)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(out)
            # Another gzip member follows: start a fresh decompressor.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for the 'br' content-encoding.

        Supports both the 'brotlipy' and 'Brotli' packages, which share the
        ``brotli`` import name but expose different decompressor APIs:
        'brotlipy' has ``decompress``, 'Brotli' has ``process``.
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the per-instance decompress entry point to whichever
            # method the installed package provides.
            impl = getattr(self._obj, "decompress", None)
            if impl is None:
                impl = self._obj.process
            setattr(self, "decompress", impl)

        def flush(self) -> bytes:
            # Only 'brotlipy' exposes flush(); 'Brotli' buffers nothing.
            flush_impl = getattr(self._obj, "flush", None)
            if flush_impl is not None:
                return flush_impl()  # type: ignore[no-any-return]
            return b""
if zstd is not None:

    class ZstdDecoder(ContentDecoder):
        """Decoder for the 'zstd' content-encoding (zstandard >= 0.18)."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            return self._obj.decompress(data)  # type: ignore[no-any-return]

        def flush(self) -> bytes:
            trailing = self._obj.flush()
            # 'eof' only becomes True once a complete zstd frame was fed in.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return trailing  # type: ignore[no-any-return]
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # The first listed encoding was applied first, so its decoder runs
        # last in decompress() and is the one that may buffer output.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the encodings in reverse order of application.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a :class:`ContentDecoder` matching a content-encoding value.

    A comma in *mode* means several encodings were applied in sequence;
    anything unrecognized falls through to the deflate decoder.
    """
    if "," in mode:
        return MultiDecoder(mode)

    if mode == "gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and zstd is not None:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
    """

    def __init__(self) -> None:
        # Chunks in arrival order; _size tracks their combined length so
        # __len__ is O(1).
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk of bytes to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to *n* bytes from the front of the buffer.

        :raises RuntimeError: if the buffer is empty.
        :raises ValueError: if *n* is negative.
        """
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the head chunk satisfies the request exactly, so it can
        # be handed back without any copying.
        if len(self.buffer[0]) == n:
            head = self.buffer.popleft()
            self._size -= n
            return head

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Split the chunk: emit the prefix, requeue the suffix.
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()
class BaseHTTPResponse(io.IOBase):
    """Shared base class for urllib3 HTTP responses.

    Implements header storage, redirect detection, content decoding, and the
    :mod:`io` / :mod:`http.client` compatibility shims.  Subclasses supply the
    actual body transport (``read``, ``stream``, ``close``, ...).
    """

    # Supported content-encodings; brotli/zstd are appended only when their
    # optional packages imported successfully at module load time.
    CONTENT_DECODERS = ["gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if zstd is not None:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; _decode() wraps these in DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if zstd is not None:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.reason = reason
        self.decode_content = decode_content
        # Tracks whether any data has passed through a decoder; used to
        # reject mixing decode_content=True/False across read() calls.
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # NOTE: assigned via the `retries` property setter below, which may
        # also update the URL from the retry history.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None

    def get_redirect_location(self) -> str | None | Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the (possibly cached) full response body.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Parses the body of the HTTP response as JSON.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to the decoder.

        This method can raise either `UnicodeDecodeError` or `json.JSONDecodeError`.

        Read more :ref:`here <json>`.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> HTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple encodings: only build a MultiDecoder if every
                # listed encoding that we recognize is supported.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            # Once decoded bytes were handed out, raw reads would return
            # data from the middle of an encoded stream — disallow it.
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Fill *b* with up to len(b) bytes; returns the number written (0 at EOF).
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; a file-like body becomes _fp.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        """Return the underlying connection to its pool, if both exist."""
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort: a failed drain just means the connection won't
            # be reused.
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        """Return True when the underlying file-like body is closed."""
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:``urllib3.response.HTTPResponse.read``
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None, None, None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except (HTTPException, OSError) as e:
                # This includes IncompleteRead.
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(self, amt: int | None = None) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * 3.8 <= CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (
                (amt and amt > c_int_max)
                or (self.length_remaining and self.length_remaining > c_int_max)
            )
            and not util.IS_SECURETRANSPORT
            and (util.IS_PYOPENSSL or sys.version_info < (3, 10))
        ):
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt is not None:
            cache_content = False

            # Serve from previously-decoded leftovers before touching the wire.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = False
        if amt is None:
            flush_decoder = True
        elif amt != 0 and not data:
            flush_decoder = True

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep reading raw chunks until enough decoded bytes are
            # buffered to satisfy the caller, or the stream ends.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def close(self) -> None:
        """Close the body and, if held, the underlying connection."""
        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        # With auto_close disabled the io.IOBase machinery is closed
        # explicitly here instead of implicitly on fp close.
        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        # Chunk extensions after ';' are ignored per RFC 7230 sec 4.1.1.
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            # Invalid chunked protocol response, abort.
            self.close()
            raise InvalidChunkLength(self, line) from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        """Read up to *amt* bytes (or the whole chunk when None) from the
        current chunk, consuming the trailing CRLF when the chunk ends."""
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes, None, None]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        # Iterate the decoded body line by line (split on b"\n").
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)