from __future__ import annotations

import collections
import io
import json as _json
import logging
import socket
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    DependencyWarning,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._first_try_data = b""
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        return self._obj.flush()
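

# A hedged usage sketch of the format fallback above (illustrative only, not
# part of this module): a raw RFC 1951 DEFLATE body decodes on the fallback
# path after the first RFC 1950 zlib attempt raises zlib.error.
#
#   compressor = zlib.compressobj(9, zlib.DEFLATED, -zlib.MAX_WBITS)  # raw DEFLATE
#   raw = compressor.compress(b"hello") + compressor.flush()
#   assert DeflateDecoder().decompress(raw) == b"hello"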


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        if max_length <= 0:
            for d in reversed(self._decoders):
                data = d.decompress(data)
            return data

        ret = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            any_data = False
            for d in reversed(self._decoders):
                data = d.decompress(data, max_length=max_length - len(ret))
                if data:
                    any_data = True
                # We should not break when no data is returned because
                # next decoders may produce data even with empty input.
            ret += data
            if not any_data or len(ret) >= max_length:
                return bytes(ret)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return any(d.has_unconsumed_tail for d in self._decoders)


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()
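

# A hedged mapping sketch (illustrative only): Content-Encoding values map to
# decoders as follows; chained encodings are decoded by MultiDecoder in the
# reverse of the order in which they were applied.
#
#   _get_decoder("gzip")      -> GzipDecoder
#   _get_decoder("br")        -> BrotliDecoder (when a brotli package imports)
#   _get_decoder("gzip, br")  -> MultiDecoder, decoding "br" first, then "gzip"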


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
            self._size -= n
            return self.buffer.popleft()

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                chunk = memoryview(chunk)
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
            if isinstance(result, memoryview):
                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result
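

# A minimal usage sketch (illustrative only): the deque holds whole chunks and
# get() copies only when a chunk must be split.
#
#   buf = BytesQueueBuffer()
#   buf.put(b"hello ")
#   buf.put(b"world")
#   assert buf.get(5) == b"hello"
#   assert buf.get_all() == b" world"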


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)
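
    # A hedged usage sketch for json() (the response object is hypothetical):
    #
    #   resp = urllib3.request("GET", "https://example.com/api")
    #   payload = resp.json()  # Python object parsed from the UTF-8 JSON body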

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
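

# A hedged io-compat sketch (illustrative only, `resp` is hypothetical):
# because BaseHTTPResponse implements read1() and the io.IOBase interface,
# a response can back an io.TextIOWrapper directly.
#
#   import io
#   text = io.TextIOWrapper(resp, encoding="utf-8").read()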


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
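
    # For example (hedged, illustrative): a header of "Content-Length: 42, 42"
    # parses to the single-element set {42} above and yields 42, while
    # "Content-Length: 42, 13" raises InvalidHeader.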

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response. Reading more bytes than fit in a 32-bit int at a
        time via SSL leads, in some known cases, to an overflow error that
        has to be prevented if `amt` or `self.length_remaining` indicate
        that a problem may happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            if self._decoder and self._decoder.has_unconsumed_tail:
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
                if len(self._decoded_buffer) >= amt:
                    return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if (
            not data
            and len(self._decoded_buffer) == 0
            and not (self._decoder and self._decoder.has_unconsumed_tail)
        ):
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(
                    data,
                    decode_content,
                    flush_decoder,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if (
                self._decoder
                and self._decoder.has_unconsumed_tail
                and (amt is None or len(self._decoded_buffer) < amt)
            ):
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=(
                        amt - len(self._decoded_buffer) if amt is not None else None
                    ),
                )
                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(
                data, decode_content, flush_decoder, max_length=amt
            )
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            ``amt`` bytes of data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the empty
            string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while (
                not is_fp_closed(self._fp)
                or len(self._decoded_buffer) > 0
                or (self._decoder and self._decoder.has_unconsumed_tail)
            ):
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data
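
    # A hedged streaming sketch (names are illustrative, not part of this
    # module): with preload_content=False the body is consumed incrementally.
    #
    #   resp = urllib3.request("GET", url, preload_content=False)
    #   for chunk in resp.stream(2**16, decode_content=True):
    #       sink.write(chunk)
    #   resp.release_conn()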

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def shutdown(self) -> None:
        if not self._sock_shutdown:
            raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
        if self._connection is None:
            raise RuntimeError(
                "Cannot shutdown as connection has already been released to the pool"
            )
        self._sock_shutdown(socket.SHUT_RD)

    def close(self) -> None:
        self._sock_shutdown = None

        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out length of a chunk and then
        # we'll try to read it from socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk,
                    decode_content=decode_content,
                    flush_decoder=False,
                    max_length=amt,
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # let's defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str | None) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)
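

# A hedged line-iteration sketch (illustrative only, `resp` is hypothetical):
# iterating an HTTPResponse yields decoded lines via stream(); every item but
# possibly the last ends with b"\n".
#
#   resp = urllib3.request("GET", url, preload_content=False)
#   for line in resp:
#       handle(line)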