Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 20%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 DependencyWarning,
36 HTTPError,
37 IncompleteRead,
38 InvalidChunkLength,
39 InvalidHeader,
40 ProtocolError,
41 ReadTimeoutError,
42 ResponseNotChunked,
43 SSLError,
44)
45from .util.response import is_fp_closed, is_response_to_head
46from .util.retry import Retry
48if typing.TYPE_CHECKING:
49 from .connectionpool import HTTPConnectionPool
# Logger for this module, named after the module itself.
log = logging.getLogger(__name__)

# Default read size: 64 KiB.
_READ_CHUNK_SIZE = 1 << 16
class ContentDecoder:
    """Interface shared by the content decoders defined in this module.

    Every member of the base class raises ``NotImplementedError``;
    subclasses supply the real behaviour.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data``; subclasses define how ``max_length`` is applied."""
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        """Subclasses report here whether buffered input is still pending."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Subclasses return whatever output remains at the end of the stream."""
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` bodies.

    The first call tries the RFC 1950 (zlib-wrapped) format; if zlib
    rejects the input, the decoder switches to raw RFC 1951 DEFLATE and
    replays everything it has been fed so far.
    """

    def __init__(self) -> None:
        # True until the stream format has been settled.
        self._first_try = True
        # Input accumulated while the format is still undecided.
        self._first_try_data = b""
        # Input received while the caller asked for zero output bytes.
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data`` and return at most ``max_length`` bytes.

        A negative ``max_length`` means "no limit". ``max_length == 0``
        stores the input for a later call and returns ``b""``.
        """
        pending = self._unfed_data + data
        self._unfed_data = b""
        if not (pending or self._obj.unconsumed_tail):
            return pending

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = pending
            return b""

        # zlib spells "unlimited" as 0, this API spells it as a negative value.
        zlib_limit = max_length if max_length > 0 else 0

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + pending, max_length=zlib_limit
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += pending
        try:
            output = self._obj.decompress(pending, max_length=zlib_limit)
        except zlib.error:
            # On failure, it falls back to RFC 1951 DEFLATE format.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._first_try_data, max_length=max_length)
            finally:
                self._first_try_data = b""

        if output:
            # Output was produced, so the format is settled.
            self._first_try = False
            self._first_try_data = b""
        return output

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if stored input or a zlib unconsumed tail is still pending."""
        if self._unfed_data:
            return True
        return not self._first_try and bool(self._obj.unconsumed_tail)

    def flush(self) -> bytes:
        """Return whatever the underlying zlib object still holds."""
        return self._obj.flush()
class GzipDecoderState:
    """Integer states used by ``GzipDecoder`` while walking a gzip stream."""

    # Initial state: the first gzip member is being decoded.
    FIRST_MEMBER = 0
    # Set once a member has ended and a new decompressor was started.
    OTHER_MEMBERS = 1
    # Set after a zlib error: later input is discarded.
    SWALLOW_DATA = 2
class GzipDecoder(ContentDecoder):
    """Decoder for ``gzip`` bodies, including multi-member streams."""

    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        # Input not yet processed by zlib; replayed on the next call.
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data``, returning at most ``max_length`` bytes.

        A ``max_length`` of 0 stores the input for a later call. A zlib
        error in the first member is re-raised; after the first member it
        is treated as trailing garbage and the output so far is returned.
        """
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return b""

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        pending = self._unconsumed_tail + data
        if self._obj.eof and not pending:
            return b""

        output = bytearray()
        while True:
            budget = max(max_length - len(output), 0)
            try:
                output += self._obj.decompress(pending, max_length=budget)
            except zlib.error:
                state_before_error = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if state_before_error == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(output)
                raise

            pending = self._obj.unconsumed_tail or self._obj.unused_data
            self._unconsumed_tail = pending

            # Stop once the requested amount is reached or the input is used up.
            if max_length > 0 and len(output) >= max_length:
                return bytes(output)
            if not pending:
                return bytes(output)

            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if input is stored for a later ``decompress`` call."""
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        """Return whatever the current zlib object still holds."""
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` bodies, defined only when a brotli module imported."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever decode entry point this decompressor exposes.
            method_name = "decompress" if hasattr(self._obj, "decompress") else "process"
            setattr(self, "_decompress", getattr(self._obj, method_name))

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            # Placeholder: replaced per instance in ``__init__``.
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data``, limiting output when ``max_length`` is positive.

            If the call raises ``TypeError``, a ``DependencyWarning`` is
            emitted and the data is decompressed without a limit.
            """
            try:
                if max_length <= 0:
                    return self._decompress(data)
                return self._decompress(data, output_buffer_limit=max_length)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the decompressor says it cannot accept more data."""
            try:
                accepts_more = self._obj.can_accept_more_data()
            except AttributeError:
                # This decompressor does not provide the query.
                return False
            return not accepts_more

        def flush(self) -> bytes:
            """Flush the decompressor if it has a ``flush`` method, else ``b""``."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]
try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` bodies, including several concatenated frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data``, returning at most ``max_length`` bytes.

            A negative ``max_length`` means "no limit".
            """
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                # The previous frame ended: carry its leftover bytes into a
                # fresh decompressor.
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            chunk = self._obj.decompress(data, max_length=max_length)
            produced = len(chunk)
            chunks = [chunk]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or produced < max_length)
            ):
                leftover = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                remaining_budget = (max_length - produced) if max_length > 0 else -1
                chunk = self._obj.decompress(leftover, max_length=remaining_budget)
                if chunk_length := len(chunk):
                    chunks.append(chunk)
                    produced += chunk_length
                elif self._obj.needs_input:
                    break
            return b"".join(chunks)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True if the decompressor holds unused data or neither needs input nor hit EOF."""
            mid_frame_with_input = not self._obj.needs_input and not self._obj.eof
            return mid_frame_with_input or bool(self._obj.unused_data)

        def flush(self) -> bytes:
            """Return ``b""``; raise ``DecodeError`` if the frame has not ended."""
            if self._obj.eof:
                return b""
            raise DecodeError("Zstandard data is incomplete")
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        """Build one decoder per comma-separated entry of ``modes``.

        :raises DecodeError: if there are more than ``max_decode_links`` entries.
        """
        names = [part.strip() for part in modes.split(",")]
        if len(names) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(names)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(name) for name in names]

    def flush(self) -> bytes:
        """Flush the decoder for the first listed encoding."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Run ``data`` through the decoders in reverse of the listed order.

        With a positive ``max_length`` at most that many bytes are returned.
        """
        if max_length <= 0:
            # No limit: a single pass through the chain.
            for decoder in reversed(self._decoders):
                data = decoder.decompress(data)
            return data

        collected = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            produced_something = False
            for decoder in reversed(self._decoders):
                data = decoder.decompress(
                    data, max_length=max_length - len(collected)
                )
                # We should not break when no data is returned because
                # next decoders may produce data even with empty input.
                if data:
                    produced_something = True
            # Only the last decoder's output is kept.
            collected += data
            if len(collected) >= max_length or not produced_something:
                return bytes(collected)
            # Later passes only drain what the decoders have buffered.
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if any decoder in the chain reports pending input."""
        return any(decoder.has_unconsumed_tail for decoder in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a decoder for the ``Content-Encoding`` value ``mode``.

    A comma-separated value yields a ``MultiDecoder``. Any value that is not
    handled explicitly falls through to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    # Brotli and zstd are only offered when their modules were importable.
    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        # Queued chunks, oldest first. A chunk becomes a memoryview when
        # ``get()`` splits it and pushes the remainder back.
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        # Total number of bytes currently queued.
        self._size: int = 0

    def __len__(self) -> int:
        """Return the number of bytes currently queued."""
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` to the end of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        Fewer than ``n`` bytes are returned when the queue runs out first.

        :raises ValueError: if ``n`` is negative.
        :raises RuntimeError: if ``n`` is positive and the queue is empty.
        """
        # Validate the argument before looking at the queue, so a negative
        # ``n`` is always reported as a bad argument, even when empty.
        if n < 0:
            raise ValueError("n should be > 0")
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")

        # Fast path: the first chunk is exactly the requested size, so it
        # can be handed over without copying.
        if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
            self._size -= n
            return self.buffer.popleft()

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                # Split the chunk through a memoryview and push the unused
                # right-hand part back to the front of the queue.
                chunk = memoryview(chunk)
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything queued (``b""`` when empty)."""
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
            # The single chunk may be a view left behind by ``get()``.
            if isinstance(result, memoryview):
                result = result.tobytes()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """Base class for responses: header handling and content decoding.

    Body access (``data``, ``read``, ``stream``, ...) and connection
    management raise ``NotImplementedError`` here.
    """

    # Content-Encoding values that can be decoded; "br" and "zstd" are only
    # listed when the corresponding module was importable.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions raised by decoders that ``_decode`` wraps in ``DecodeError``.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store response metadata and detect chunked transfer encoding."""
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the ``retries`` setter, which may assign ``self.url``.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = _READ_CHUNK_SIZE, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        if self._decoder is not None:
            return
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if content_encoding in self.CONTENT_DECODERS:
            self._decoder = _get_decoder(content_encoding)
        elif "," in content_encoding and any(
            e.strip() in self.CONTENT_DECODERS for e in content_encoding.split(",")
        ):
            # A chain is decoded as soon as one of its entries is supported;
            # the whole header value is handed to ``_get_decoder``.
            self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :raises RuntimeError: if raw data is requested after decoded data
            has already been produced.
        :raises DecodeError: if the decoder fails on ``data``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        # No limit applies when none was given or when flushing.
        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        """Read up to ``len(b)`` bytes into ``b`` and return the count."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
692class HTTPResponse(BaseHTTPResponse):
693 """
694 HTTP Response container.
696 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
697 loaded and decoded on-demand when the ``data`` property is accessed. This
698 class is also compatible with the Python standard library's :mod:`io`
699 module, and can hence be treated as a readable object in the context of that
700 framework.
702 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
704 :param preload_content:
705 If True, the response's body will be preloaded during construction.
707 :param decode_content:
708 If True, will attempt to decode the body based on the
709 'content-encoding' header.
711 :param original_response:
712 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
713 object, it's convenient to include the original for debug purposes. It's
714 otherwise unused.
716 :param retries:
717 The retries contains the last :class:`~urllib3.util.retry.Retry` that
718 was used during the request.
720 :param enforce_content_length:
721 Enforce content length checking. Body returned by server must match
722 value of Content-Length header, if present. Otherwise, raise error.
723 """
def __init__(
    self,
    body: _TYPE_BODY = "",
    headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
    status: int = 0,
    version: int = 0,
    version_string: str = "HTTP/?",
    reason: str | None = None,
    preload_content: bool = True,
    decode_content: bool = True,
    original_response: _HttplibHTTPResponse | None = None,
    pool: HTTPConnectionPool | None = None,
    connection: HTTPConnection | None = None,
    msg: _HttplibHTTPMessage | None = None,
    retries: Retry | None = None,
    enforce_content_length: bool = True,
    request_method: str | None = None,
    request_url: str | None = None,
    auto_close: bool = True,
    sock_shutdown: typing.Callable[[int], None] | None = None,
) -> None:
    """Initialise the response and, when requested, preload its body."""
    super().__init__(
        headers=headers,
        status=status,
        version=version,
        version_string=version_string,
        reason=reason,
        decode_content=decode_content,
        request_url=request_url,
        retries=retries,
    )

    # Caller-supplied settings and collaborators.
    self.enforce_content_length = enforce_content_length
    self.auto_close = auto_close
    self.msg = msg
    self._original_response = original_response
    self._pool = pool
    self._connection = connection
    self._sock_shutdown = sock_shutdown

    # Read bookkeeping.
    self._uncached_read_occurred = False
    self._fp_bytes_read = 0

    # A non-empty str/bytes body is stored directly; an object with a
    # ``read`` attribute becomes the file object to read from.
    self._body = body if body and isinstance(body, (str, bytes)) else None
    self._fp: _HttplibHTTPResponse | None = None
    if hasattr(body, "read"):
        self._fp = body  # type: ignore[assignment]

    # Are we using the chunked-style of transfer encoding?
    self.chunk_left: int | None = None

    # Determine length of response
    self.length_remaining = self._init_length(request_method)

    # Used to return the correct amount of bytes for partial read()s
    self._decoded_buffer = BytesQueueBuffer()

    # If requested, preload the body.
    if preload_content and not self._body:
        self._body = self.read(decode_content=decode_content)
def release_conn(self) -> None:
    """Give the connection back to the pool when both are set."""
    if self._pool and self._connection:
        self._pool._put_conn(self._connection)
        # Drop our reference so the connection is only returned once.
        self._connection = None
    return None
def drain_conn(self) -> None:
    """
    Read and discard any remaining HTTP response data in the response connection.

    Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
    """
    try:
        # Keep reading until a read returns nothing.
        while True:
            if not self._raw_read(_READ_CHUNK_SIZE):
                break
    except (HTTPError, OSError, BaseSSLError, HTTPException):
        # Best effort: errors raised while draining are ignored.
        pass
    if not self._has_decoded_content:
        return
    # `_raw_read` skips decompression, so we should clean up the
    # decoder to avoid keeping unnecessary data in memory.
    self._decoded_buffer = BytesQueueBuffer()
    self._decoder = None
@property
def data(self) -> bytes:
    """Return the cached body, or read it (with caching) from the file object.

    Returns ``None`` when there is neither a cached body nor a file object.
    """
    # For backwards-compat with earlier urllib3 0.4 and earlier.
    cached = self._body
    if cached:
        return cached  # type: ignore[return-value]

    if not self._fp:
        return None  # type: ignore[return-value]

    return self.read(cache_content=True)
@property
def connection(self) -> HTTPConnection | None:
    """The connection currently held by this response, or ``None``."""
    held_connection = self._connection
    return held_connection
def isclosed(self) -> bool:
    """Report what ``is_fp_closed`` says about the underlying file object."""
    fp = self._fp
    return is_fp_closed(fp)
def tell(self) -> int:
    """
    Obtain the number of bytes pulled over the wire so far. May differ from
    the amount of content returned by :meth:`HTTPResponse.read`
    if bytes are encoded on the wire (e.g, compressed).
    """
    bytes_read = self._fp_bytes_read
    return bytes_read
def _init_length(self, request_method: str | None) -> int | None:
    """
    Set initial length value for Response content if available.

    Returns ``None`` when the length is unknown: no Content-Length header,
    an unparsable or negative value, or a chunked response. Returns 0 for
    responses that shouldn't include a body.

    :raises InvalidHeader: if Content-Length lists differing values.
    """
    length: int | None = None
    header_value: str | None = self.headers.get("content-length")

    if header_value is not None:
        if self.chunked:
            # This Response will fail with an IncompleteRead if it can't be
            # received as chunked. This method falls back to attempt reading
            # the response before raising an exception.
            log.warning(
                "Received response with both Content-Length and "
                "Transfer-Encoding set. This is expressly forbidden "
                "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                "attempting to process response as Transfer-Encoding: "
                "chunked."
            )
            return None

        try:
            # RFC 7230 section 3.3.2 specifies multiple content lengths can
            # be sent in a single Content-Length header
            # (e.g. Content-Length: 42, 42). This line ensures the values
            # are all valid ints and that as long as the `set` length is 1,
            # all values are the same. Otherwise, the header is invalid.
            distinct_values = {int(part) for part in header_value.split(",")}
            if len(distinct_values) > 1:
                raise InvalidHeader(
                    "Content-Length contained multiple "
                    "unmatching values (%s)" % header_value
                )
            parsed = distinct_values.pop()
        except ValueError:
            # Not an integer: the length stays unknown.
            pass
        else:
            # A negative length is treated as unknown as well.
            if parsed >= 0:
                length = parsed

    # Convert status to int for comparison
    # In some cases, httplib returns a status of "_UNKNOWN"
    try:
        status = int(self.status)
    except ValueError:
        status = 0

    # Check for responses that shouldn't include a body
    if request_method == "HEAD" or status in (204, 304) or 100 <= status < 200:
        length = 0

    return length
@contextmanager
def _error_catcher(self) -> typing.Generator[None]:
    """
    Catch low-level python exceptions, instead re-raising urllib3
    variants, so that low-level exceptions are not leaked in the
    high-level api.

    On exit, release the connection back to the pool.
    """
    finished_cleanly = False

    try:
        try:
            yield

        except SocketTimeout as err:
            # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
            # there is yet no clean way to get at it from this context.
            raise ReadTimeoutError(self._pool, None, "Read timed out.") from err  # type: ignore[arg-type]

        except BaseSSLError as err:
            # SSL errors related to framing/MAC get wrapped and reraised here
            raise SSLError(err) from err

        except IncompleteRead as err:
            # ``expected == -partial`` is reported with a dedicated message.
            no_content = (
                err.expected is not None
                and err.partial is not None
                and err.expected == -err.partial
            )
            message = (
                "Response may not contain content."
                if no_content
                else f"Connection broken: {err!r}"
            )
            raise ProtocolError(message, err) from err

        except (HTTPException, OSError) as err:
            raise ProtocolError(f"Connection broken: {err!r}", err) from err

        # If no exception is thrown, we should avoid cleaning up
        # unnecessarily.
        finished_cleanly = True
    finally:
        # If we didn't terminate cleanly, we need to throw away our
        # connection.
        if not finished_cleanly:
            # The response may not be closed but we're not going to use it
            # anymore so close it now to ensure that the connection is
            # released back to the pool.
            if self._original_response:
                self._original_response.close()

            # Closing the response may not actually be sufficient to close
            # everything, so if we have a hold of the connection close that
            # too.
            if self._connection:
                self._connection.close()

        # If we hold the original response but it's closed now, we should
        # return the connection back to the pool.
        original = self._original_response
        if original and original.isclosed():
            self.release_conn()
def _fp_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Read a response with the thought that reading the number of bytes
    larger than can fit in a 32-bit int at a time via SSL in some
    known cases leads to an overflow error that has to be prevented
    if `amt` or `self.length_remaining` indicate that a problem may
    happen.

    This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
    """
    assert self._fp
    c_int_max = 2**31 - 1

    # The request is "too large" if `amt` itself exceeds a C int or, when
    # no amount is given, the remaining length does.
    if amt is None:
        remaining = self.length_remaining
        too_large = bool(remaining and remaining > c_int_max)
    else:
        too_large = bool(amt and amt > c_int_max)

    if too_large and util.IS_PYOPENSSL:
        if read1:
            return self._fp.read1(c_int_max)
        collected = io.BytesIO()
        # Besides `max_chunk_amt` being a maximum chunk size, it
        # affects memory overhead of reading a response by this
        # method in CPython.
        # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
        # chunk size that does not lead to an overflow error, but
        # 256 MiB is a compromise.
        max_chunk_amt = 2**28
        while amt is None or amt != 0:
            if amt is None:
                chunk_amt = max_chunk_amt
            else:
                chunk_amt = min(amt, max_chunk_amt)
                amt -= chunk_amt
            piece = self._fp.read(chunk_amt)
            if not piece:
                break
            collected.write(piece)
            del piece  # to reduce peak memory usage by `max_chunk_amt`.
        return collected.getvalue()

    if read1:
        if amt is None:
            return self._fp.read1()
        return self._fp.read1(amt)

    # StringIO doesn't like amt=None
    if amt is None:
        return self._fp.read()
    return self._fp.read(amt)
def _raw_read(
    self,
    amt: int | None = None,
    *,
    read1: bool = False,
) -> bytes:
    """
    Reads `amt` of bytes from the socket.

    Updates ``_fp_bytes_read`` and ``length_remaining`` for the bytes
    returned. Returns ``None`` when there is no file object.

    :raises IncompleteRead: if the body ends early while content-length
        enforcement is on and bytes are still expected.
    """
    if self._fp is None:
        return None  # type: ignore[return-value]

    already_closed = getattr(self._fp, "closed", False)

    with self._error_catcher():
        chunk = b"" if already_closed else self._fp_read(amt, read1=read1)
        if amt is not None and amt != 0 and not chunk:
            # Platform-specific: Buggy versions of Python.
            # Close the connection when no data is returned
            #
            # This is redundant to what httplib/http.client _should_
            # already do.  However, versions of python released before
            # December 15, 2012 (http://bugs.python.org/issue16298) do
            # not properly close the connection in all cases. There is
            # no harm in redundantly calling close.
            self._fp.close()
            bytes_still_expected = (
                self.length_remaining is not None and self.length_remaining != 0
            )
            if self.enforce_content_length and bytes_still_expected:
                # This is an edge case that httplib failed to cover due
                # to concerns of backward compatibility. We're
                # addressing it here to make sure IncompleteRead is
                # raised during streaming, so all calls with incorrect
                # Content-Length are caught.
                raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
        elif read1 and (
            (amt != 0 and not chunk) or self.length_remaining == len(chunk)
        ):
            # All data has been read, but `self._fp.read1` in
            # CPython 3.12 and older doesn't always close
            # `http.client.HTTPResponse`, so we close it here.
            # See https://github.com/python/cpython/issues/113199
            self._fp.close()

    if chunk:
        received = len(chunk)
        self._fp_bytes_read += received
        if self.length_remaining is not None:
            self.length_remaining -= received
    return chunk
def read(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
    cache_content: bool = False,
) -> bytes:
    """
    Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        How much of the content to read. If specified, caching is skipped
        because it doesn't make sense to cache partial content as the full
        response. Negative values are treated the same as ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` falls back to
        ``self.decode_content``.

    :param cache_content:
        If True, will save the returned data such that the same result is
        returned despite of the state of the underlying file object. This
        is useful if you want the ``.data`` property to continue working
        after having ``.read()`` the file object. (Overridden if ``amt`` is
        set.)

    :raises RuntimeError:
        If ``amt`` is given with ``decode_content=False`` after decoded
        content has already been produced.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    elif amt is not None:
        cache_content = False

        # Everything below compares against `amt`, so it must stay inside
        # this branch where `amt` is known to be an integer.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and len(self._decoded_buffer) < amt
        ):
            # The decoder still holds input it has not expanded yet; drain
            # it before asking the socket for more.
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)
        if len(self._decoded_buffer) >= amt:
            return self._decoded_buffer.get(amt)

    data = self._raw_read(amt)
    if not cache_content:
        self._uncached_read_occurred = True

    flush_decoder = amt is None or (amt != 0 and not data)

    # Nothing new was read and nothing is pending anywhere: hand back the
    # (empty) raw result as-is.
    if (
        not data
        and len(self._decoded_buffer) == 0
        and not (self._decoder and self._decoder.has_unconsumed_tail)
    ):
        return data

    if amt is None:
        data = self._decode(data, decode_content, flush_decoder)
        # It's possible that there is buffered decoded data after a
        # partial read.
        if decode_content and len(self._decoded_buffer) > 0:
            self._decoded_buffer.put(data)
            data = self._decoded_buffer.get_all()

        if cache_content and not self._uncached_read_occurred:
            self._body = data
    else:
        # do not waste memory on buffer when not decoding
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        decoded_data = self._decode(
            data,
            decode_content,
            flush_decoder,
            max_length=amt - len(self._decoded_buffer),
        )
        self._decoded_buffer.put(decoded_data)

        while len(self._decoded_buffer) < amt and data:
            # TODO make sure to initially read enough data to get past the headers
            # For example, the GZ file header takes 10 bytes, we don't want to read
            # it one byte at a time
            data = self._raw_read(amt)
            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)
        data = self._decoded_buffer.get(amt)

    return data
def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Similar to ``http.client.HTTPResponse.read1`` and documented
    in :meth:`io.BufferedReader.read1`, but with an additional parameter:
    ``decode_content``.

    :param amt:
        How much of the content to read. Negative values are treated the
        same as ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. ``None`` falls back to
        ``self.decode_content``.

    :raises RuntimeError:
        If called with ``decode_content=False`` after decoded content has
        already been produced.
    """
    if decode_content is None:
        decode_content = self.decode_content
    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    # try and respond without going to the network
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        ):
            # Expand input the decoder is still holding before touching
            # the socket again.
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=(
                    amt - len(self._decoded_buffer) if amt is not None else None
                ),
            )
            self._decoded_buffer.put(decoded_data)
        if len(self._decoded_buffer) > 0:
            if amt is None:
                return self._decoded_buffer.get_all()
            return self._decoded_buffer.get(amt)
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    data = self._raw_read(amt, read1=True)
    self._uncached_read_occurred = True
    if not decode_content or data is None:
        return data

    self._init_decoder()
    while True:
        # An empty raw read means the stream ended, so flush the decoder.
        flush_decoder = not data
        decoded_data = self._decode(
            data, decode_content, flush_decoder, max_length=amt
        )
        self._decoded_buffer.put(decoded_data)
        if decoded_data or flush_decoder:
            break
        # The raw bytes produced no decoded output yet; keep feeding the
        # decoder until it yields something or the stream ends.
        data = self._raw_read(8192, read1=True)

    if amt is None:
        return self._decoded_buffer.get_all()
    return self._decoded_buffer.get(amt)
def stream(
    self, amt: int | None = _READ_CHUNK_SIZE, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    A generator wrapper for the read() method. A call will block until
    ``amt`` bytes have been read from the connection or until the
    connection is closed.

    :param amt:
        How much of the content to read. The generator will return up to
        that much data per iteration, but may return less. This is
        particularly likely when using compressed data. However, the empty
        string will never be returned. ``0`` yields nothing at all.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if amt == 0:
        return

    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
    else:
        # Keep going while the file object is open or while decoded /
        # not-yet-decoded data is still pending locally.
        while (
            not is_fp_closed(self._fp)
            or len(self._decoded_buffer) > 0
            or (self._decoder and self._decoder.has_unconsumed_tail)
        ):
            data = self.read(amt=amt, decode_content=decode_content)

            if data:
                yield data
# Overrides from io.IOBase
def readable(self) -> bool:
    """Always report the response as readable."""
    return True
def shutdown(self) -> None:
    """
    Shut down the read side of the socket via ``self._sock_shutdown``.

    :raises ValueError:
        If ``self._sock_shutdown`` is not set.

    :raises RuntimeError:
        If ``self._connection`` is ``None``, i.e. the connection has already
        been released to the pool.
    """
    if not self._sock_shutdown:
        raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
    if self._connection is None:
        raise RuntimeError(
            "Cannot shutdown as connection has already been released to the pool"
        )
    self._sock_shutdown(socket.SHUT_RD)
def close(self) -> None:
    """
    Close the underlying file object and the connection, if any.

    Also clears ``self._sock_shutdown``. When ``self.auto_close`` is
    false, the :class:`io.IOBase` closed state is set explicitly.
    """
    self._sock_shutdown = None

    if not self.closed and self._fp:
        self._fp.close()

    if self._connection:
        self._connection.close()

    if not self.auto_close:
        io.IOBase.close(self)
@property
def closed(self) -> bool:
    """
    Whether the response is closed.

    Without ``auto_close`` the :class:`io.IOBase` state is reported.
    Otherwise the answer comes from the wrapped file object: its
    ``isclosed()`` method if present, else its ``closed`` attribute. A
    missing file object, or one exposing neither, counts as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
    elif self._fp is None:
        return True
    elif hasattr(self._fp, "isclosed"):
        return self._fp.isclosed()
    elif hasattr(self._fp, "closed"):
        return self._fp.closed
    else:
        return True
def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    if self._fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    elif hasattr(self._fp, "fileno"):
        return self._fp.fileno()
    else:
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
def flush(self) -> None:
    """
    Flush the wrapped file object.

    Does nothing when there is no file object, when it has no ``flush``
    attribute, or when it reports itself as closed.
    """
    if (
        self._fp is not None
        and hasattr(self._fp, "flush")
        and not getattr(self._fp, "closed", False)
    ):
        return self._fp.flush()
def supports_chunked_reads(self) -> bool:
    """
    Checks if the underlying file-like object looks like a
    :class:`http.client.HTTPResponse` object. We do this by testing for
    the fp attribute. If it is present we assume it returns raw chunks as
    processed by read_chunked().
    """
    return hasattr(self._fp, "fp")
1342 def _update_chunk_length(self) -> None:
1343 # First, we'll figure out length of a chunk and then
1344 # we'll try to read it from socket.
1345 if self.chunk_left is not None:
1346 return None
1347 line = self._fp.fp.readline() # type: ignore[union-attr]
1348 line = line.split(b";", 1)[0]
1349 try:
1350 self.chunk_left = int(line, 16)
1351 except ValueError:
1352 self.close()
1353 if line:
1354 # Invalid chunked protocol response, abort.
1355 raise InvalidChunkLength(self, line) from None
1356 else:
1357 # Truncated at start of next chunk
1358 raise ProtocolError("Response ended prematurely") from None
1360 def _handle_chunk(self, amt: int | None) -> bytes:
1361 returned_chunk = None
1362 if amt is None:
1363 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1364 returned_chunk = chunk
1365 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1366 self.chunk_left = None
1367 elif self.chunk_left is not None and amt < self.chunk_left:
1368 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1369 self.chunk_left = self.chunk_left - amt
1370 returned_chunk = value
1371 elif amt == self.chunk_left:
1372 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1373 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1374 self.chunk_left = None
1375 returned_chunk = value
1376 else: # amt > self.chunk_left
1377 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1378 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1379 self.chunk_left = None
1380 return returned_chunk # type: ignore[no-any-return]
def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``.

    Yields the body of a chunked response piece by piece; empty pieces are
    never yielded.

    :param amt:
        Upper bound on how much is read from a chunk per iteration. ``0``
        yields nothing; negative values are treated the same as ``None``,
        which reads each chunk whole.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. The value is passed to the decoder
        as given; ``None`` is not replaced by ``self.decode_content`` here.

    :raises ResponseNotChunked:
        If the response is not chunked.

    :raises BodyNotHttplibCompatible:
        If the wrapped file object has no ``fp`` attribute.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt == 0:
            return
        elif amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            # First, check if any data is left in the decoder's buffer.
            if self._decoder and self._decoder.has_unconsumed_tail:
                chunk = b""
            else:
                self._update_chunk_length()
                self._uncached_read_occurred = True
                # A zero-length chunk marks the end of the body.
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
            decoded = self._decode(
                chunk,
                decode_content=decode_content,
                flush_decoder=False,
                max_length=amt,
            )
            if decoded:
                yield decoded

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            decoded = self._flush_decoder()
            if decoded:  # Platform-specific: Jython.
                yield decoded

        # Chunk content ends with \r\n: discard it.
        while self._fp is not None:
            line = self._fp.fp.readline()
            if not line:
                # Some sites may not end with '\r\n'.
                break
            if line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()
@property
def url(self) -> str | None:
    """
    Returns the URL that was the source of this response.
    If the request that generated this response redirected, this method
    will return the final redirect location.
    """
    return self._request_url


@url.setter
def url(self, url: str | None) -> None:
    # Stored verbatim; the getter above hands it back unchanged.
    self._request_url = url
1482 def __iter__(self) -> typing.Iterator[bytes]:
1483 buffer: list[bytes] = []
1484 for chunk in self.stream(decode_content=True):
1485 if b"\n" in chunk:
1486 chunks = chunk.split(b"\n")
1487 yield b"".join(buffer) + chunks[0] + b"\n"
1488 for x in chunks[1:-1]:
1489 yield x + b"\n"
1490 if chunks[-1]:
1491 buffer = [chunks[-1]]
1492 else:
1493 buffer = []
1494 else:
1495 buffer.append(chunk)
1496 if buffer:
1497 yield b"".join(buffer)