Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 35%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 DependencyWarning,
36 HTTPError,
37 IncompleteRead,
38 InvalidChunkLength,
39 InvalidHeader,
40 ProtocolError,
41 ReadTimeoutError,
42 ResponseNotChunked,
43 SSLError,
44)
45from .util.response import is_fp_closed, is_response_to_head
46from .util.retry import Retry
48if typing.TYPE_CHECKING:
49 from .connectionpool import HTTPConnectionPool
51log = logging.getLogger(__name__)
class ContentDecoder:
    """Abstract interface for streaming Content-Encoding decoders.

    Concrete implementations in this module (deflate, gzip, brotli, zstd,
    and the chained MultiDecoder) decode incrementally and honor an
    optional cap on the amount of output produced per call.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        # Return up to `max_length` decoded bytes (unlimited when negative).
        # Implementations buffer input they cannot emit yet.
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        # True when buffered input can yield more output without new data.
        raise NotImplementedError()

    def flush(self) -> bytes:
        # Return any remaining decoded bytes at end of stream.
        raise NotImplementedError()
class DeflateDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: deflate``.

    First attempts RFC 1950 (zlib-wrapped) decoding; on a ``zlib.error``
    it replays the accumulated input as raw RFC 1951 DEFLATE, which some
    servers send despite the header.
    """

    def __init__(self) -> None:
        # True until the zlib-vs-raw-deflate question has been settled.
        self._first_try = True
        # Input accumulated during the first (zlib-format) attempt so it
        # can be replayed through a raw-deflate decompressor on failure.
        self._first_try_data = b""
        # Input held back when a caller passed max_length == 0.
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        # Prepend anything held back by a previous max_length == 0 call.
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        # More output is available without new input either when data was
        # held back (max_length == 0) or when zlib kept an unconsumed tail
        # after the stream format was settled.
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        return self._obj.flush()
class GzipDecoderState:
    """Plain-int state values for GzipDecoder's member/error handling."""

    FIRST_MEMBER = 0  # decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding subsequent members of a multi-member stream
    SWALLOW_DATA = 2  # an error occurred; all further input is discarded
class GzipDecoder(ContentDecoder):
    """Decoder for ``Content-Encoding: gzip`` (and its ``x-gzip`` alias).

    Supports multi-member gzip streams: when one member ends, a fresh
    zlib decompressor is created for the remaining (or future) data.
    """

    def __init__(self) -> None:
        # 16 + MAX_WBITS selects gzip-header decoding in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
        # After a decode error, all subsequent input is silently dropped.
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: br`` (only defined when a brotli
        library was importable)."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind the instance-level `_decompress` to whichever streaming
            # method the installed library exposes.
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            # Placeholder overwritten per-instance in __init__.
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                # Older libraries lack can_accept_more_data(); assume no tail.
                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
try:
    # Python 3.14+ ships zstd in the stdlib `compression` package;
    # earlier versions rely on the `backports.zstd` package.
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``Content-Encoding: zstd`` (only defined when a zstd
        module was importable). Handles multi-frame streams."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            if not data and not self.has_unconsumed_tail:
                return b""
            # A finished decompressor cannot be reused; start a new one on
            # the leftover bytes of the previous frame plus the new input.
            if self._obj.eof:
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            # Output is pending mid-frame, or a finished frame left unused
            # (possibly next-frame) bytes behind.
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        # `modes` is the raw comma-separated Content-Encoding value, in
        # the order the encodings were applied.
        encodings = [m.strip() for m in modes.split(",")]
        if len(encodings) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(encodings)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(e) for e in encodings]

    def flush(self) -> bytes:
        # _decoders[0] is the first-applied encoding, i.e. the last decoder
        # in decode order, so its flush yields the final plaintext tail.
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        if max_length <= 0:
            # Unlimited: run the data through the chain in reverse order.
            for d in reversed(self._decoders):
                data = d.decompress(data)
            return data

        ret = bytearray()
        # Every while loop iteration goes through all decoders once.
        # It exits when enough data is read or no more data can be read.
        # It is possible that the while loop iteration does not produce
        # any data because we retrieve up to `max_length` from every
        # decoder, and the amount of bytes may be insufficient for the
        # next decoder to produce enough/any output.
        while True:
            any_data = False
            for d in reversed(self._decoders):
                data = d.decompress(data, max_length=max_length - len(ret))
                if data:
                    any_data = True
            # We should not break when no data is returned because
            # next decoders may produce data even with empty input.
            ret += data
            if not any_data or len(ret) >= max_length:
                return bytes(ret)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return any(d.has_unconsumed_tail for d in self._decoders)
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for one Content-Encoding token or a chain."""
    # A comma means several encodings were applied; delegate to MultiDecoder.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Everything else is treated as (possibly raw) deflate.
    return DeflateDecoder()
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    ``put()`` appends chunks; ``get(n)`` pops up to ``n`` bytes from the
    front, splitting a chunk with a zero-copy memoryview when necessary.
    Peak memory usage is bounded by the stored data plus the largest
    chunk copied out in ``get()``.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk at the tail."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Pop and return up to ``n`` bytes from the head.

        :raises RuntimeError: if the buffer is empty and ``n`` != 0.
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the head chunk is a bytes object of exactly n bytes.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        out = io.BytesIO()
        copied = 0
        while copied < n:
            wanted = n - copied
            piece = self.buffer.popleft()
            if len(piece) > wanted:
                # Split without copying the leftover part.
                view = memoryview(piece)
                out.write(view[:wanted])
                self.buffer.appendleft(view[wanted:])
                self._size -= wanted
                break
            out.write(piece)
            self._size -= len(piece)
            copied += len(piece)
            # Return a short result if the buffer ran dry first.
            if not self.buffer:
                break

        return out.getvalue()

    def get_all(self) -> bytes:
        """Pop and return everything currently buffered as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            only = chunks.pop()
            result = only.tobytes() if isinstance(only, memoryview) else only
        else:
            sink = io.BytesIO()
            sink.writelines(chunks.popleft() for _ in range(len(chunks)))
            result = sink.getvalue()
        self._size = 0
        return result
class BaseHTTPResponse(io.IOBase):
    """Shared behaviour for urllib3 response objects: header storage,
    redirect detection, and transparent content decoding. Subclasses
    implement the actual body reading (``read``/``read1``/``stream``/...).
    """

    # Content-Encoding tokens this process can decode; extended below
    # when the optional brotli/zstd libraries were importable.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exception types that _decode() wraps into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded bytes have been handed out; mixing raw reads
        # after that is rejected in _decode().
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full response body; provided by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8529 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Chained encodings: only build a decoder when at least one
                # token in the chain is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        # A flush implies "decode everything", so the cap is lifted.
        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
689class HTTPResponse(BaseHTTPResponse):
690 """
691 HTTP Response container.
693 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
694 loaded and decoded on-demand when the ``data`` property is accessed. This
695 class is also compatible with the Python standard library's :mod:`io`
696 module, and can hence be treated as a readable object in the context of that
697 framework.
699 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
701 :param preload_content:
702 If True, the response's body will be preloaded during construction.
704 :param decode_content:
705 If True, will attempt to decode the body based on the
706 'content-encoding' header.
708 :param original_response:
709 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
710 object, it's convenient to include the original for debug purposes. It's
711 otherwise unused.
713 :param retries:
714 The retries contains the last :class:`~urllib3.util.retry.Retry` that
715 was used during the request.
717 :param enforce_content_length:
718 Enforce content length checking. Body returned by server must match
719 value of Content-Length header, if present. Otherwise, raise error.
720 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Wrap a response body (str/bytes or a file-like object); see the
        class docstring for parameter descriptions."""
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly ...
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        # ... while a file-like body becomes the read source.
        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
786 def release_conn(self) -> None:
787 if not self._pool or not self._connection:
788 return None
790 self._pool._put_conn(self._connection)
791 self._connection = None
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        # Best-effort: read errors while draining are deliberately ignored.
        try:
            self.read(
                # Do not spend resources decoding the content unless
                # decoding has already been initiated.
                decode_content=self._has_decoded_content,
            )
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass
808 @property
809 def data(self) -> bytes:
810 # For backwards-compat with earlier urllib3 0.4 and earlier.
811 if self._body:
812 return self._body # type: ignore[return-value]
814 if self._fp:
815 return self.read(cache_content=True)
817 return None # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        # The HTTPConnection this response reads from, if still attached.
        return self._connection
    def isclosed(self) -> bool:
        """Whether the underlying file-like body object is closed."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`HTTPResponse.read`
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: Method of the originating request; ``"HEAD"``
            forces a zero-length body.
        :returns: Expected body length in bytes, or ``None`` when unknown
            (absent/invalid Content-Length, or chunked transfer encoding).
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # SSL errors related to framing/MAC get wrapped and reraised here
                raise SSLError(e) from e

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and util.IS_PYOPENSSL:
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        # Track raw (wire) bytes consumed so tell() and length accounting work.
        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            # A partial read can't be cached: ``.data`` must always reflect
            # the complete body.
            cache_content = False

            # Serve as much as possible from bytes the decoder is already
            # holding before touching the network again.
            if self._decoder and self._decoder.has_unconsumed_tail:
                decoded_data = self._decode(
                    b"",
                    decode_content,
                    flush_decoder=False,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full-body read, or once the raw stream is
        # exhausted (a non-zero ``amt`` read that returned no data).
        flush_decoder = amt is None or (amt != 0 and not data)

        # Nothing raw, nothing buffered, and nothing pending inside the
        # decoder: the response is fully consumed.
        if (
            not data
            and len(self._decoded_buffer) == 0
            and not (self._decoder and self._decoder.has_unconsumed_tail)
        ):
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    # Mixing raw reads with earlier decoded reads is rejected
                    # outright (see the error message below).
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw data until ``amt`` decoded bytes are buffered
            # or the raw stream runs dry.
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(
                    data,
                    decode_content,
                    flush_decoder,
                    max_length=amt - len(self._decoded_buffer),
                )
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                # Switching to raw reads after decoded reads already happened
                # is rejected outright (see the error message below).
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
        # Drain bytes the decoder is still holding before reading more from
        # the socket.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        ):
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=(
                    amt - len(self._decoded_buffer) if amt is not None else None
                ),
            )
            self._decoded_buffer.put(decoded_data)
        if len(self._decoded_buffer) > 0:
            if amt is None:
                return self._decoded_buffer.get_all()
            return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            # Only flush once the raw stream is exhausted.
            flush_decoder = not data
            decoded_data = self._decode(
                data, decode_content, flush_decoder, max_length=amt
            )
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            # The last raw block decoded to nothing (e.g. still inside a
            # compression header); read more so this call can make progress.
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1223 def stream(
1224 self, amt: int | None = 2**16, decode_content: bool | None = None
1225 ) -> typing.Generator[bytes]:
1226 """
1227 A generator wrapper for the read() method. A call will block until
1228 ``amt`` bytes have been read from the connection or until the
1229 connection is closed.
1231 :param amt:
1232 How much of the content to read. The generator will return up to
1233 much data per iteration, but may return less. This is particularly
1234 likely when using compressed data. However, the empty string will
1235 never be returned.
1237 :param decode_content:
1238 If True, will attempt to decode the body based on the
1239 'content-encoding' header.
1240 """
1241 if self.chunked and self.supports_chunked_reads():
1242 yield from self.read_chunked(amt, decode_content=decode_content)
1243 else:
1244 while (
1245 not is_fp_closed(self._fp)
1246 or len(self._decoded_buffer) > 0
1247 or (self._decoder and self._decoder.has_unconsumed_tail)
1248 ):
1249 data = self.read(amt=amt, decode_content=decode_content)
1251 if data:
1252 yield data
1254 # Overrides from io.IOBase
1255 def readable(self) -> bool:
1256 return True
1258 def shutdown(self) -> None:
1259 if not self._sock_shutdown:
1260 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1261 if self._connection is None:
1262 raise RuntimeError(
1263 "Cannot shutdown as connection has already been released to the pool"
1264 )
1265 self._sock_shutdown(socket.SHUT_RD)
1267 def close(self) -> None:
1268 self._sock_shutdown = None
1270 if not self.closed and self._fp:
1271 self._fp.close()
1273 if self._connection:
1274 self._connection.close()
1276 if not self.auto_close:
1277 io.IOBase.close(self)
1279 @property
1280 def closed(self) -> bool:
1281 if not self.auto_close:
1282 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1283 elif self._fp is None:
1284 return True
1285 elif hasattr(self._fp, "isclosed"):
1286 return self._fp.isclosed()
1287 elif hasattr(self._fp, "closed"):
1288 return self._fp.closed
1289 else:
1290 return True
1292 def fileno(self) -> int:
1293 if self._fp is None:
1294 raise OSError("HTTPResponse has no file to get a fileno from")
1295 elif hasattr(self._fp, "fileno"):
1296 return self._fp.fileno()
1297 else:
1298 raise OSError(
1299 "The file-like object this HTTPResponse is wrapped "
1300 "around has no file descriptor"
1301 )
1303 def flush(self) -> None:
1304 if (
1305 self._fp is not None
1306 and hasattr(self._fp, "flush")
1307 and not getattr(self._fp, "closed", False)
1308 ):
1309 return self._fp.flush()
1311 def supports_chunked_reads(self) -> bool:
1312 """
1313 Checks if the underlying file-like object looks like a
1314 :class:`http.client.HTTPResponse` object. We do this by testing for
1315 the fp attribute. If it is present we assume it returns raw chunks as
1316 processed by read_chunked().
1317 """
1318 return hasattr(self._fp, "fp")
1320 def _update_chunk_length(self) -> None:
1321 # First, we'll figure out length of a chunk and then
1322 # we'll try to read it from socket.
1323 if self.chunk_left is not None:
1324 return None
1325 line = self._fp.fp.readline() # type: ignore[union-attr]
1326 line = line.split(b";", 1)[0]
1327 try:
1328 self.chunk_left = int(line, 16)
1329 except ValueError:
1330 self.close()
1331 if line:
1332 # Invalid chunked protocol response, abort.
1333 raise InvalidChunkLength(self, line) from None
1334 else:
1335 # Truncated at start of next chunk
1336 raise ProtocolError("Response ended prematurely") from None
1338 def _handle_chunk(self, amt: int | None) -> bytes:
1339 returned_chunk = None
1340 if amt is None:
1341 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1342 returned_chunk = chunk
1343 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1344 self.chunk_left = None
1345 elif self.chunk_left is not None and amt < self.chunk_left:
1346 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1347 self.chunk_left = self.chunk_left - amt
1348 returned_chunk = value
1349 elif amt == self.chunk_left:
1350 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1351 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1352 self.chunk_left = None
1353 returned_chunk = value
1354 else: # amt > self.chunk_left
1355 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1356 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1357 self.chunk_left = None
1358 return returned_chunk # type: ignore[no-any-return]
1360 def read_chunked(
1361 self, amt: int | None = None, decode_content: bool | None = None
1362 ) -> typing.Generator[bytes]:
1363 """
1364 Similar to :meth:`HTTPResponse.read`, but with an additional
1365 parameter: ``decode_content``.
1367 :param amt:
1368 How much of the content to read. If specified, caching is skipped
1369 because it doesn't make sense to cache partial content as the full
1370 response.
1372 :param decode_content:
1373 If True, will attempt to decode the body based on the
1374 'content-encoding' header.
1375 """
1376 self._init_decoder()
1377 # FIXME: Rewrite this method and make it a class with a better structured logic.
1378 if not self.chunked:
1379 raise ResponseNotChunked(
1380 "Response is not chunked. "
1381 "Header 'transfer-encoding: chunked' is missing."
1382 )
1383 if not self.supports_chunked_reads():
1384 raise BodyNotHttplibCompatible(
1385 "Body should be http.client.HTTPResponse like. "
1386 "It should have have an fp attribute which returns raw chunks."
1387 )
1389 with self._error_catcher():
1390 # Don't bother reading the body of a HEAD request.
1391 if self._original_response and is_response_to_head(self._original_response):
1392 self._original_response.close()
1393 return None
1395 # If a response is already read and closed
1396 # then return immediately.
1397 if self._fp.fp is None: # type: ignore[union-attr]
1398 return None
1400 if amt and amt < 0:
1401 # Negative numbers and `None` should be treated the same,
1402 # but httplib handles only `None` correctly.
1403 amt = None
1405 while True:
1406 # First, check if any data is left in the decoder's buffer.
1407 if self._decoder and self._decoder.has_unconsumed_tail:
1408 chunk = b""
1409 else:
1410 self._update_chunk_length()
1411 if self.chunk_left == 0:
1412 break
1413 chunk = self._handle_chunk(amt)
1414 decoded = self._decode(
1415 chunk,
1416 decode_content=decode_content,
1417 flush_decoder=False,
1418 max_length=amt,
1419 )
1420 if decoded:
1421 yield decoded
1423 if decode_content:
1424 # On CPython and PyPy, we should never need to flush the
1425 # decoder. However, on Jython we *might* need to, so
1426 # lets defensively do it anyway.
1427 decoded = self._flush_decoder()
1428 if decoded: # Platform-specific: Jython.
1429 yield decoded
1431 # Chunk content ends with \r\n: discard it.
1432 while self._fp is not None:
1433 line = self._fp.fp.readline()
1434 if not line:
1435 # Some sites may not end with '\r\n'.
1436 break
1437 if line == b"\r\n":
1438 break
1440 # We read everything; close the "file".
1441 if self._original_response:
1442 self._original_response.close()
1444 @property
1445 def url(self) -> str | None:
1446 """
1447 Returns the URL that was the source of this response.
1448 If the request that generated this response redirected, this method
1449 will return the final redirect location.
1450 """
1451 return self._request_url
1453 @url.setter
1454 def url(self, url: str | None) -> None:
1455 self._request_url = url
1457 def __iter__(self) -> typing.Iterator[bytes]:
1458 buffer: list[bytes] = []
1459 for chunk in self.stream(decode_content=True):
1460 if b"\n" in chunk:
1461 chunks = chunk.split(b"\n")
1462 yield b"".join(buffer) + chunks[0] + b"\n"
1463 for x in chunks[1:-1]:
1464 yield x + b"\n"
1465 if chunks[-1]:
1466 buffer = [chunks[-1]]
1467 else:
1468 buffer = []
1469 else:
1470 buffer.append(chunk)
1471 if buffer:
1472 yield b"".join(buffer)