Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/urllib3/response.py: 20%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 DependencyWarning,
36 HTTPError,
37 IncompleteRead,
38 InvalidChunkLength,
39 InvalidHeader,
40 ProtocolError,
41 ReadTimeoutError,
42 ResponseNotChunked,
43 SSLError,
44)
45from .util.response import is_fp_closed, is_response_to_head
46from .util.retry import Retry
48if typing.TYPE_CHECKING:
49 from .connectionpool import HTTPConnectionPool
51log = logging.getLogger(__name__)
class ContentDecoder:
    """Interface shared by the content-encoding decoders in this module.

    Every member raises ``NotImplementedError``; concrete decoders are
    expected to override all three.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data``. Must be overridden by subclasses."""
        raise NotImplementedError

    @property
    def has_unconsumed_tail(self) -> bool:
        """Whether input is still pending. Must be overridden by subclasses."""
        raise NotImplementedError

    def flush(self) -> bytes:
        """Return any remaining output. Must be overridden by subclasses."""
        raise NotImplementedError
class DeflateDecoder(ContentDecoder):
    """Decoder for "deflate" bodies built on :mod:`zlib`.

    The first attempt uses a default ``zlib.decompressobj()`` (RFC 1950 zlib
    framing). If that raises ``zlib.error``, everything received so far is
    replayed through a ``-zlib.MAX_WBITS`` decompressor (RFC 1951 raw deflate).
    """

    def __init__(self) -> None:
        # True until the first attempt has produced output or failed.
        self._first_try = True
        # Input accumulated during the first attempt, kept for the replay.
        self._first_try_data = b""
        # Input received while ``max_length == 0``; fed on a later call.
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data`` and return the decoded bytes.

        :param data: Compressed input; any input stored by an earlier
            ``max_length == 0`` call is prepended to it.
        :param max_length: Negative means no limit, ``0`` stores the input
            and returns ``b""``, a positive value is passed to zlib as the
            output limit.
        """
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            # zlib treats 0 as "no limit", which is what a negative value
            # means for callers of this method.
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                # Output was produced, so the format guess is accepted and
                # the replay buffer is no longer needed.
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                # Re-enter with the caller's original limit; `_first_try` is
                # now False, so this takes the "subsequent calls" path above.
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if stored input exists, or zlib holds a tail after the first try."""
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        """Return whatever the current zlib decompressor's ``flush()`` yields."""
        return self._obj.flush()
class GzipDecoderState:
    """Integer tags for the phases ``GzipDecoder`` moves through."""

    # 0: first member, 1: any later member, 2: discard all further input.
    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)
class GzipDecoder(ContentDecoder):
    """Decoder for gzip bodies that may consist of several gzip members.

    A fresh zlib decompressor is created whenever one member ends and more
    input remains. After the first ``zlib.error`` all further input is
    discarded.
    """

    def __init__(self) -> None:
        # 16 + MAX_WBITS selects gzip framing in zlib.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        # Input not yet consumed (either zlib's tail or data stored while
        # ``max_length == 0``).
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decompress ``data`` and return the decoded bytes.

        :param data: Compressed input, appended to any pending input.
        :param max_length: ``0`` stores the input and returns ``b""``; a
            positive value stops once that many bytes were produced; a
            negative value decodes without a limit.
        :raises zlib.error: If decoding fails before any member has
            completed. Errors in later members are swallowed and the output
            decoded so far is returned.
        """
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                # A negative `max_length` clamps to 0 here, which zlib
                # treats as "no limit".
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            # Remaining input is either zlib's tail (output limit hit) or
            # bytes past the end of the current member.
            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        """True while pending input is stored on the decoder."""
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        """Return whatever the current zlib decompressor's ``flush()`` yields."""
        return self._obj.flush()
# The class only exists when one of the brotli packages could be imported.
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for "br" bodies backed by ``brotli.Decompressor``."""

        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever decode method the installed package exposes
            # onto the instance, shadowing the placeholder method below.
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            """Typing placeholder; replaced per instance in ``__init__``."""
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data``, limiting output when ``max_length > 0``.

            If the bound method raises ``TypeError``, a ``DependencyWarning``
            is emitted and the call is retried without a limit.
            """
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the decompressor reports it cannot accept more data.

            Returns False if the decompressor has no
            ``can_accept_more_data`` method.
            """
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            """Return the decompressor's ``flush()`` output, or ``b""`` if it has none."""
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
# Zstandard support: standard library on Python 3.14+, the `backports`
# package otherwise. `HAS_ZSTD` records whether the import succeeded, and
# `ZstdDecoder` is only defined when it did.
try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for "zstd" bodies that may contain several frames."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decompress ``data`` and return the decoded bytes.

            :param data: Compressed input; may be empty to continue
                draining input the decoder already holds.
            :param max_length: Output limit passed to the decompressor;
                a negative value is treated as no limit by the loop below.
            """
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                # The previous frame is finished: carry its leftover bytes
                # into a fresh decompressor.
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            #   - enough data is read;
            #   - no more unused data is available;
            #   - end of the last read frame has not been reached (i.e.,
            #     more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True if the decompressor neither needs input nor is at EOF,
            or if it holds unused data."""
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            """Return ``b""``.

            :raises DecodeError: If the current frame has not reached EOF.
            """
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
class MultiDecoder(ContentDecoder):
    """
    Chain of decoders for a comma-separated Content-Encoding value.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore walks the listed codings from last to first.
    """

    # Upper bound on the number of codings accepted in one
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        names = [part.strip() for part in modes.split(",")]
        count = len(names)
        if count > self.max_decode_links:
            raise DecodeError(
                f"Too many content encodings in the chain: {count} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(name) for name in names]

    def flush(self) -> bytes:
        """Flush the decoder for the first listed coding."""
        first = self._decoders[0]
        return first.flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Run ``data`` through every decoder, last listed coding first.

        With ``max_length <= 0`` each decoder is called once without a limit.
        Otherwise full passes over the chain are repeated until a pass yields
        nothing or at least ``max_length`` bytes have been collected.
        """
        chain = self._decoders[::-1]

        if max_length <= 0:
            for decoder in chain:
                data = decoder.decompress(data)
            return data

        output = bytearray()
        # One iteration is one full pass over the chain. A pass may yield
        # nothing from the final decoder because each stage is capped at the
        # remaining budget, which may be too little for the next stage.
        while True:
            produced = False
            budget = max_length - len(output)
            for decoder in chain:
                # Never stop early on empty output: later stages may still
                # emit data from what they already hold.
                data = decoder.decompress(data, max_length=budget)
                produced = produced or bool(data)
            output += data
            if len(output) >= max_length or not produced:
                return bytes(output)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if any decoder in the chain reports pending input."""
        for decoder in self._decoders:
            if decoder.has_unconsumed_tail:
                return True
        return False
def _get_decoder(mode: str) -> ContentDecoder:
    """Build the decoder for a Content-Encoding value.

    A value containing a comma yields a ``MultiDecoder``. Anything that is
    not gzip, or an available brotli/zstd coding, falls back to
    ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    decoder_cls: type[ContentDecoder] = DeflateDecoder
    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        decoder_cls = GzipDecoder
    elif brotli is not None and mode == "br":
        decoder_cls = BrotliDecoder
    elif HAS_ZSTD and mode == "zstd":
        decoder_cls = ZstdDecoder
    return decoder_cls()
class BytesQueueBuffer:
    """FIFO buffer of byte chunks that hands out exact-sized reads.

    ``read()`` has to return decoded data while still honouring the
    BufferedIOBase contract, so decoded chunks are queued here via ``put()``
    and sliced off the front on demand.

    Peak memory is bounded by the sum of:

     * ``self.buffer``, which holds all queued data
     * the largest chunk copied out by ``get()``
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` to the end of the queue."""
        self._size += len(data)
        self.buffer.append(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the queue.

        :raises RuntimeError: If the queue is empty and ``n`` is non-zero.
        :raises ValueError: If ``n`` is negative and the queue is not empty.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the head chunk is exactly what was asked for, so it can
        # be returned without copying.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        out = io.BytesIO()
        needed = n
        while needed > 0 and self.buffer:
            piece = self.buffer.popleft()
            piece_len = len(piece)
            if needed < piece_len:
                # Split the chunk without copying: the unread part goes
                # back to the front of the queue as a view.
                view = memoryview(piece)
                out.write(view[:needed])
                self.buffer.appendleft(view[needed:])
                self._size -= needed
                break
            out.write(piece)
            self._size -= piece_len
            needed -= piece_len

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything currently queued."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) > 1:
            sink = io.BytesIO()
            while chunks:
                sink.write(chunks.popleft())
            joined = sink.getvalue()
        else:
            joined = chunks.pop()
            if isinstance(joined, memoryview):
                joined = joined.tobytes()
        self._size = 0
        return joined
class BaseHTTPResponse(io.IOBase):
    """Base class holding response metadata and content-decoding helpers.

    Body access (``data``, ``read``, ``stream``, ...) and connection
    management raise ``NotImplementedError`` here and are left to subclasses.
    """

    # Content-Encoding values a decoder is available for; "br" and "zstd"
    # are only listed when their optional modules were imported.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions that `_decode` converts into `DecodeError`.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store response metadata and detect chunked transfer encoding.

        ``headers`` is wrapped in an ``HTTPHeaderDict`` unless it already is
        one. ``self.chunked`` becomes True when "chunked" appears among the
        comma-separated Transfer-Encoding values.
        """
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        # Goes through the `retries` setter, which may assign `self.url`.
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        # Declared only; no value is assigned here.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        """Response body; must be provided by subclasses."""
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        """Request URL; must be provided by subclasses."""
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        """Underlying connection; must be provided by subclasses."""
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        """The ``Retry`` object stored for this response, if any."""
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def release_conn(self) -> None:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def drain_conn(self) -> None:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def shutdown(self) -> None:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def close(self) -> None:
        """Must be provided by subclasses."""
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.

        Nothing happens when a decoder already exists. For a comma-separated
        header a decoder is created only if at least one listed coding is in
        ``CONTENT_DECODERS``.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    # The full header value is passed on, not the filtered
                    # list.
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :param data: Raw bytes to decode.
        :param decode_content: When falsy, ``data`` is returned unchanged.
        :param flush_decoder: When true, the decoder is flushed afterwards
            and ``max_length`` is ignored.
        :param max_length: Output limit passed to the decoder; ``None``
            means no limit.
        :raises RuntimeError: If ``decode_content`` is falsy after content
            has already been decoded on this response.
        :raises DecodeError: If the decoder raises one of
            ``DECODER_ERROR_CLASSES``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.

        Returns ``b""`` when no decoder is set.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        """Read up to ``len(b)`` bytes into ``b`` and return the count written."""
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        """Return the response headers object."""
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        """Return header ``name``, or ``default`` if it is absent."""
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        """Return the response headers object."""
        return self.headers

    def geturl(self) -> str | None:
        """Return ``self.url``."""
        return self.url
689class HTTPResponse(BaseHTTPResponse):
690 """
691 HTTP Response container.
693 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
694 loaded and decoded on-demand when the ``data`` property is accessed. This
695 class is also compatible with the Python standard library's :mod:`io`
696 module, and can hence be treated as a readable object in the context of that
697 framework.
699 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
701 :param preload_content:
702 If True, the response's body will be preloaded during construction.
704 :param decode_content:
705 If True, will attempt to decode the body based on the
706 'content-encoding' header.
708 :param original_response:
709 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
710 object, it's convenient to include the original for debug purposes. It's
711 otherwise unused.
713 :param retries:
714 The retries contains the last :class:`~urllib3.util.retry.Retry` that
715 was used during the request.
717 :param enforce_content_length:
718 Enforce content length checking. Body returned by server must match
719 value of Content-Length header, if present. Otherwise, raise error.
720 """
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        """Initialise the response and optionally preload its body.

        ``body`` is stored directly when it is a non-empty ``str``/``bytes``
        and is used as the file object when it has a ``read`` attribute.
        """
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._uncached_read_occurred = False
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        # Count of raw bytes read from `_fp` so far (see `tell`).
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        # This must stay last: `read` relies on the attributes set above.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
787 def release_conn(self) -> None:
788 if not self._pool or not self._connection:
789 return None
791 self._pool._put_conn(self._connection)
792 self._connection = None
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self._raw_read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best effort: a failure while draining is deliberately ignored.
            pass
        if self._has_decoded_content:
            # `_raw_read` skips decompression, so we should clean up the
            # decoder to avoid keeping unnecessary data in memory.
            self._decoded_buffer = BytesQueueBuffer()
            self._decoder = None
810 @property
811 def data(self) -> bytes:
812 # For backwards-compat with earlier urllib3 0.4 and earlier.
813 if self._body:
814 return self._body # type: ignore[return-value]
816 if self._fp:
817 return self.read(cache_content=True)
819 return None # type: ignore[return-value]
    @property
    def connection(self) -> HTTPConnection | None:
        """The connection currently held by this response, if any."""
        return self._connection
    def isclosed(self) -> bool:
        """Return the result of ``is_fp_closed`` for the underlying file object."""
        return is_fp_closed(self._fp)
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read
836 def _init_length(self, request_method: str | None) -> int | None:
837 """
838 Set initial length value for Response content if available.
839 """
840 length: int | None
841 content_length: str | None = self.headers.get("content-length")
843 if content_length is not None:
844 if self.chunked:
845 # This Response will fail with an IncompleteRead if it can't be
846 # received as chunked. This method falls back to attempt reading
847 # the response before raising an exception.
848 log.warning(
849 "Received response with both Content-Length and "
850 "Transfer-Encoding set. This is expressly forbidden "
851 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
852 "attempting to process response as Transfer-Encoding: "
853 "chunked."
854 )
855 return None
857 try:
858 # RFC 7230 section 3.3.2 specifies multiple content lengths can
859 # be sent in a single Content-Length header
860 # (e.g. Content-Length: 42, 42). This line ensures the values
861 # are all valid ints and that as long as the `set` length is 1,
862 # all values are the same. Otherwise, the header is invalid.
863 lengths = {int(val) for val in content_length.split(",")}
864 if len(lengths) > 1:
865 raise InvalidHeader(
866 "Content-Length contained multiple "
867 "unmatching values (%s)" % content_length
868 )
869 length = lengths.pop()
870 except ValueError:
871 length = None
872 else:
873 if length < 0:
874 length = None
876 else: # if content_length is None
877 length = None
879 # Convert status to int for comparison
880 # In some cases, httplib returns a status of "_UNKNOWN"
881 try:
882 status = int(self.status)
883 except ValueError:
884 status = 0
886 # Check for responses that shouldn't include a body
887 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
888 length = 0
890 return length
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.

        :raises ReadTimeoutError: On a socket timeout.
        :raises SSLError: On a ``BaseSSLError``.
        :raises ProtocolError: On ``IncompleteRead``, ``HTTPException`` or
            ``OSError``.
        """
        # Stays False if the wrapped block raises, which triggers cleanup.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # SSL errors related to framing/MAC get wrapped and reraised here
                raise SSLError(e) from e

            except IncompleteRead as e:
                # `expected == -partial` gets a dedicated message.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        This happens to urllib3 injected with pyOpenSSL-backed SSL-support.

        :param amt: Number of bytes to read; ``None`` reads without a limit.
        :param read1: Use ``read1`` on the file object instead of ``read``.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked path when the request (or the remaining
        # length for an unbounded read) exceeds a C int and pyOpenSSL is in
        # use.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and util.IS_PYOPENSSL:
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            # `amt` counts down to 0; with `amt is None` the loop ends on
            # the first empty read.
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        No decoding is applied. Updates ``_fp_bytes_read`` and, when known,
        ``length_remaining``. Returns ``None`` if there is no file object.

        :raises IncompleteRead: Inside the error catcher, when a bounded
            read returns nothing while ``enforce_content_length`` is set
            and ``length_remaining`` is non-zero. The error catcher
            re-raises it as ``ProtocolError``.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        # An already closed file object is not read from again.
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do.  However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
def read(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
    cache_content: bool = False,
) -> bytes:
    """
    Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
    parameters: ``decode_content`` and ``cache_content``.

    :param amt:
        How much of the content to read. If specified, caching is skipped
        because it doesn't make sense to cache partial content as the full
        response. Negative values are treated the same as ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. When ``None``, ``self.decode_content``
        is used.

    :param cache_content:
        If True, will save the returned data such that the same result is
        returned regardless of the state of the underlying file object. This
        is useful if you want the ``.data`` property to continue working
        after having ``.read()`` the file object. (Overridden if ``amt`` is
        set.)

    :raises RuntimeError:
        If ``amt`` is given, ``decode_content`` is false and
        ``self._has_decoded_content`` is already set.
    """
    self._init_decoder()
    if decode_content is None:
        decode_content = self.decode_content

    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    elif amt is not None:
        cache_content = False

        # Before touching the file object, let the decoder work through
        # input it is still holding, asking only for the bytes still
        # missing from the decoded buffer.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and len(self._decoded_buffer) < amt
        ):
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)
        # The request can be answered entirely from already-decoded data.
        if len(self._decoded_buffer) >= amt:
            return self._decoded_buffer.get(amt)

    data = self._raw_read(amt)
    # Remember that a non-caching read happened; the caching branch below
    # refuses to store the body once this flag is set.
    if not cache_content:
        self._uncached_read_occurred = True

    # Flush when reading everything, or when a sized read came back empty.
    flush_decoder = amt is None or (amt != 0 and not data)

    # Nothing was read and nothing is pending anywhere: return the empty
    # result as is.
    if (
        not data
        and len(self._decoded_buffer) == 0
        and not (self._decoder and self._decoder.has_unconsumed_tail)
    ):
        return data

    if amt is None:
        data = self._decode(data, decode_content, flush_decoder)
        # It's possible that there is buffered decoded data after a
        # partial read.
        if decode_content and len(self._decoded_buffer) > 0:
            self._decoded_buffer.put(data)
            data = self._decoded_buffer.get_all()

        if cache_content and not self._uncached_read_occurred:
            self._body = data
    else:
        # do not waste memory on buffer when not decoding
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        decoded_data = self._decode(
            data,
            decode_content,
            flush_decoder,
            max_length=amt - len(self._decoded_buffer),
        )
        self._decoded_buffer.put(decoded_data)

        # Keep reading until enough decoded bytes are buffered or a raw
        # read comes back empty.
        while len(self._decoded_buffer) < amt and data:
            # TODO make sure to initially read enough data to get past the headers
            # For example, the GZ file header takes 10 bytes, we don't want to read
            # it one byte at a time
            data = self._raw_read(amt)
            decoded_data = self._decode(
                data,
                decode_content,
                flush_decoder,
                max_length=amt - len(self._decoded_buffer),
            )
            self._decoded_buffer.put(decoded_data)
        data = self._decoded_buffer.get(amt)

    return data
def read1(
    self,
    amt: int | None = None,
    decode_content: bool | None = None,
) -> bytes:
    """
    Similar to ``http.client.HTTPResponse.read1`` and documented
    in :meth:`io.BufferedReader.read1`, but with an additional parameter:
    ``decode_content``.

    :param amt:
        How much of the content to read. Negative values are treated the
        same as ``None``; ``0`` returns ``b""`` unless decoded data is
        already buffered.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header. When ``None``, ``self.decode_content``
        is used.

    :raises RuntimeError:
        If ``decode_content`` is false while ``self._has_decoded_content``
        is already set.
    """
    if decode_content is None:
        decode_content = self.decode_content
    if amt and amt < 0:
        # Negative numbers and `None` should be treated the same.
        amt = None
    # try and respond without going to the network
    if self._has_decoded_content:
        if not decode_content:
            raise RuntimeError(
                "Calling read1(decode_content=False) is not supported after "
                "read1(decode_content=True) was called."
            )
        # Let the decoder work through input it is still holding before
        # serving from the decoded buffer.
        if (
            self._decoder
            and self._decoder.has_unconsumed_tail
            and (amt is None or len(self._decoded_buffer) < amt)
        ):
            decoded_data = self._decode(
                b"",
                decode_content,
                flush_decoder=False,
                max_length=(
                    amt - len(self._decoded_buffer) if amt is not None else None
                ),
            )
            self._decoded_buffer.put(decoded_data)
        if len(self._decoded_buffer) > 0:
            if amt is None:
                return self._decoded_buffer.get_all()
            return self._decoded_buffer.get(amt)
    if amt == 0:
        return b""

    # FIXME, this method's type doesn't say returning None is possible
    data = self._raw_read(amt, read1=True)
    self._uncached_read_occurred = True
    if not decode_content or data is None:
        return data

    self._init_decoder()
    # Keep feeding raw reads to the decoder until it produces output, or
    # until an empty read triggers the final flush.
    while True:
        flush_decoder = not data
        decoded_data = self._decode(
            data, decode_content, flush_decoder, max_length=amt
        )
        self._decoded_buffer.put(decoded_data)
        if decoded_data or flush_decoder:
            break
        data = self._raw_read(8192, read1=True)

    if amt is None:
        return self._decoded_buffer.get_all()
    return self._decoded_buffer.get(amt)
def stream(
    self, amt: int | None = 2**16, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    A generator wrapper for the read() method. A call will block until
    ``amt`` bytes have been read from the connection or until the
    connection is closed.

    Chunked responses whose file object supports chunked reads are
    delegated to :meth:`read_chunked` instead.

    :param amt:
        How much of the content to read. The generator will return up to
        that much data per iteration, but may return less. This is
        particularly likely when using compressed data. However, the empty
        string will never be returned. With ``amt=0`` nothing is yielded.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.
    """
    if amt == 0:
        return

    if self.chunked and self.supports_chunked_reads():
        yield from self.read_chunked(amt, decode_content=decode_content)
        return

    while True:
        # Finished only once the file object is closed AND neither the
        # decoded buffer nor the decoder holds anything more to hand out.
        drained = (
            is_fp_closed(self._fp)
            and len(self._decoded_buffer) == 0
            and not (self._decoder and self._decoder.has_unconsumed_tail)
        )
        if drained:
            break

        piece = self.read(amt=amt, decode_content=decode_content)
        if piece:
            yield piece
1272 # Overrides from io.IOBase
def readable(self) -> bool:
    """Always report this object as readable."""
    return True
def shutdown(self) -> None:
    """
    Invoke the stored ``self._sock_shutdown`` callable with
    ``socket.SHUT_RD``.

    :raises ValueError: If ``self._sock_shutdown`` is not set.
    :raises RuntimeError: If ``self._connection`` is ``None``.
    """
    if self._sock_shutdown:
        if self._connection is not None:
            self._sock_shutdown(socket.SHUT_RD)
            return
        raise RuntimeError(
            "Cannot shutdown as connection has already been released to the pool"
        )
    raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
def close(self) -> None:
    """
    Close the wrapped file object and the connection.

    The stored socket-shutdown callable is dropped first. The file object
    is closed only if this response is not already closed, and the
    ``io.IOBase`` closed state is updated only when ``auto_close`` is off.
    """
    self._sock_shutdown = None

    if not self.closed:
        if self._fp:
            self._fp.close()

    connection = self._connection
    if connection:
        connection.close()

    if self.auto_close:
        return
    io.IOBase.close(self)
@property
def closed(self) -> bool:
    """
    Whether this response counts as closed.

    With ``auto_close`` off, the ``io.IOBase`` closed state is reported.
    Otherwise the answer comes from the wrapped file object: its
    ``isclosed()`` result if it has that method, else its ``closed``
    attribute if present. A missing file object, or one offering neither,
    counts as closed.
    """
    if not self.auto_close:
        return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]

    fp = self._fp
    if fp is None:
        return True
    if hasattr(fp, "isclosed"):
        return fp.isclosed()
    if hasattr(fp, "closed"):
        return fp.closed
    return True
def fileno(self) -> int:
    """
    Return the file descriptor of the wrapped file object.

    :raises OSError:
        If there is no wrapped file object, or it has no ``fileno``
        attribute.
    """
    fp = self._fp
    if fp is None:
        raise OSError("HTTPResponse has no file to get a fileno from")
    if not hasattr(fp, "fileno"):
        raise OSError(
            "The file-like object this HTTPResponse is wrapped "
            "around has no file descriptor"
        )
    return fp.fileno()
def flush(self) -> None:
    """
    Flush the wrapped file object.

    Does nothing when there is no file object, when it has no ``flush``
    attribute, or when it reports itself as closed.
    """
    fp = self._fp
    if fp is None or not hasattr(fp, "flush") or getattr(fp, "closed", False):
        return None
    return fp.flush()
def supports_chunked_reads(self) -> bool:
    """
    Tell whether the wrapped file-like object looks like a
    :class:`http.client.HTTPResponse`.

    The only test is for an ``fp`` attribute; when it exists, the object
    is assumed to hand back raw chunks the way read_chunked() expects.
    """
    wrapped = self._fp
    return hasattr(wrapped, "fp")
1338 def _update_chunk_length(self) -> None:
1339 # First, we'll figure out length of a chunk and then
1340 # we'll try to read it from socket.
1341 if self.chunk_left is not None:
1342 return None
1343 line = self._fp.fp.readline() # type: ignore[union-attr]
1344 line = line.split(b";", 1)[0]
1345 try:
1346 self.chunk_left = int(line, 16)
1347 except ValueError:
1348 self.close()
1349 if line:
1350 # Invalid chunked protocol response, abort.
1351 raise InvalidChunkLength(self, line) from None
1352 else:
1353 # Truncated at start of next chunk
1354 raise ProtocolError("Response ended prematurely") from None
1356 def _handle_chunk(self, amt: int | None) -> bytes:
1357 returned_chunk = None
1358 if amt is None:
1359 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1360 returned_chunk = chunk
1361 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1362 self.chunk_left = None
1363 elif self.chunk_left is not None and amt < self.chunk_left:
1364 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1365 self.chunk_left = self.chunk_left - amt
1366 returned_chunk = value
1367 elif amt == self.chunk_left:
1368 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1369 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1370 self.chunk_left = None
1371 returned_chunk = value
1372 else: # amt > self.chunk_left
1373 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1374 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1375 self.chunk_left = None
1376 return returned_chunk # type: ignore[no-any-return]
def read_chunked(
    self, amt: int | None = None, decode_content: bool | None = None
) -> typing.Generator[bytes]:
    """
    Similar to :meth:`HTTPResponse.read`, but with an additional
    parameter: ``decode_content``.

    This is a generator: it yields the (optionally decoded) body piece by
    piece, and the checks below only run once iteration starts.

    :param amt:
        Upper bound handed to ``_handle_chunk`` for each raw read and to
        ``_decode`` as ``max_length``. ``0`` yields nothing; negative
        values are treated the same as ``None``.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :raises ResponseNotChunked:
        If ``self.chunked`` is false.
    :raises BodyNotHttplibCompatible:
        If ``supports_chunked_reads()`` is false.
    """
    self._init_decoder()
    # FIXME: Rewrite this method and make it a class with a better structured logic.
    if not self.chunked:
        raise ResponseNotChunked(
            "Response is not chunked. "
            "Header 'transfer-encoding: chunked' is missing."
        )
    if not self.supports_chunked_reads():
        raise BodyNotHttplibCompatible(
            "Body should be http.client.HTTPResponse like. "
            "It should have an fp attribute which returns raw chunks."
        )

    with self._error_catcher():
        # Don't bother reading the body of a HEAD request.
        if self._original_response and is_response_to_head(self._original_response):
            self._original_response.close()
            return None

        # If a response is already read and closed
        # then return immediately.
        if self._fp.fp is None:  # type: ignore[union-attr]
            return None

        if amt == 0:
            return
        elif amt and amt < 0:
            # Negative numbers and `None` should be treated the same,
            # but httplib handles only `None` correctly.
            amt = None

        while True:
            # First, check if any data is left in the decoder's buffer.
            if self._decoder and self._decoder.has_unconsumed_tail:
                # Feed nothing new; just let the decoder emit more output.
                chunk = b""
            else:
                self._update_chunk_length()
                self._uncached_read_occurred = True
                # A zero-length chunk marks the end of the body.
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
            decoded = self._decode(
                chunk,
                decode_content=decode_content,
                flush_decoder=False,
                max_length=amt,
            )
            if decoded:
                yield decoded

        if decode_content:
            # On CPython and PyPy, we should never need to flush the
            # decoder. However, on Jython we *might* need to, so
            # lets defensively do it anyway.
            decoded = self._flush_decoder()
            if decoded:  # Platform-specific: Jython.
                yield decoded

        # Chunk content ends with \r\n: discard it.
        while self._fp is not None:
            line = self._fp.fp.readline()
            if not line:
                # Some sites may not end with '\r\n'.
                break
            if line == b"\r\n":
                break

        # We read everything; close the "file".
        if self._original_response:
            self._original_response.close()
@property
def url(self) -> str | None:
    """
    Returns the URL that was the source of this response.
    If the request that generated this response redirected, this property
    will return the final redirect location.
    """
    return self._request_url

@url.setter
def url(self, url: str | None) -> None:
    # Stored as-is; the getter above hands the same value back.
    self._request_url = url
1478 def __iter__(self) -> typing.Iterator[bytes]:
1479 buffer: list[bytes] = []
1480 for chunk in self.stream(decode_content=True):
1481 if b"\n" in chunk:
1482 chunks = chunk.split(b"\n")
1483 yield b"".join(buffer) + chunks[0] + b"\n"
1484 for x in chunks[1:-1]:
1485 yield x + b"\n"
1486 if chunks[-1]:
1487 buffer = [chunks[-1]]
1488 else:
1489 buffer = []
1490 else:
1491 buffer.append(chunk)
1492 if buffer:
1493 yield b"".join(buffer)