1from __future__ import annotations
2
3import collections
4import io
5import json as _json
6import logging
7import re
8import socket
9import sys
10import typing
11import warnings
12import zlib
13from contextlib import contextmanager
14from http.client import HTTPMessage as _HttplibHTTPMessage
15from http.client import HTTPResponse as _HttplibHTTPResponse
16from socket import timeout as SocketTimeout
17
18if typing.TYPE_CHECKING:
19 from ._base_connection import BaseHTTPConnection
20
21try:
22 try:
23 import brotlicffi as brotli # type: ignore[import-not-found]
24 except ImportError:
25 import brotli # type: ignore[import-not-found]
26except ImportError:
27 brotli = None
28
29try:
30 import zstandard as zstd
31except (AttributeError, ImportError, ValueError): # Defensive:
32 HAS_ZSTD = False
33else:
34 # The package 'zstandard' added the 'eof' property starting
35 # in v0.18.0 which we require to ensure a complete and
36 # valid zstd stream was fed into the ZstdDecoder.
37 # See: https://github.com/urllib3/urllib3/pull/2624
38 _zstd_version = tuple(
39 map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups()) # type: ignore[union-attr]
40 )
41 if _zstd_version < (0, 18): # Defensive:
42 HAS_ZSTD = False
43 else:
44 HAS_ZSTD = True
45
46from . import util
47from ._base_connection import _TYPE_BODY
48from ._collections import HTTPHeaderDict
49from .connection import BaseSSLError, HTTPConnection, HTTPException
50from .exceptions import (
51 BodyNotHttplibCompatible,
52 DecodeError,
53 HTTPError,
54 IncompleteRead,
55 InvalidChunkLength,
56 InvalidHeader,
57 ProtocolError,
58 ReadTimeoutError,
59 ResponseNotChunked,
60 SSLError,
61)
62from .util.response import is_fp_closed, is_response_to_head
63from .util.retry import Retry
64
65if typing.TYPE_CHECKING:
66 from .connectionpool import HTTPConnectionPool
67
68log = logging.getLogger(__name__)
69
70
class ContentDecoder:
    """Interface for streaming decoders of Content-Encoding'd bodies."""

    def decompress(self, data: bytes) -> bytes:
        """Decode ``data`` and return any output available so far."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output at end of stream."""
        raise NotImplementedError()
77
78
class DeflateDecoder(ContentDecoder):
    """Decodes "deflate" bodies, accepting both zlib-wrapped and raw streams.

    Servers disagree on whether "deflate" means a zlib-wrapped stream or a
    bare deflate stream, so the first decode attempt probes for a zlib
    header and falls back to raw deflate on failure.
    """

    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data
        if not self._first_try:
            return self._obj.decompress(data)

        # Still probing: keep a copy of everything seen so far so it can be
        # replayed if we must restart with a raw (headerless) stream.
        self._data += data
        try:
            decompressed = self._obj.decompress(data)
        except zlib.error:
            # Not zlib-wrapped after all -- switch to a raw deflate
            # decompressor and feed it all buffered input.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]
        if decompressed:
            # Output was produced, so the zlib guess was right: stop
            # buffering input from here on.
            self._first_try = False
            self._data = None  # type: ignore[assignment]
        return decompressed

    def flush(self) -> bytes:
        return self._obj.flush()
109
110
class GzipDecoderState:
    """Constants tracking GzipDecoder's progress through a gzip stream."""

    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding a subsequent concatenated member
    SWALLOW_DATA = 2  # an error occurred; discard all further input
115
116
class GzipDecoder(ContentDecoder):
    """Decodes "gzip" bodies, including streams of concatenated members."""

    def __init__(self) -> None:
        # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        output = bytearray()
        if not data or self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(output)
        while True:
            try:
                output += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Any error means the remainder of the body is ignored.
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Trailing garbage after a complete member is tolerated,
                    # matching the behavior of other gzip clients.
                    return bytes(output)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(output)
            # Bytes remain past the trailer: decode them as another member.
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
145
146
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind decompress() on the instance to whichever method the
            # installed package exposes ('decompress' vs 'process').
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Return whatever flush() yields when the package provides it;
            # otherwise there is nothing to emit.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
164
165
if HAS_ZSTD:

    class ZstdDecoder(ContentDecoder):
        """Decodes "zstd" bodies, including multi-frame streams."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor().decompressobj()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # If a frame ended (eof) with input left over, start a fresh
            # decompressor for the next frame and keep going.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor().decompressobj()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            ret = self._obj.flush()  # note: this is a no-op
            # 'eof' (zstandard >= 0.18) tells us whether a complete frame
            # was fed in; a truncated stream is a decode error.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return ret
187
188
class MultiDecoder(ContentDecoder):
    """
    Chains decoders for a comma-separated Content-Encoding list.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore proceeds in the reverse of the listed order.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in header (application) order.
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        # Only the first-applied (innermost) decoder is flushed.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings last-applied-first.
        result = data
        for decoder in reversed(self._decoders):
            result = decoder.decompress(result)
        return result
208
209
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a ContentDecoder for a Content-Encoding value.

    ``mode`` may be a single coding token or a comma-separated list;
    unknown single tokens fall back to the deflate decoder.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
226
227
class BytesQueueBuffer:
    """FIFO buffer of byte chunks with cheap appends and sized reads.

    Decoded chunks are appended with put() and exact byte counts are
    pulled back out with get(), which is what read() needs in order to
    honor the BufferedIOBase contract of returning precisely the
    requested number of bytes.

    Peak memory is bounded by the queued data plus the largest chunk
    copied inside get(); with a single huge chunk, get() performs one
    full copy of the data.
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append a chunk to the tail of the queue."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the head of the queue."""
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        still_needed = n
        while self.buffer:
            piece = self.buffer.popleft()
            if still_needed < len(piece):
                # Split the chunk: keep the unread tail queued for later.
                out.write(piece[:still_needed])
                self.buffer.appendleft(piece[still_needed:])
                self._size -= still_needed
                break
            out.write(piece)
            self._size -= len(piece)
            still_needed -= len(piece)
            if still_needed <= 0:
                break
        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything currently buffered."""
        if not self.buffer:
            assert self._size == 0
            return b""
        if len(self.buffer) == 1:
            # Single chunk: hand it back without copying.
            result = self.buffer.pop()
        else:
            joined = io.BytesIO()
            while self.buffer:
                joined.write(self.buffer.popleft())
            result = joined.getvalue()
        self._size = 0
        return result
299
300
class BaseHTTPResponse(io.IOBase):
    """
    Shared interface for HTTP response objects.

    Provides header storage, redirect inspection, decoding of compressed
    bodies, and compatibility shims for :mod:`io` and
    :mod:`http.client`. Subclasses implement the actual I/O (``read``,
    ``stream``, etc.).
    """

    # Content-Encoding values we can decode; extended below when the
    # optional brotli/zstandard packages imported successfully.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Low-level exceptions the decoders may raise; _decode() catches these
    # and re-raises them wrapped in DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Reuse the mapping as-is when it is already an HTTPHeaderDict;
        # otherwise build one from whatever mapping was supplied.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        # Detect "chunked" among possibly several transfer codings.
        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        # Declared here; subclasses are responsible for assigning it.
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Subclasses return the full (possibly cached) response body here.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a decoder chain when at
                # least one of the listed codings is one we support.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                # Mixing raw reads with decoded reads would hand back a
                # corrupt body, so it is rejected outright.
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Read up to len(b) bytes into b; returns the byte count (0 on EOF).
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.1.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
542
543
544class HTTPResponse(BaseHTTPResponse):
545 """
546 HTTP Response container.
547
548 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
549 loaded and decoded on-demand when the ``data`` property is accessed. This
550 class is also compatible with the Python standard library's :mod:`io`
551 module, and can hence be treated as a readable object in the context of that
552 framework.
553
554 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
555
556 :param preload_content:
557 If True, the response's body will be preloaded during construction.
558
559 :param decode_content:
560 If True, will attempt to decode the body based on the
561 'content-encoding' header.
562
563 :param original_response:
564 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
565 object, it's convenient to include the original for debug purposes. It's
566 otherwise unused.
567
568 :param retries:
569 The retries contains the last :class:`~urllib3.util.retry.Retry` that
570 was used during the request.
571
572 :param enforce_content_length:
573 Enforce content length checking. Body returned by server must match
574 value of Content-Length header, if present. Otherwise, raise error.
575 """
576
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; file-like bodies are kept
        # in self._fp below for on-demand reading.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response (must happen before any read below,
        # since reading consults self.length_remaining)
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
640
641 def release_conn(self) -> None:
642 if not self._pool or not self._connection:
643 return None
644
645 self._pool._put_conn(self._connection)
646 self._connection = None
647
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort: draining failures are deliberately swallowed;
            # the connection will simply not be reused.
            pass
658
    @property
    def data(self) -> bytes:
        # For backwards-compat with earlier urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            # Read the full body now and cache it so repeated access works.
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]
669
    @property
    def connection(self) -> HTTPConnection | None:
        # The underlying connection, if it has not been released yet.
        return self._connection
673
    def isclosed(self) -> bool:
        # http.client compatibility: True when the underlying fp is closed.
        return is_fp_closed(self._fp)
676
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
684
685 def _init_length(self, request_method: str | None) -> int | None:
686 """
687 Set initial length value for Response content if available.
688 """
689 length: int | None
690 content_length: str | None = self.headers.get("content-length")
691
692 if content_length is not None:
693 if self.chunked:
694 # This Response will fail with an IncompleteRead if it can't be
695 # received as chunked. This method falls back to attempt reading
696 # the response before raising an exception.
697 log.warning(
698 "Received response with both Content-Length and "
699 "Transfer-Encoding set. This is expressly forbidden "
700 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
701 "attempting to process response as Transfer-Encoding: "
702 "chunked."
703 )
704 return None
705
706 try:
707 # RFC 7230 section 3.3.2 specifies multiple content lengths can
708 # be sent in a single Content-Length header
709 # (e.g. Content-Length: 42, 42). This line ensures the values
710 # are all valid ints and that as long as the `set` length is 1,
711 # all values are the same. Otherwise, the header is invalid.
712 lengths = {int(val) for val in content_length.split(",")}
713 if len(lengths) > 1:
714 raise InvalidHeader(
715 "Content-Length contained multiple "
716 "unmatching values (%s)" % content_length
717 )
718 length = lengths.pop()
719 except ValueError:
720 length = None
721 else:
722 if length < 0:
723 length = None
724
725 else: # if content_length is None
726 length = None
727
728 # Convert status to int for comparison
729 # In some cases, httplib returns a status of "_UNKNOWN"
730 try:
731 status = int(self.status)
732 except ValueError:
733 status = 0
734
735 # Check for responses that shouldn't include a body
736 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
737 length = 0
738
739 return length
740
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # A timed-out SSL read is surfaced like a socket timeout.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    # expected == -partial presumably marks a response that
                    # should have had no body at all — TODO confirm against
                    # the IncompleteRead call sites.
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
806
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 never returns more than one buffer's worth, so
                # capping at c_int_max is sufficient.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
863
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates the ``_fp_bytes_read`` / ``length_remaining``
        bookkeeping and closes the underlying file object when the stream
        is exhausted, raising :class:`IncompleteRead` when
        ``enforce_content_length`` is set and the body fell short.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            # Treat an already-closed fp as EOF instead of reading from it.
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
915
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve sized reads from already-decoded leftovers when we can,
            # avoiding a trip to the socket.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder at end of stream: either a full read, or a
        # sized read that came back empty.
        flush_decoder = amt is None or (amt != 0 and not data)

        # EOF with nothing buffered: return the empty bytes as-is.
        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            # Keep pulling raw data until enough decoded bytes accumulate
            # to satisfy the request (or the stream ends).
            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
989
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            # Serve buffered decoded bytes first, if any remain.
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        # Loop until the decoder produces output or the stream ends:
        # a single raw chunk may decode to nothing (e.g. mid-header).
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1043
1044 def stream(
1045 self, amt: int | None = 2**16, decode_content: bool | None = None
1046 ) -> typing.Generator[bytes]:
1047 """
1048 A generator wrapper for the read() method. A call will block until
1049 ``amt`` bytes have been read from the connection or until the
1050 connection is closed.
1051
1052 :param amt:
1053 How much of the content to read. The generator will return up to
1054 much data per iteration, but may return less. This is particularly
1055 likely when using compressed data. However, the empty string will
1056 never be returned.
1057
1058 :param decode_content:
1059 If True, will attempt to decode the body based on the
1060 'content-encoding' header.
1061 """
1062 if self.chunked and self.supports_chunked_reads():
1063 yield from self.read_chunked(amt, decode_content=decode_content)
1064 else:
1065 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1066 data = self.read(amt=amt, decode_content=decode_content)
1067
1068 if data:
1069 yield data
1070
1071 # Overrides from io.IOBase
1072 def readable(self) -> bool:
1073 return True
1074
1075 def shutdown(self) -> None:
1076 if not self._sock_shutdown:
1077 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1078 self._sock_shutdown(socket.SHUT_RD)
1079
1080 def close(self) -> None:
1081 self._sock_shutdown = None
1082
1083 if not self.closed and self._fp:
1084 self._fp.close()
1085
1086 if self._connection:
1087 self._connection.close()
1088
1089 if not self.auto_close:
1090 io.IOBase.close(self)
1091
1092 @property
1093 def closed(self) -> bool:
1094 if not self.auto_close:
1095 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1096 elif self._fp is None:
1097 return True
1098 elif hasattr(self._fp, "isclosed"):
1099 return self._fp.isclosed()
1100 elif hasattr(self._fp, "closed"):
1101 return self._fp.closed
1102 else:
1103 return True
1104
1105 def fileno(self) -> int:
1106 if self._fp is None:
1107 raise OSError("HTTPResponse has no file to get a fileno from")
1108 elif hasattr(self._fp, "fileno"):
1109 return self._fp.fileno()
1110 else:
1111 raise OSError(
1112 "The file-like object this HTTPResponse is wrapped "
1113 "around has no file descriptor"
1114 )
1115
1116 def flush(self) -> None:
1117 if (
1118 self._fp is not None
1119 and hasattr(self._fp, "flush")
1120 and not getattr(self._fp, "closed", False)
1121 ):
1122 return self._fp.flush()
1123
1124 def supports_chunked_reads(self) -> bool:
1125 """
1126 Checks if the underlying file-like object looks like a
1127 :class:`http.client.HTTPResponse` object. We do this by testing for
1128 the fp attribute. If it is present we assume it returns raw chunks as
1129 processed by read_chunked().
1130 """
1131 return hasattr(self._fp, "fp")
1132
1133 def _update_chunk_length(self) -> None:
1134 # First, we'll figure out length of a chunk and then
1135 # we'll try to read it from socket.
1136 if self.chunk_left is not None:
1137 return None
1138 line = self._fp.fp.readline() # type: ignore[union-attr]
1139 line = line.split(b";", 1)[0]
1140 try:
1141 self.chunk_left = int(line, 16)
1142 except ValueError:
1143 self.close()
1144 if line:
1145 # Invalid chunked protocol response, abort.
1146 raise InvalidChunkLength(self, line) from None
1147 else:
1148 # Truncated at start of next chunk
1149 raise ProtocolError("Response ended prematurely") from None
1150
1151 def _handle_chunk(self, amt: int | None) -> bytes:
1152 returned_chunk = None
1153 if amt is None:
1154 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1155 returned_chunk = chunk
1156 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1157 self.chunk_left = None
1158 elif self.chunk_left is not None and amt < self.chunk_left:
1159 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1160 self.chunk_left = self.chunk_left - amt
1161 returned_chunk = value
1162 elif amt == self.chunk_left:
1163 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1164 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1165 self.chunk_left = None
1166 returned_chunk = value
1167 else: # amt > self.chunk_left
1168 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1169 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1170 self.chunk_left = None
1171 return returned_chunk # type: ignore[no-any-return]
1172
    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        Yields decoded body fragments for a ``transfer-encoding: chunked``
        response by reading the raw chunk framing directly from the socket
        file object.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked: response lacks chunked transfer-encoding.
        :raises BodyNotHttplibCompatible: body object has no ``fp`` attribute.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        # All socket reads below run inside the error catcher so that
        # low-level errors are translated to urllib3 exceptions.
        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            # Main loop: parse a chunk size, read (part of) the chunk,
            # decode, and yield. A zero-length chunk marks end of body.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            # (This also skips any trailer headers up to the final CRLF.)
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
1249
1250 @property
1251 def url(self) -> str | None:
1252 """
1253 Returns the URL that was the source of this response.
1254 If the request that generated this response redirected, this method
1255 will return the final redirect location.
1256 """
1257 return self._request_url
1258
1259 @url.setter
1260 def url(self, url: str) -> None:
1261 self._request_url = url
1262
1263 def __iter__(self) -> typing.Iterator[bytes]:
1264 buffer: list[bytes] = []
1265 for chunk in self.stream(decode_content=True):
1266 if b"\n" in chunk:
1267 chunks = chunk.split(b"\n")
1268 yield b"".join(buffer) + chunks[0] + b"\n"
1269 for x in chunks[1:-1]:
1270 yield x + b"\n"
1271 if chunks[-1]:
1272 buffer = [chunks[-1]]
1273 else:
1274 buffer = []
1275 else:
1276 buffer.append(chunk)
1277 if buffer:
1278 yield b"".join(buffer)