1from __future__ import annotations
2
3import collections
4import io
5import json as _json
6import logging
7import re
8import socket
9import sys
10import typing
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
16
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
19
20try:
21 try:
22 import brotlicffi as brotli # type: ignore[import-not-found]
23 except ImportError:
24 import brotli # type: ignore[import-not-found]
25except ImportError:
26 brotli = None
27
28from . import util
29from ._base_connection import _TYPE_BODY
30from ._collections import HTTPHeaderDict
31from .connection import BaseSSLError, HTTPConnection, HTTPException
32from .exceptions import (
33 BodyNotHttplibCompatible,
34 DecodeError,
35 HTTPError,
36 IncompleteRead,
37 InvalidChunkLength,
38 InvalidHeader,
39 ProtocolError,
40 ReadTimeoutError,
41 ResponseNotChunked,
42 SSLError,
43)
44from .util.response import is_fp_closed, is_response_to_head
45from .util.retry import Retry
46
47if typing.TYPE_CHECKING:
48 from .connectionpool import HTTPConnectionPool
49
50log = logging.getLogger(__name__)
51
52
class ContentDecoder:
    """Abstract interface for streaming response-body decoders.

    Subclasses implement ``decompress()`` for incremental chunks of
    encoded data and ``flush()`` to drain whatever the decoder still
    buffers once the stream ends.
    """

    def decompress(self, data: bytes) -> bytes:
        """Decode one chunk of encoded body data."""
        raise NotImplementedError()

    def flush(self) -> bytes:
        """Return any remaining buffered output."""
        raise NotImplementedError()
59
60
class DeflateDecoder(ContentDecoder):
    # Handles "deflate" bodies whether the server sent the zlib-wrapped
    # form (RFC 1950, the correct one) or raw DEFLATE (RFC 1951, a common
    # server mistake). The first successful decompress() decides which.

    def __init__(self) -> None:
        # True until we know which deflate variant the stream uses.
        self._first_try = True
        # Replay buffer: input seen so far, kept so it can be re-fed to a
        # raw-deflate decompressor if the zlib-wrapped attempt errors out.
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                # zlib-wrapped stream confirmed: drop the replay buffer.
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            # Not zlib-wrapped: retry everything seen so far as raw
            # DEFLATE (negative wbits suppresses the zlib header check).
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()
91
92
class GzipDecoderState:
    # Tracks GzipDecoder's position within a (possibly multi-member)
    # gzip stream.
    FIRST_MEMBER = 0  # still decoding the first gzip member
    OTHER_MEMBERS = 1  # decoding subsequent concatenated members
    SWALLOW_DATA = 2  # decoding failed; discard any further input
97
98
class GzipDecoder(ContentDecoder):
    # Decodes gzip bodies, including streams composed of several
    # concatenated gzip members.

    def __init__(self) -> None:
        # wbits = 16 + MAX_WBITS makes zlib expect a gzip header/trailer.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            # Bytes left over after a member's trailer belong to the next
            # member; start a fresh decompressor for them.
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()
127
128
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind this instance's decompress() straight to whichever
            # streaming method the installed backend provides.
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            # Only one backend exposes flush(); otherwise nothing to drain.
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""
146
147
try:
    # Python 3.14+
    from compression import zstd  # type: ignore[import-not-found] # noqa: F401

    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        # Backed by the stdlib 'compression.zstd' module.
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
            # A finished frame may be followed by further concatenated
            # frames; restart the decompressor on the leftover bytes.
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            # eof is only True once a complete, valid frame was consumed.
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

except ImportError:
    try:
        # Python 3.13 and earlier require the 'zstandard' module.
        import zstandard as zstd

        # The package 'zstandard' added the 'eof' property starting
        # in v0.18.0 which we require to ensure a complete and
        # valid zstd stream was fed into the ZstdDecoder.
        # See: https://github.com/urllib3/urllib3/pull/2624
        _zstd_version = tuple(
            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
        )
        if _zstd_version < (0, 18):  # Defensive:
            raise ImportError("zstandard module doesn't have eof")
    except (AttributeError, ImportError, ValueError):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
            # Backed by the third-party 'zstandard' package (>= 0.18.0).
            def __init__(self) -> None:
                self._obj = zstd.ZstdDecompressor().decompressobj()

            def decompress(self, data: bytes) -> bytes:
                if not data:
                    return b""
                data_parts = [self._obj.decompress(data)]
                # Handle concatenated frames, as above.
                while self._obj.eof and self._obj.unused_data:
                    unused_data = self._obj.unused_data
                    self._obj = zstd.ZstdDecompressor().decompressobj()
                    data_parts.append(self._obj.decompress(unused_data))
                return b"".join(data_parts)

            def flush(self) -> bytes:
                ret = self._obj.flush()  # note: this is a no-op
                if not self._obj.eof:
                    raise DecodeError("Zstandard data is incomplete")
                return ret  # type: ignore[no-any-return]
211
212
class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
    """

    def __init__(self, modes: str) -> None:
        # One decoder per listed coding, kept in application order.
        self._decoders = [_get_decoder(mode.strip()) for mode in modes.split(",")]

    def flush(self) -> bytes:
        # Flush the first-applied (innermost) decoder.
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        # Undo the codings last-applied-first.
        for decoder in reversed(self._decoders):
            data = decoder.decompress(data)
        return data
232
233
def _get_decoder(mode: str) -> ContentDecoder:
    """Return the ContentDecoder matching a Content-Encoding value."""
    # A comma means several codings were applied in sequence.
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    # Anything else is treated as deflate (the default decoder tolerates
    # both zlib-wrapped and raw streams).
    return DeflateDecoder()
250
251
class BytesQueueBuffer:
    """Memory-efficient FIFO buffer of byte chunks.

    read()-style callers must be handed exactly the number of bytes they
    asked for even though decoded data arrives in arbitrarily sized
    chunks, so chunks are queued with put() and sliced out with get() or
    get_all().

    Peak memory usage is bounded by the queued data itself plus the
    largest chunk copied inside get(); with a single huge chunk that
    amounts to one full copy of the data.
    """

    def __init__(self) -> None:
        # Chunks stay whole until consumed; popleft/appendleft are O(1).
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        """Append one chunk to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the buffer.

        :raises RuntimeError: if the buffer is empty and ``n != 0``.
        :raises ValueError: if ``n`` is negative.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        out = io.BytesIO()
        copied = 0
        while copied < n and self.buffer:
            chunk = self.buffer.popleft()
            needed = n - copied
            if len(chunk) > needed:
                # Split: hand back the prefix, requeue the remainder.
                out.write(chunk[:needed])
                self.buffer.appendleft(chunk[needed:])
                self._size -= needed
                copied += needed
            else:
                out.write(chunk)
                self._size -= len(chunk)
                copied += len(chunk)

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return the entire buffer contents as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            # Single chunk: return it directly, no copy needed.
            result = chunks.pop()
        else:
            out = io.BytesIO()
            while chunks:
                out.write(chunks.popleft())
            result = out.getvalue()
        self._size = 0
        return result
323
324
class BaseHTTPResponse(io.IOBase):
    """Shared behavior for HTTP response objects: header handling, redirect
    detection, and transparent content decoding. Reading is implemented by
    subclasses."""

    # Content-Encoding values we can transparently decode; extended at
    # import time based on which optional decoders are available.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions a decoder may raise; wrapped into DecodeError in _decode().
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        # Reuse an existing HTTPHeaderDict rather than copying it.
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        # Set once decoded bytes have been returned; mixing decoded and
        # raw reads afterwards is rejected in _decode().
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        # Created lazily by _init_decoder() from the content-encoding header.
        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        # Full (possibly decoded) response body; implemented by subclasses.
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                # Multiple codings: only build a MultiDecoder when every
                # listed coding is one we know how to decode.
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        # Read up to len(b) bytes into b; returns the number of bytes read.
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
547
548
549class HTTPResponse(BaseHTTPResponse):
550 """
551 HTTP Response container.
552
553 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
554 loaded and decoded on-demand when the ``data`` property is accessed. This
555 class is also compatible with the Python standard library's :mod:`io`
556 module, and can hence be treated as a readable object in the context of that
557 framework.
558
559 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
560
561 :param preload_content:
562 If True, the response's body will be preloaded during construction.
563
564 :param decode_content:
565 If True, will attempt to decode the body based on the
566 'content-encoding' header.
567
568 :param original_response:
569 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
570 object, it's convenient to include the original for debug purposes. It's
571 otherwise unused.
572
573 :param retries:
574 The retries contains the last :class:`~urllib3.util.retry.Retry` that
575 was used during the request.
576
577 :param enforce_content_length:
578 Enforce content length checking. Body returned by server must match
579 value of Content-Length header, if present. Otherwise, raise error.
580 """
581
    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        # Raw (wire) bytes read so far; reported by tell().
        self._fp_bytes_read = 0
        self.msg = msg

        # A str/bytes body is stored directly; a file-like body is read
        # from on demand via self._fp below.
        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)
645
646 def release_conn(self) -> None:
647 if not self._pool or not self._connection:
648 return None
649
650 self._pool._put_conn(self._connection)
651 self._connection = None
652
    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            # Best-effort drain: errors here just mean the connection
            # won't be reused.
            pass
663
664 @property
665 def data(self) -> bytes:
666 # For backwards-compat with earlier urllib3 0.4 and earlier.
667 if self._body:
668 return self._body # type: ignore[return-value]
669
670 if self._fp:
671 return self.read(cache_content=True)
672
673 return None # type: ignore[return-value]
674
    @property
    def connection(self) -> HTTPConnection | None:
        # The connection this response still holds, if not yet released.
        return self._connection
678
    def isclosed(self) -> bool:
        # Compatibility shim mirroring http.client.HTTPResponse.isclosed().
        return is_fp_closed(self._fp)
681
    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g, compressed).
        """
        return self._fp_bytes_read
689
    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.

        :param request_method: Method of the originating request (used to
            zero the length for HEAD responses).
        :returns: Expected body length in bytes, or ``None`` if unknown.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                # Negative lengths are invalid; treat as unknown.
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length
745
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                # An SSL read timeout is reported the same way as a socket one.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
811
    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        # Only take the chunked slow path when a read could exceed a C int
        # AND we're on an affected SSL/CPython combination.
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                # read1 may legally return fewer bytes, so one capped call
                # is sufficient.
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()
868
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        Also updates the wire-byte counters (``_fp_bytes_read`` /
        ``length_remaining``) and enforces Content-Length when enabled.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
920
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned despite the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            # Serve from the decoded buffer when it already holds enough.
            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        # Flush the decoder on a full read, or when the stream is exhausted.
        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data
994
    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            # Compressed input may decode to zero bytes; keep pulling until
            # something decodes or the stream ends (then flush).
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
1048
1049 def stream(
1050 self, amt: int | None = 2**16, decode_content: bool | None = None
1051 ) -> typing.Generator[bytes]:
1052 """
1053 A generator wrapper for the read() method. A call will block until
1054 ``amt`` bytes have been read from the connection or until the
1055 connection is closed.
1056
1057 :param amt:
1058 How much of the content to read. The generator will return up to
1059 much data per iteration, but may return less. This is particularly
1060 likely when using compressed data. However, the empty string will
1061 never be returned.
1062
1063 :param decode_content:
1064 If True, will attempt to decode the body based on the
1065 'content-encoding' header.
1066 """
1067 if self.chunked and self.supports_chunked_reads():
1068 yield from self.read_chunked(amt, decode_content=decode_content)
1069 else:
1070 while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
1071 data = self.read(amt=amt, decode_content=decode_content)
1072
1073 if data:
1074 yield data
1075
1076 # Overrides from io.IOBase
1077 def readable(self) -> bool:
1078 return True
1079
1080 def shutdown(self) -> None:
1081 if not self._sock_shutdown:
1082 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1083 if self._connection is None:
1084 raise RuntimeError(
1085 "Cannot shutdown as connection has already been released to the pool"
1086 )
1087 self._sock_shutdown(socket.SHUT_RD)
1088
1089 def close(self) -> None:
1090 self._sock_shutdown = None
1091
1092 if not self.closed and self._fp:
1093 self._fp.close()
1094
1095 if self._connection:
1096 self._connection.close()
1097
1098 if not self.auto_close:
1099 io.IOBase.close(self)
1100
1101 @property
1102 def closed(self) -> bool:
1103 if not self.auto_close:
1104 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1105 elif self._fp is None:
1106 return True
1107 elif hasattr(self._fp, "isclosed"):
1108 return self._fp.isclosed()
1109 elif hasattr(self._fp, "closed"):
1110 return self._fp.closed
1111 else:
1112 return True
1113
1114 def fileno(self) -> int:
1115 if self._fp is None:
1116 raise OSError("HTTPResponse has no file to get a fileno from")
1117 elif hasattr(self._fp, "fileno"):
1118 return self._fp.fileno()
1119 else:
1120 raise OSError(
1121 "The file-like object this HTTPResponse is wrapped "
1122 "around has no file descriptor"
1123 )
1124
1125 def flush(self) -> None:
1126 if (
1127 self._fp is not None
1128 and hasattr(self._fp, "flush")
1129 and not getattr(self._fp, "closed", False)
1130 ):
1131 return self._fp.flush()
1132
1133 def supports_chunked_reads(self) -> bool:
1134 """
1135 Checks if the underlying file-like object looks like a
1136 :class:`http.client.HTTPResponse` object. We do this by testing for
1137 the fp attribute. If it is present we assume it returns raw chunks as
1138 processed by read_chunked().
1139 """
1140 return hasattr(self._fp, "fp")
1141
1142 def _update_chunk_length(self) -> None:
1143 # First, we'll figure out length of a chunk and then
1144 # we'll try to read it from socket.
1145 if self.chunk_left is not None:
1146 return None
1147 line = self._fp.fp.readline() # type: ignore[union-attr]
1148 line = line.split(b";", 1)[0]
1149 try:
1150 self.chunk_left = int(line, 16)
1151 except ValueError:
1152 self.close()
1153 if line:
1154 # Invalid chunked protocol response, abort.
1155 raise InvalidChunkLength(self, line) from None
1156 else:
1157 # Truncated at start of next chunk
1158 raise ProtocolError("Response ended prematurely") from None
1159
1160 def _handle_chunk(self, amt: int | None) -> bytes:
1161 returned_chunk = None
1162 if amt is None:
1163 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1164 returned_chunk = chunk
1165 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1166 self.chunk_left = None
1167 elif self.chunk_left is not None and amt < self.chunk_left:
1168 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1169 self.chunk_left = self.chunk_left - amt
1170 returned_chunk = value
1171 elif amt == self.chunk_left:
1172 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1173 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1174 self.chunk_left = None
1175 returned_chunk = value
1176 else: # amt > self.chunk_left
1177 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1178 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1179 self.chunk_left = None
1180 return returned_chunk # type: ignore[no-any-return]
1181
    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :raises ResponseNotChunked:
            If the response lacks ``Transfer-Encoding: chunked``.
        :raises BodyNotHttplibCompatible:
            If the underlying file object doesn't expose raw chunks.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            # Main loop: parse each chunk-size line, read (part of) the
            # chunk, decode it, and yield any decoded output produced.
            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    # A zero-length chunk terminates the body.
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it. This loop also
            # consumes any trailer lines up to the terminating CRLF.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()
1258
1259 @property
1260 def url(self) -> str | None:
1261 """
1262 Returns the URL that was the source of this response.
1263 If the request that generated this response redirected, this method
1264 will return the final redirect location.
1265 """
1266 return self._request_url
1267
1268 @url.setter
1269 def url(self, url: str | None) -> None:
1270 self._request_url = url
1271
1272 def __iter__(self) -> typing.Iterator[bytes]:
1273 buffer: list[bytes] = []
1274 for chunk in self.stream(decode_content=True):
1275 if b"\n" in chunk:
1276 chunks = chunk.split(b"\n")
1277 yield b"".join(buffer) + chunks[0] + b"\n"
1278 for x in chunks[1:-1]:
1279 yield x + b"\n"
1280 if chunks[-1]:
1281 buffer = [chunks[-1]]
1282 else:
1283 buffer = []
1284 else:
1285 buffer.append(chunk)
1286 if buffer:
1287 yield b"".join(buffer)