1from __future__ import annotations
2
3import collections
4import io
5import json as _json
6import logging
7import socket
8import sys
9import typing
10import warnings
11import zlib
12from contextlib import contextmanager
13from http.client import HTTPMessage as _HttplibHTTPMessage
14from http.client import HTTPResponse as _HttplibHTTPResponse
15from socket import timeout as SocketTimeout
16
17if typing.TYPE_CHECKING:
18 from ._base_connection import BaseHTTPConnection
19
20brotli = None
21
22from . import util
23from ._base_connection import _TYPE_BODY
24from ._collections import HTTPHeaderDict
25from .connection import BaseSSLError, HTTPConnection, HTTPException
26from .exceptions import (
27 BodyNotHttplibCompatible,
28 DecodeError,
29 DependencyWarning,
30 HTTPError,
31 IncompleteRead,
32 InvalidChunkLength,
33 InvalidHeader,
34 ProtocolError,
35 ReadTimeoutError,
36 ResponseNotChunked,
37 SSLError,
38)
39from .util.response import is_fp_closed, is_response_to_head
40from .util.retry import Retry
41
42if typing.TYPE_CHECKING:
43 from .connectionpool import HTTPConnectionPool
44
45log = logging.getLogger(__name__)
46
47
class ContentDecoder:
    """Interface shared by all content-encoding decoders in this module.

    The base implementation of every member raises ``NotImplementedError``;
    concrete decoders override all three.
    """

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data`` and return the decoded bytes.

        :param data: Encoded input bytes.
        :param max_length: Output-size argument; how it is honoured is up
            to the concrete decoder.
        """
        raise NotImplementedError

    @property
    def has_unconsumed_tail(self) -> bool:
        """Tell whether the decoder still holds input it has not decoded."""
        raise NotImplementedError

    def flush(self) -> bytes:
        """Return whatever output the decoder produces when finalized."""
        raise NotImplementedError
58
59
class DeflateDecoder(ContentDecoder):
    """Decoder for ``deflate`` content.

    The first chunks are decoded as RFC 1950 (zlib-wrapped) data. If zlib
    rejects them, the decoder switches to RFC 1951 (raw DEFLATE) and replays
    everything it has been fed so far.
    """

    def __init__(self) -> None:
        self._obj = zlib.decompressobj()
        # True until the stream format has been settled.
        self._first_try = True
        # Copy of the input seen while the format is undecided, kept so it
        # can be replayed through a raw-DEFLATE decompressor.
        self._first_try_data = b""
        # Input received while ``max_length == 0``; fed on a later call.
        self._unfed_data = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data``, producing at most ``max_length`` bytes.

        :param data: Next piece of encoded input (may be empty).
        :param max_length: Negative means no limit; ``0`` stores the input
            for a later call and returns ``b""``; a positive value caps the
            size of the returned output.
        :raises zlib.error: If the data is valid in neither format.
        """
        pending = self._unfed_data + data
        self._unfed_data = b""
        if not (pending or self._obj.unconsumed_tail):
            return pending

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = pending
            return b""

        # zlib spells "no limit" as 0, whereas callers here use negatives.
        zlib_limit = max_length if max_length > 0 else 0

        if not self._first_try:
            # The format is settled: keep using the same object. zlib
            # requires the unconsumed tail to be passed back in.
            return self._obj.decompress(
                self._obj.unconsumed_tail + pending, max_length=zlib_limit
            )

        # Format still undecided: attempt RFC 1950 ZLIB first.
        self._first_try_data += pending
        try:
            output = self._obj.decompress(pending, max_length=zlib_limit)
        except zlib.error:
            # Fall back to RFC 1951 DEFLATE and replay all buffered input
            # with the caller's original limit.
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._first_try_data, max_length=max_length)
            finally:
                self._first_try_data = b""

        if output:
            # Real output confirms the zlib format; drop the replay copy.
            self._first_try = False
            self._first_try_data = b""
        return output

    @property
    def has_unconsumed_tail(self) -> bool:
        """True when stored input or a zlib unconsumed tail awaits decoding."""
        if self._unfed_data:
            return True
        return not self._first_try and bool(self._obj.unconsumed_tail)

    def flush(self) -> bytes:
        """Flush the underlying zlib decompressor and return its output."""
        return self._obj.flush()
117
118
class GzipDecoderState:
    """Integer states used by ``GzipDecoder``.

    ``FIRST_MEMBER`` (0) is the initial state, ``OTHER_MEMBERS`` (1) is
    entered once a gzip member has ended and more data follows, and
    ``SWALLOW_DATA`` (2) is entered after a zlib error.
    """

    FIRST_MEMBER, OTHER_MEMBERS, SWALLOW_DATA = range(3)
123
124
class GzipDecoder(ContentDecoder):
    """Streaming gzip decoder that also handles several concatenated members.

    Once zlib reports an error the decoder enters ``SWALLOW_DATA`` and
    returns ``b""`` for all further input. The error itself is re-raised
    only when it happens while still decoding the first member.
    """

    def __init__(self) -> None:
        # NOTE(review): ``16 + zlib.MAX_WBITS`` is the zlib wbits value for
        # the gzip container format - confirm against the zlib docs.
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        # Input not yet decoded: either zlib's unconsumed tail (output limit
        # reached) or bytes that follow the end of the current member.
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Decode ``data`` and return the decoded bytes.

        :param data: Next piece of encoded input (may be empty).
        :param max_length: ``0`` stores the input for a later call and
            returns ``b""``; a positive value stops decoding once that many
            bytes were produced; a negative value means no limit.
        :raises zlib.error: If decoding fails before a first member has
            been completed.
        """
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                # The remaining budget is clamped at 0, which zlib treats as
                # "no limit" (this is what a negative ``max_length`` yields).
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            # Remember what is left: the tail held back by the output limit,
            # or the bytes that come after the end of this member.
            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        """True while input stored in ``_unconsumed_tail`` awaits decoding."""
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        """Flush the current zlib decompressor and return its output."""
        return self._obj.flush()
187
188
if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        """Decoder for ``br`` content.

        Works with both the 'brotlipy' and the 'Brotli' packages, which share
        the ``brotli`` import name but name the decoding method differently
        (``decompress`` for 'brotlipy', ``process`` for 'Brotli').
        """

        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            # Bind whichever decoding method this backend offers as the
            # instance attribute ``_decompress``.
            method_name = (
                "decompress" if hasattr(self._obj, "decompress") else "process"
            )
            setattr(self, "_decompress", getattr(self._obj, method_name))

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            """Typing placeholder; ``__init__`` shadows it on every instance."""
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decode ``data``, limiting the output only for positive ``max_length``.

            When the backend raises ``TypeError`` (no ``output_buffer_limit``
            support), a ``DependencyWarning`` is issued and the data is
            decoded without a limit.
            """
            try:
                if max_length <= 0:
                    return self._decompress(data)
                return self._decompress(data, output_buffer_limit=max_length)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the backend reports it cannot accept more data.

            Backends without ``can_accept_more_data`` always yield ``False``.
            """
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            """Return the backend's ``flush()`` output, or ``b""`` if it has none."""
            if not hasattr(self._obj, "flush"):
                return b""
            return self._obj.flush()  # type: ignore[no-any-return]
232
233
# Zstandard support is optional: the module comes from ``compression`` on
# Python >= 3.14 and from ``backports`` otherwise. ``HAS_ZSTD`` records
# whether the import worked; ``ZstdDecoder`` is defined only if it did.
try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        """Decoder for ``zstd`` content, including several frames in a row."""

        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            """Decode ``data`` and return the decoded bytes.

            :param data: Next piece of encoded input (may be empty).
            :param max_length: Passed to the decompressor as the output
                limit; a negative value disables the limit check in the
                multi-frame loop below.
            """
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                # The previous frame has ended: start a fresh decompressor
                # and put the bytes left over after that frame first.
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            # more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    # No output and the decompressor wants more input.
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            """True when the decompressor neither needs input nor is at
            end-of-frame, or when it holds unused data."""
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            """Return ``b""`` after checking that the current frame is complete.

            :raises DecodeError: If the decompressor has not reached the end
                of the frame.
            """
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""
292
293
class MultiDecoder(ContentDecoder):
    """
    Decoder for a comma-separated chain of content codings.

    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.

    Decoding therefore walks the listed codings in reverse order.
    """

    # Maximum allowed number of chained HTTP encodings in the
    # Content-Encoding header.
    max_decode_links = 5

    def __init__(self, modes: str) -> None:
        """Create one decoder per coding named in ``modes``.

        :param modes: Comma-separated content codings.
        :raises DecodeError: If more than ``max_decode_links`` codings are
            listed.
        """
        names = [part.strip() for part in modes.split(",")]
        if len(names) > self.max_decode_links:
            raise DecodeError(
                "Too many content encodings in the chain: "
                f"{len(names)} > {self.max_decode_links}"
            )
        self._decoders = [_get_decoder(name) for name in names]

    def flush(self) -> bytes:
        """Flush the decoder of the first listed coding and return its output."""
        return self._decoders[0].flush()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        """Run ``data`` through the decoders, last listed coding first.

        :param data: Encoded input bytes.
        :param max_length: When positive, decoding stops as soon as that
            many bytes have been collected; otherwise a single unlimited
            pass through the chain is made.
        """
        chain = self._decoders[::-1]
        if max_length <= 0:
            for decoder in chain:
                data = decoder.decompress(data)
            return data

        collected = bytearray()
        # Each pass of the outer loop feeds the data through the whole chain
        # once. A pass may yield nothing, because every decoder is capped at
        # the remaining budget and that may be too little input for the next
        # decoder to emit output.
        while True:
            produced = False
            for decoder in chain:
                budget = max_length - len(collected)
                data = decoder.decompress(data, max_length=budget)
                # Keep going even on empty output: later decoders may still
                # produce data from an empty input.
                produced = produced or bool(data)
            collected += data
            if len(collected) >= max_length or not produced:
                return bytes(collected)
            data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        """True if any decoder in the chain still holds undecoded input."""
        for decoder in self._decoders:
            if decoder.has_unconsumed_tail:
                return True
        return False
348
349
def _get_decoder(mode: str) -> ContentDecoder:
    """Return a new decoder for the content coding ``mode``.

    A value containing a comma yields a ``MultiDecoder``. ``br`` and
    ``zstd`` are honoured only when their backends are available. Every
    other value falls back to ``DeflateDecoder``.
    """
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode == "gzip" or mode == "x-gzip":
        return GzipDecoder()

    if mode == "br" and brotli is not None:
        return BrotliDecoder()

    if mode == "zstd" and HAS_ZSTD:
        return ZstdDecoder()

    return DeflateDecoder()
366
367
class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put()

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()
    """

    def __init__(self) -> None:
        # Queue of stored chunks; partially consumed chunks are kept as
        # memoryview slices.
        self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
        # Total number of bytes currently held in ``self.buffer``.
        self._size: int = 0

    def __len__(self) -> int:
        """Return the number of buffered bytes."""
        return self._size

    def put(self, data: bytes) -> None:
        """Append ``data`` to the end of the buffer."""
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        """Remove and return up to ``n`` bytes from the front of the buffer.

        :raises RuntimeError: If ``n`` is non-zero and the buffer is empty.
        :raises ValueError: If ``n`` is negative and the buffer is not empty.
        """
        if n == 0:
            return b""
        if not self.buffer:
            raise RuntimeError("buffer is empty")
        if n < 0:
            raise ValueError("n should be > 0")

        # Fast path: the first chunk is a bytes object of exactly the
        # requested size, so it can be handed out without copying.
        head = self.buffer[0]
        if isinstance(head, bytes) and len(head) == n:
            self._size -= n
            return self.buffer.popleft()

        out = io.BytesIO()
        needed = n
        while needed > 0 and self.buffer:
            piece = self.buffer.popleft()
            piece_size = len(piece)
            if needed < piece_size:
                # Take only the front of this chunk and push the rest back
                # as a memoryview slice.
                view = memoryview(piece)
                out.write(view[:needed])
                self.buffer.appendleft(view[needed:])
                self._size -= needed
                break
            out.write(piece)
            self._size -= piece_size
            needed -= piece_size

        return out.getvalue()

    def get_all(self) -> bytes:
        """Remove and return everything in the buffer as one bytes object."""
        chunks = self.buffer
        if not chunks:
            assert self._size == 0
            return b""
        if len(chunks) == 1:
            only = chunks.pop()
            data = only.tobytes() if isinstance(only, memoryview) else only
        else:
            sink = io.BytesIO()
            # Pop chunk by chunk so each one can be released once written.
            while chunks:
                sink.write(chunks.popleft())
            data = sink.getvalue()
        self._size = 0
        return data
443
444
class BaseHTTPResponse(io.IOBase):
    """Common state and content-decoding helpers for HTTP response classes.

    The I/O methods (``read``, ``stream``, ``close``, ...) and the ``data``,
    ``url`` and ``connection`` properties raise ``NotImplementedError`` here
    and are meant to be supplied by subclasses.
    """

    # Content-Encoding values that can be decoded. Optional codecs are
    # listed only when their backing module is available.
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    # Exceptions raised by decoders which ``_decode`` turns into DecodeError.
    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        """Store response metadata and detect chunked transfer encoding.

        ``headers`` is wrapped in an ``HTTPHeaderDict`` unless it already is
        one. Assigning ``retries`` goes through the property setter, which
        may overwrite ``url`` with the last redirect location.
        """
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # A generator avoids building a list that would be discarded at once.
        self.chunked = "chunked" in (enc.strip() for enc in tr_enc.split(","))

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        """Response body; provided by subclasses."""
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        """URL of the response; provided by subclasses."""
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        """Connection backing the response; provided by subclasses."""
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        """The ``Retry`` object associated with this response, if any."""
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.

        A decoder is created when the Content-Encoding header names a
        supported coding, or is a comma-separated list in which at least one
        coding is supported. An existing decoder is left untouched.
        """
        if self._decoder is not None:
            return

        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        supported = self.CONTENT_DECODERS
        if content_encoding in supported or (
            "," in content_encoding
            and any(e.strip() in supported for e in content_encoding.split(","))
        ):
            self._decoder = _get_decoder(content_encoding)

    def _decode(
        self,
        data: bytes,
        decode_content: bool | None,
        flush_decoder: bool,
        max_length: int | None = None,
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.

        :param data: Raw bytes as read from the wire.
        :param decode_content: When falsy, ``data`` is returned untouched.
        :param flush_decoder: When true, the output limit is dropped and the
            decoder's flushed output is appended.
        :param max_length: Output limit handed to the decoder; ``None``
            means no limit.
        :raises RuntimeError: If undecoded data is requested after content
            has already been decoded.
        :raises DecodeError: If the decoder fails with one of
            ``DECODER_ERROR_CLASSES``.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        if max_length is None or flush_decoder:
            max_length = -1

        try:
            if self._decoder:
                data = self._decoder.decompress(data, max_length=max_length)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray | memoryview[int]) -> int:
        """Read up to ``len(b)`` bytes into ``b`` and return the count read."""
        temp = self.read(len(b))
        count = len(temp)
        if count:
            b[:count] = temp
        return count

    # Methods used by dependent libraries
    def getheaders(self) -> HTTPHeaderDict:
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url
681
682
683class HTTPResponse(BaseHTTPResponse):
684 """
685 HTTP Response container.
686
687 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
688 loaded and decoded on-demand when the ``data`` property is accessed. This
689 class is also compatible with the Python standard library's :mod:`io`
690 module, and can hence be treated as a readable object in the context of that
691 framework.
692
693 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
694
695 :param preload_content:
696 If True, the response's body will be preloaded during construction.
697
698 :param decode_content:
699 If True, will attempt to decode the body based on the
700 'content-encoding' header.
701
702 :param original_response:
703 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
704 object, it's convenient to include the original for debug purposes. It's
705 otherwise unused.
706
707 :param retries:
708 The retries contains the last :class:`~urllib3.util.retry.Retry` that
709 was used during the request.
710
711 :param enforce_content_length:
712 Enforce content length checking. Body returned by server must match
713 value of Content-Length header, if present. Otherwise, raise error.
714 """
715
716 def __init__(
717 self,
718 body: _TYPE_BODY = "",
719 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
720 status: int = 0,
721 version: int = 0,
722 version_string: str = "HTTP/?",
723 reason: str | None = None,
724 preload_content: bool = True,
725 decode_content: bool = True,
726 original_response: _HttplibHTTPResponse | None = None,
727 pool: HTTPConnectionPool | None = None,
728 connection: HTTPConnection | None = None,
729 msg: _HttplibHTTPMessage | None = None,
730 retries: Retry | None = None,
731 enforce_content_length: bool = True,
732 request_method: str | None = None,
733 request_url: str | None = None,
734 auto_close: bool = True,
735 sock_shutdown: typing.Callable[[int], None] | None = None,
736 ) -> None:
737 super().__init__(
738 headers=headers,
739 status=status,
740 version=version,
741 version_string=version_string,
742 reason=reason,
743 decode_content=decode_content,
744 request_url=request_url,
745 retries=retries,
746 )
747
748 self.enforce_content_length = enforce_content_length
749 self.auto_close = auto_close
750
751 self._body = None
752 self._uncached_read_occurred = False
753 self._fp: _HttplibHTTPResponse | None = None
754 self._original_response = original_response
755 self._fp_bytes_read = 0
756 self.msg = msg
757
758 if body and isinstance(body, (str, bytes)):
759 self._body = body
760
761 self._pool = pool
762 self._connection = connection
763
764 if hasattr(body, "read"):
765 self._fp = body # type: ignore[assignment]
766 self._sock_shutdown = sock_shutdown
767
768 # Are we using the chunked-style of transfer encoding?
769 self.chunk_left: int | None = None
770
771 # Determine length of response
772 self.length_remaining = self._init_length(request_method)
773
774 # Used to return the correct amount of bytes for partial read()s
775 self._decoded_buffer = BytesQueueBuffer()
776
777 # If requested, preload the body.
778 if preload_content and not self._body:
779 self._body = self.read(decode_content=decode_content)
780
781 def release_conn(self) -> None:
782 if not self._pool or not self._connection:
783 return None
784
785 self._pool._put_conn(self._connection)
786 self._connection = None
787
788 def drain_conn(self) -> None:
789 """
790 Read and discard any remaining HTTP response data in the response connection.
791
792 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
793 """
794 try:
795 self._raw_read()
796 except (HTTPError, OSError, BaseSSLError, HTTPException):
797 pass
798 if self._has_decoded_content:
799 # `_raw_read` skips decompression, so we should clean up the
800 # decoder to avoid keeping unnecessary data in memory.
801 self._decoded_buffer = BytesQueueBuffer()
802 self._decoder = None
803
804 @property
805 def data(self) -> bytes:
806 # For backwards-compat with earlier urllib3 0.4 and earlier.
807 if self._body:
808 return self._body # type: ignore[return-value]
809
810 if self._fp:
811 return self.read(cache_content=True)
812
813 return None # type: ignore[return-value]
814
815 @property
816 def connection(self) -> HTTPConnection | None:
817 return self._connection
818
819 def isclosed(self) -> bool:
820 return is_fp_closed(self._fp)
821
822 def tell(self) -> int:
823 """
824 Obtain the number of bytes pulled over the wire so far. May differ from
825 the amount of content returned by :meth:`HTTPResponse.read`
826 if bytes are encoded on the wire (e.g, compressed).
827 """
828 return self._fp_bytes_read
829
830 def _init_length(self, request_method: str | None) -> int | None:
831 """
832 Set initial length value for Response content if available.
833 """
834 length: int | None
835 content_length: str | None = self.headers.get("content-length")
836
837 if content_length is not None:
838 if self.chunked:
839 # This Response will fail with an IncompleteRead if it can't be
840 # received as chunked. This method falls back to attempt reading
841 # the response before raising an exception.
842 log.warning(
843 "Received response with both Content-Length and "
844 "Transfer-Encoding set. This is expressly forbidden "
845 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
846 "attempting to process response as Transfer-Encoding: "
847 "chunked."
848 )
849 return None
850
851 try:
852 # RFC 7230 section 3.3.2 specifies multiple content lengths can
853 # be sent in a single Content-Length header
854 # (e.g. Content-Length: 42, 42). This line ensures the values
855 # are all valid ints and that as long as the `set` length is 1,
856 # all values are the same. Otherwise, the header is invalid.
857 lengths = {int(val) for val in content_length.split(",")}
858 if len(lengths) > 1:
859 raise InvalidHeader(
860 "Content-Length contained multiple "
861 "unmatching values (%s)" % content_length
862 )
863 length = lengths.pop()
864 except ValueError:
865 length = None
866 else:
867 if length < 0:
868 length = None
869
870 else: # if content_length is None
871 length = None
872
873 # Convert status to int for comparison
874 # In some cases, httplib returns a status of "_UNKNOWN"
875 try:
876 status = int(self.status)
877 except ValueError:
878 status = 0
879
880 # Check for responses that shouldn't include a body
881 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
882 length = 0
883
884 return length
885
    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.

        :raises ReadTimeoutError: On a socket timeout.
        :raises SSLError: On a ``BaseSSLError``.
        :raises ProtocolError: On ``IncompleteRead``, ``HTTPException`` or
            ``OSError``.
        """
        # Stays False if the wrapped block raises, which triggers the
        # cleanup in ``finally``.
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # SSL errors related to framing/MAC get wrapped and reraised here
                raise SSLError(e) from e

            except IncompleteRead as e:
                # An expected count equal to the negated partial count gets
                # its own message; everything else is reported as a broken
                # connection.
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()
947
948 def _fp_read(
949 self,
950 amt: int | None = None,
951 *,
952 read1: bool = False,
953 ) -> bytes:
954 """
955 Read a response with the thought that reading the number of bytes
956 larger than can fit in a 32-bit int at a time via SSL in some
957 known cases leads to an overflow error that has to be prevented
958 if `amt` or `self.length_remaining` indicate that a problem may
959 happen.
960
961 This happens to urllib3 injected with pyOpenSSL-backed SSL-support.
962 """
963 assert self._fp
964 c_int_max = 2**31 - 1
965 if (
966 (amt and amt > c_int_max)
967 or (
968 amt is None
969 and self.length_remaining
970 and self.length_remaining > c_int_max
971 )
972 ) and util.IS_PYOPENSSL:
973 if read1:
974 return self._fp.read1(c_int_max)
975 buffer = io.BytesIO()
976 # Besides `max_chunk_amt` being a maximum chunk size, it
977 # affects memory overhead of reading a response by this
978 # method in CPython.
979 # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
980 # chunk size that does not lead to an overflow error, but
981 # 256 MiB is a compromise.
982 max_chunk_amt = 2**28
983 while amt is None or amt != 0:
984 if amt is not None:
985 chunk_amt = min(amt, max_chunk_amt)
986 amt -= chunk_amt
987 else:
988 chunk_amt = max_chunk_amt
989 data = self._fp.read(chunk_amt)
990 if not data:
991 break
992 buffer.write(data)
993 del data # to reduce peak memory usage by `max_chunk_amt`.
994 return buffer.getvalue()
995 elif read1:
996 return self._fp.read1(amt) if amt is not None else self._fp.read1()
997 else:
998 # StringIO doesn't like amt=None
999 return self._fp.read(amt) if amt is not None else self._fp.read()
1000
    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.

        No content decoding happens here. The bytes read are added to
        ``_fp_bytes_read`` and subtracted from ``length_remaining`` when that
        is known.

        :param amt: Number of bytes to read; ``None`` reads everything.
        :param read1: Passed through to ``_fp_read``.
        :returns: The bytes read, or ``None`` when there is no file object.
        :raises IncompleteRead: Inside the error catcher, when a non-zero
            read returns nothing although ``length_remaining`` is non-zero
            and ``enforce_content_length`` is set.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        # File objects without a ``closed`` attribute are treated as open.
        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

        # Update the counters only after the read has succeeded.
        if data:
            self._fp_bytes_read += len(data)
            if self.length_remaining is not None:
                self.length_remaining -= len(data)
        return data
1052
1053 def read(
1054 self,
1055 amt: int | None = None,
1056 decode_content: bool | None = None,
1057 cache_content: bool = False,
1058 ) -> bytes:
1059 """
1060 Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
1061 parameters: ``decode_content`` and ``cache_content``.
1062
1063 :param amt:
1064 How much of the content to read. If specified, caching is skipped
1065 because it doesn't make sense to cache partial content as the full
1066 response.
1067
1068 :param decode_content:
1069 If True, will attempt to decode the body based on the
1070 'content-encoding' header.
1071
1072 :param cache_content:
1073 If True, will save the returned data such that the same result is
1074 returned despite of the state of the underlying file object. This
1075 is useful if you want the ``.data`` property to continue working
1076 after having ``.read()`` the file object. (Overridden if ``amt`` is
1077 set.)
1078 """
1079 self._init_decoder()
1080 if decode_content is None:
1081 decode_content = self.decode_content
1082
1083 if amt and amt < 0:
1084 # Negative numbers and `None` should be treated the same.
1085 amt = None
1086 elif amt is not None:
1087 cache_content = False
1088
1089 if (
1090 self._decoder
1091 and self._decoder.has_unconsumed_tail
1092 and len(self._decoded_buffer) < amt
1093 ):
1094 decoded_data = self._decode(
1095 b"",
1096 decode_content,
1097 flush_decoder=False,
1098 max_length=amt - len(self._decoded_buffer),
1099 )
1100 self._decoded_buffer.put(decoded_data)
1101 if len(self._decoded_buffer) >= amt:
1102 return self._decoded_buffer.get(amt)
1103
1104 data = self._raw_read(amt)
1105 if not cache_content:
1106 self._uncached_read_occurred = True
1107
1108 flush_decoder = amt is None or (amt != 0 and not data)
1109
1110 if (
1111 not data
1112 and len(self._decoded_buffer) == 0
1113 and not (self._decoder and self._decoder.has_unconsumed_tail)
1114 ):
1115 return data
1116
1117 if amt is None:
1118 data = self._decode(data, decode_content, flush_decoder)
1119 # It's possible that there is buffered decoded data after a
1120 # partial read.
1121 if decode_content and len(self._decoded_buffer) > 0:
1122 self._decoded_buffer.put(data)
1123 data = self._decoded_buffer.get_all()
1124
1125 if cache_content and not self._uncached_read_occurred:
1126 self._body = data
1127 else:
1128 # do not waste memory on buffer when not decoding
1129 if not decode_content:
1130 if self._has_decoded_content:
1131 raise RuntimeError(
1132 "Calling read(decode_content=False) is not supported after "
1133 "read(decode_content=True) was called."
1134 )
1135 return data
1136
1137 decoded_data = self._decode(
1138 data,
1139 decode_content,
1140 flush_decoder,
1141 max_length=amt - len(self._decoded_buffer),
1142 )
1143 self._decoded_buffer.put(decoded_data)
1144
1145 while len(self._decoded_buffer) < amt and data:
1146 # TODO make sure to initially read enough data to get past the headers
1147 # For example, the GZ file header takes 10 bytes, we don't want to read
1148 # it one byte at a time
1149 data = self._raw_read(amt)
1150 decoded_data = self._decode(
1151 data,
1152 decode_content,
1153 flush_decoder,
1154 max_length=amt - len(self._decoded_buffer),
1155 )
1156 self._decoded_buffer.put(decoded_data)
1157 data = self._decoded_buffer.get(amt)
1158
1159 return data
1160
1161 def read1(
1162 self,
1163 amt: int | None = None,
1164 decode_content: bool | None = None,
1165 ) -> bytes:
1166 """
1167 Similar to ``http.client.HTTPResponse.read1`` and documented
1168 in :meth:`io.BufferedReader.read1`, but with an additional parameter:
1169 ``decode_content``.
1170
1171 :param amt:
1172 How much of the content to read.
1173
1174 :param decode_content:
1175 If True, will attempt to decode the body based on the
1176 'content-encoding' header.
1177 """
1178 if decode_content is None:
1179 decode_content = self.decode_content
1180 if amt and amt < 0:
1181 # Negative numbers and `None` should be treated the same.
1182 amt = None
1183 # try and respond without going to the network
1184 if self._has_decoded_content:
1185 if not decode_content:
1186 raise RuntimeError(
1187 "Calling read1(decode_content=False) is not supported after "
1188 "read1(decode_content=True) was called."
1189 )
1190 if (
1191 self._decoder
1192 and self._decoder.has_unconsumed_tail
1193 and (amt is None or len(self._decoded_buffer) < amt)
1194 ):
1195 decoded_data = self._decode(
1196 b"",
1197 decode_content,
1198 flush_decoder=False,
1199 max_length=(
1200 amt - len(self._decoded_buffer) if amt is not None else None
1201 ),
1202 )
1203 self._decoded_buffer.put(decoded_data)
1204 if len(self._decoded_buffer) > 0:
1205 if amt is None:
1206 return self._decoded_buffer.get_all()
1207 return self._decoded_buffer.get(amt)
1208 if amt == 0:
1209 return b""
1210
1211 # FIXME, this method's type doesn't say returning None is possible
1212 data = self._raw_read(amt, read1=True)
1213 self._uncached_read_occurred = True
1214 if not decode_content or data is None:
1215 return data
1216
1217 self._init_decoder()
1218 while True:
1219 flush_decoder = not data
1220 decoded_data = self._decode(
1221 data, decode_content, flush_decoder, max_length=amt
1222 )
1223 self._decoded_buffer.put(decoded_data)
1224 if decoded_data or flush_decoder:
1225 break
1226 data = self._raw_read(8192, read1=True)
1227
1228 if amt is None:
1229 return self._decoded_buffer.get_all()
1230 return self._decoded_buffer.get(amt)
1231
1232 def stream(
1233 self, amt: int | None = 2**16, decode_content: bool | None = None
1234 ) -> typing.Generator[bytes]:
1235 """
1236 A generator wrapper for the read() method. A call will block until
1237 ``amt`` bytes have been read from the connection or until the
1238 connection is closed.
1239
1240 :param amt:
1241 How much of the content to read. The generator will return up to
1242 much data per iteration, but may return less. This is particularly
1243 likely when using compressed data. However, the empty string will
1244 never be returned.
1245
1246 :param decode_content:
1247 If True, will attempt to decode the body based on the
1248 'content-encoding' header.
1249 """
1250 if amt == 0:
1251 return
1252
1253 if self.chunked and self.supports_chunked_reads():
1254 yield from self.read_chunked(amt, decode_content=decode_content)
1255 else:
1256 while (
1257 not is_fp_closed(self._fp)
1258 or len(self._decoded_buffer) > 0
1259 or (self._decoder and self._decoder.has_unconsumed_tail)
1260 ):
1261 data = self.read(amt=amt, decode_content=decode_content)
1262
1263 if data:
1264 yield data
1265
1266 # Overrides from io.IOBase
1267 def readable(self) -> bool:
1268 return True
1269
1270 def shutdown(self) -> None:
1271 if not self._sock_shutdown:
1272 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1273 if self._connection is None:
1274 raise RuntimeError(
1275 "Cannot shutdown as connection has already been released to the pool"
1276 )
1277 self._sock_shutdown(socket.SHUT_RD)
1278
1279 def close(self) -> None:
1280 self._sock_shutdown = None
1281
1282 if not self.closed and self._fp:
1283 self._fp.close()
1284
1285 if self._connection:
1286 self._connection.close()
1287
1288 if not self.auto_close:
1289 io.IOBase.close(self)
1290
1291 @property
1292 def closed(self) -> bool:
1293 if not self.auto_close:
1294 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1295 elif self._fp is None:
1296 return True
1297 elif hasattr(self._fp, "isclosed"):
1298 return self._fp.isclosed()
1299 elif hasattr(self._fp, "closed"):
1300 return self._fp.closed
1301 else:
1302 return True
1303
1304 def fileno(self) -> int:
1305 if self._fp is None:
1306 raise OSError("HTTPResponse has no file to get a fileno from")
1307 elif hasattr(self._fp, "fileno"):
1308 return self._fp.fileno()
1309 else:
1310 raise OSError(
1311 "The file-like object this HTTPResponse is wrapped "
1312 "around has no file descriptor"
1313 )
1314
1315 def flush(self) -> None:
1316 if (
1317 self._fp is not None
1318 and hasattr(self._fp, "flush")
1319 and not getattr(self._fp, "closed", False)
1320 ):
1321 return self._fp.flush()
1322
1323 def supports_chunked_reads(self) -> bool:
1324 """
1325 Checks if the underlying file-like object looks like a
1326 :class:`http.client.HTTPResponse` object. We do this by testing for
1327 the fp attribute. If it is present we assume it returns raw chunks as
1328 processed by read_chunked().
1329 """
1330 return hasattr(self._fp, "fp")
1331
1332 def _update_chunk_length(self) -> None:
1333 # First, we'll figure out length of a chunk and then
1334 # we'll try to read it from socket.
1335 if self.chunk_left is not None:
1336 return None
1337 line = self._fp.fp.readline() # type: ignore[union-attr]
1338 line = line.split(b";", 1)[0]
1339 try:
1340 self.chunk_left = int(line, 16)
1341 except ValueError:
1342 self.close()
1343 if line:
1344 # Invalid chunked protocol response, abort.
1345 raise InvalidChunkLength(self, line) from None
1346 else:
1347 # Truncated at start of next chunk
1348 raise ProtocolError("Response ended prematurely") from None
1349
1350 def _handle_chunk(self, amt: int | None) -> bytes:
1351 returned_chunk = None
1352 if amt is None:
1353 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1354 returned_chunk = chunk
1355 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1356 self.chunk_left = None
1357 elif self.chunk_left is not None and amt < self.chunk_left:
1358 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1359 self.chunk_left = self.chunk_left - amt
1360 returned_chunk = value
1361 elif amt == self.chunk_left:
1362 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1363 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1364 self.chunk_left = None
1365 returned_chunk = value
1366 else: # amt > self.chunk_left
1367 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1368 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1369 self.chunk_left = None
1370 return returned_chunk # type: ignore[no-any-return]
1371
1372 def read_chunked(
1373 self, amt: int | None = None, decode_content: bool | None = None
1374 ) -> typing.Generator[bytes]:
1375 """
1376 Similar to :meth:`HTTPResponse.read`, but with an additional
1377 parameter: ``decode_content``.
1378
1379 :param amt:
1380 How much of the content to read. If specified, caching is skipped
1381 because it doesn't make sense to cache partial content as the full
1382 response.
1383
1384 :param decode_content:
1385 If True, will attempt to decode the body based on the
1386 'content-encoding' header.
1387 """
1388 self._init_decoder()
1389 # FIXME: Rewrite this method and make it a class with a better structured logic.
1390 if not self.chunked:
1391 raise ResponseNotChunked(
1392 "Response is not chunked. "
1393 "Header 'transfer-encoding: chunked' is missing."
1394 )
1395 if not self.supports_chunked_reads():
1396 raise BodyNotHttplibCompatible(
1397 "Body should be http.client.HTTPResponse like. "
1398 "It should have have an fp attribute which returns raw chunks."
1399 )
1400
1401 with self._error_catcher():
1402 # Don't bother reading the body of a HEAD request.
1403 if self._original_response and is_response_to_head(self._original_response):
1404 self._original_response.close()
1405 return None
1406
1407 # If a response is already read and closed
1408 # then return immediately.
1409 if self._fp.fp is None: # type: ignore[union-attr]
1410 return None
1411
1412 if amt == 0:
1413 return
1414 elif amt and amt < 0:
1415 # Negative numbers and `None` should be treated the same,
1416 # but httplib handles only `None` correctly.
1417 amt = None
1418
1419 while True:
1420 # First, check if any data is left in the decoder's buffer.
1421 if self._decoder and self._decoder.has_unconsumed_tail:
1422 chunk = b""
1423 else:
1424 self._update_chunk_length()
1425 self._uncached_read_occurred = True
1426 if self.chunk_left == 0:
1427 break
1428 chunk = self._handle_chunk(amt)
1429 decoded = self._decode(
1430 chunk,
1431 decode_content=decode_content,
1432 flush_decoder=False,
1433 max_length=amt,
1434 )
1435 if decoded:
1436 yield decoded
1437
1438 if decode_content:
1439 # On CPython and PyPy, we should never need to flush the
1440 # decoder. However, on Jython we *might* need to, so
1441 # lets defensively do it anyway.
1442 decoded = self._flush_decoder()
1443 if decoded: # Platform-specific: Jython.
1444 yield decoded
1445
1446 # Chunk content ends with \r\n: discard it.
1447 while self._fp is not None:
1448 line = self._fp.fp.readline()
1449 if not line:
1450 # Some sites may not end with '\r\n'.
1451 break
1452 if line == b"\r\n":
1453 break
1454
1455 # We read everything; close the "file".
1456 if self._original_response:
1457 self._original_response.close()
1458
1459 @property
1460 def url(self) -> str | None:
1461 """
1462 Returns the URL that was the source of this response.
1463 If the request that generated this response redirected, this method
1464 will return the final redirect location.
1465 """
1466 return self._request_url
1467
1468 @url.setter
1469 def url(self, url: str | None) -> None:
1470 self._request_url = url
1471
1472 def __iter__(self) -> typing.Iterator[bytes]:
1473 buffer: list[bytes] = []
1474 for chunk in self.stream(decode_content=True):
1475 if b"\n" in chunk:
1476 chunks = chunk.split(b"\n")
1477 yield b"".join(buffer) + chunks[0] + b"\n"
1478 for x in chunks[1:-1]:
1479 yield x + b"\n"
1480 if chunks[-1]:
1481 buffer = [chunks[-1]]
1482 else:
1483 buffer = []
1484 else:
1485 buffer.append(chunk)
1486 if buffer:
1487 yield b"".join(buffer)