from __future__ import annotations

import collections
import io
import json as _json
import logging
import socket
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

brotli = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    DependencyWarning,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        raise NotImplementedError()

    @property
    def has_unconsumed_tail(self) -> bool:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._first_try_data = b""
        self._unfed_data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        data = self._unfed_data + data
        self._unfed_data = b""
        if not data and not self._obj.unconsumed_tail:
            return data
        original_max_length = max_length
        if original_max_length < 0:
            max_length = 0
        elif original_max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unfed_data = data
            return b""

        # Subsequent calls always reuse `self._obj`. zlib requires
        # passing the unconsumed tail if decompression is to continue.
        if not self._first_try:
            return self._obj.decompress(
                self._obj.unconsumed_tail + data, max_length=max_length
            )

        # First call tries with RFC 1950 ZLIB format.
        self._first_try_data += data
        try:
            decompressed = self._obj.decompress(data, max_length=max_length)
            if decompressed:
                self._first_try = False
                self._first_try_data = b""
            return decompressed
        # On failure, it falls back to RFC 1951 DEFLATE format.
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(
                    self._first_try_data, max_length=original_max_length
                )
            finally:
                self._first_try_data = b""

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unfed_data) or (
            bool(self._obj.unconsumed_tail) and not self._first_try
        )

    def flush(self) -> bytes:
        return self._obj.flush()
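
# Illustrative sketch (not executed here): DeflateDecoder accepts both
# zlib-wrapped (RFC 1950) and raw (RFC 1951) deflate bodies, e.g. assuming the
# data was produced with the standard `zlib` module:
#
#     DeflateDecoder().decompress(zlib.compress(b"payload"))  # RFC 1950 input
#
#     raw = zlib.compressobj(wbits=-zlib.MAX_WBITS)
#     DeflateDecoder().decompress(raw.compress(b"payload") + raw.flush())  # RFC 1951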


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER
        self._unconsumed_tail = b""

    def decompress(self, data: bytes, max_length: int = -1) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA:
            return bytes(ret)

        if max_length == 0:
            # We should not pass 0 to the zlib decompressor because 0 is
            # the default value that will make zlib decompress without a
            # length limit.
            # Data should be stored for subsequent calls.
            self._unconsumed_tail += data
            return b""

        # zlib requires passing the unconsumed tail to the subsequent
        # call if decompression is to continue.
        data = self._unconsumed_tail + data
        if not data and self._obj.eof:
            return bytes(ret)

        while True:
            try:
                ret += self._obj.decompress(
                    data, max_length=max(max_length - len(ret), 0)
                )
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                self._unconsumed_tail = b""
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise

            self._unconsumed_tail = data = (
                self._obj.unconsumed_tail or self._obj.unused_data
            )
            if max_length > 0 and len(ret) >= max_length:
                break

            if not data:
                return bytes(ret)
            # When the end of a gzip member is reached, a new decompressor
            # must be created for unused (possibly future) data.
            if self._obj.eof:
                self._state = GzipDecoderState.OTHER_MEMBERS
                self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

        return bytes(ret)

    @property
    def has_unconsumed_tail(self) -> bool:
        return bool(self._unconsumed_tail)

    def flush(self) -> bytes:
        return self._obj.flush()
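
# Illustrative sketch (not executed here): multi-member gzip bodies decode to
# the concatenation of their payloads, e.g. using the standard `gzip` module:
#
#     blob = gzip.compress(b"first") + gzip.compress(b"second")
#     GzipDecoder().decompress(blob)  # -> b"firstsecond"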


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "_decompress", self._obj.decompress)
            else:
                setattr(self, "_decompress", self._obj.process)

        # Requires Brotli >= 1.2.0 for `output_buffer_limit`.
        def _decompress(self, data: bytes, output_buffer_limit: int = -1) -> bytes:
            raise NotImplementedError()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            try:
                if max_length > 0:
                    return self._decompress(data, output_buffer_limit=max_length)
                else:
                    return self._decompress(data)
            except TypeError:
                # Fallback for Brotli/brotlicffi/brotlipy versions without
                # the `output_buffer_limit` parameter.
                warnings.warn(
                    "Brotli >= 1.2.0 is required to prevent decompression bombs.",
                    DependencyWarning,
                )
                return self._decompress(data)

        @property
        def has_unconsumed_tail(self) -> bool:
            try:
                return not self._obj.can_accept_more_data()
            except AttributeError:
                return False

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


try:
    if sys.version_info >= (3, 14):
        from compression import zstd
    else:
        from backports import zstd
except ImportError:
    HAS_ZSTD = False
else:
    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes, max_length: int = -1) -> bytes:
            if not data and not self.has_unconsumed_tail:
                return b""
            if self._obj.eof:
                data = self._obj.unused_data + data
                self._obj = zstd.ZstdDecompressor()
            part = self._obj.decompress(data, max_length=max_length)
            length = len(part)
            data_parts = [part]
            # Every loop iteration is supposed to read data from a separate frame.
            # The loop breaks when:
            # - enough data is read;
            # - no more unused data is available;
            # - end of the last read frame has not been reached (i.e.,
            #   more data has to be fed).
            while (
                self._obj.eof
                and self._obj.unused_data
                and (max_length < 0 or length < max_length)
            ):
                unused_data = self._obj.unused_data
                if not self._obj.needs_input:
                    self._obj = zstd.ZstdDecompressor()
                part = self._obj.decompress(
                    unused_data,
                    max_length=(max_length - length) if max_length > 0 else -1,
                )
                if part_length := len(part):
                    data_parts.append(part)
                    length += part_length
                elif self._obj.needs_input:
                    break
            return b"".join(data_parts)

        @property
        def has_unconsumed_tail(self) -> bool:
            return not (self._obj.needs_input or self._obj.eof) or bool(
                self._obj.unused_data
            )

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
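
    Example (an illustrative sketch; assumes the optional brotli dependency is
    installed and ``compressed`` is a placeholder for a body received with
    ``Content-Encoding: gzip, br``)::

        decoder = MultiDecoder("gzip, br")
        # Encodings are undone in reverse header order: brotli first, then gzip.
        plaintext = decoder.decompress(compressed) + decoder.flush()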
301 """
302
303 # Maximum allowed number of chained HTTP encodings in the
304 # Content-Encoding header.
305 max_decode_links = 5
306
307 def __init__(self, modes: str) -> None:
308 encodings = [m.strip() for m in modes.split(",")]
309 if len(encodings) > self.max_decode_links:
310 raise DecodeError(
311 "Too many content encodings in the chain: "
312 f"{len(encodings)} > {self.max_decode_links}"
313 )
314 self._decoders = [_get_decoder(e) for e in encodings]
315
316 def flush(self) -> bytes:
317 return self._decoders[0].flush()
318
319 def decompress(self, data: bytes, max_length: int = -1) -> bytes:
320 if max_length <= 0:
321 for d in reversed(self._decoders):
322 data = d.decompress(data)
323 return data
324
325 ret = bytearray()
326 # Every while loop iteration goes through all decoders once.
327 # It exits when enough data is read or no more data can be read.
328 # It is possible that the while loop iteration does not produce
329 # any data because we retrieve up to `max_length` from every
330 # decoder, and the amount of bytes may be insufficient for the
331 # next decoder to produce enough/any output.
332 while True:
333 any_data = False
334 for d in reversed(self._decoders):
335 data = d.decompress(data, max_length=max_length - len(ret))
336 if data:
337 any_data = True
338 # We should not break when no data is returned because
339 # next decoders may produce data even with empty input.
340 ret += data
341 if not any_data or len(ret) >= max_length:
342 return bytes(ret)
343 data = b""
344
345 @property
346 def has_unconsumed_tail(self) -> bool:
347 return any(d.has_unconsumed_tail for d in self._decoders)
348
349
350def _get_decoder(mode: str) -> ContentDecoder:
351 if "," in mode:
352 return MultiDecoder(mode)
353
354 # According to RFC 9110 section 8.4.1.3, recipients should
355 # consider x-gzip equivalent to gzip
356 if mode in ("gzip", "x-gzip"):
357 return GzipDecoder()
358
359 if brotli is not None and mode == "br":
360 return BrotliDecoder()
361
362 if HAS_ZSTD and mode == "zstd":
363 return ZstdDecoder()
364
365 return DeflateDecoder()
366
367
368class BytesQueueBuffer:
369 """Memory-efficient bytes buffer
370
371 To return decoded data in read() and still follow the BufferedIOBase API, we need a
372 buffer to always return the correct amount of bytes.
373
374 This buffer should be filled using calls to put()
375
376 Our maximum memory usage is determined by the sum of the size of:
377
378 * self.buffer, which contains the full data
379 * the largest chunk that we will copy in get()
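
    Example (illustrative)::

        buffer = BytesQueueBuffer()
        buffer.put(b"foo")
        buffer.put(b"bar")
        buffer.get(4)     # b"foob"
        buffer.get_all()  # b"ar"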
380 """
381
382 def __init__(self) -> None:
383 self.buffer: typing.Deque[bytes | memoryview[bytes]] = collections.deque()
384 self._size: int = 0
385
386 def __len__(self) -> int:
387 return self._size
388
389 def put(self, data: bytes) -> None:
390 self.buffer.append(data)
391 self._size += len(data)
392
393 def get(self, n: int) -> bytes:
394 if n == 0:
395 return b""
396 elif not self.buffer:
397 raise RuntimeError("buffer is empty")
398 elif n < 0:
399 raise ValueError("n should be > 0")
400
401 if len(self.buffer[0]) == n and isinstance(self.buffer[0], bytes):
402 self._size -= n
403 return self.buffer.popleft()
404
405 fetched = 0
406 ret = io.BytesIO()
407 while fetched < n:
408 remaining = n - fetched
409 chunk = self.buffer.popleft()
410 chunk_length = len(chunk)
411 if remaining < chunk_length:
412 chunk = memoryview(chunk)
413 left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
414 ret.write(left_chunk)
415 self.buffer.appendleft(right_chunk)
416 self._size -= remaining
417 break
418 else:
419 ret.write(chunk)
420 self._size -= chunk_length
421 fetched += chunk_length
422
423 if not self.buffer:
424 break
425
426 return ret.getvalue()
427
428 def get_all(self) -> bytes:
429 buffer = self.buffer
430 if not buffer:
431 assert self._size == 0
432 return b""
433 if len(buffer) == 1:
434 result = buffer.pop()
435 if isinstance(result, memoryview):
436 result = result.tobytes()
437 else:
438 ret = io.BytesIO()
439 ret.writelines(buffer.popleft() for _ in range(len(buffer)))
440 result = ret.getvalue()
441 self._size = 0
442 return result
443
444
445class BaseHTTPResponse(io.IOBase):
446 CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
447 if brotli is not None:
448 CONTENT_DECODERS += ["br"]
449 if HAS_ZSTD:
450 CONTENT_DECODERS += ["zstd"]
451 REDIRECT_STATUSES = [301, 302, 303, 307, 308]
452
453 DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
454 if brotli is not None:
455 DECODER_ERROR_CLASSES += (brotli.error,)
456
457 if HAS_ZSTD:
458 DECODER_ERROR_CLASSES += (zstd.ZstdError,)
459
460 def __init__(
461 self,
462 *,
463 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
464 status: int,
465 version: int,
466 version_string: str,
467 reason: str | None,
468 decode_content: bool,
469 request_url: str | None,
470 retries: Retry | None = None,
471 ) -> None:
472 if isinstance(headers, HTTPHeaderDict):
473 self.headers = headers
474 else:
475 self.headers = HTTPHeaderDict(headers) # type: ignore[arg-type]
476 self.status = status
477 self.version = version
478 self.version_string = version_string
479 self.reason = reason
480 self.decode_content = decode_content
481 self._has_decoded_content = False
482 self._request_url: str | None = request_url
483 self.retries = retries
484
485 self.chunked = False
486 tr_enc = self.headers.get("transfer-encoding", "").lower()
487 # Don't incur the penalty of creating a list and then discarding it
488 encodings = (enc.strip() for enc in tr_enc.split(","))
489 if "chunked" in encodings:
490 self.chunked = True
491
492 self._decoder: ContentDecoder | None = None
493 self.length_remaining: int | None
494
495 def get_redirect_location(self) -> str | None | typing.Literal[False]:
496 """
497 Should we redirect and where to?
498
499 :returns: Truthy redirect location string if we got a redirect status
500 code and valid location. ``None`` if redirect status and no
501 location. ``False`` if not a redirect status code.
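
        Example (illustrative; ``response`` is any ``BaseHTTPResponse``)::

            location = response.get_redirect_location()
            if location is False:
                ...  # not a redirect status code
            elif location is None:
                ...  # redirect status, but no Location header
            else:
                ...  # follow the returned location string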
502 """
503 if self.status in self.REDIRECT_STATUSES:
504 return self.headers.get("location")
505 return False
506
507 @property
508 def data(self) -> bytes:
509 raise NotImplementedError()
510
511 def json(self) -> typing.Any:
512 """
513 Deserializes the body of the HTTP response as a Python object.
514
515 The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

        :returns: The body of the HTTP response as a Python object.
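
        Example (illustrative sketch; the URL is a placeholder)::

            import urllib3

            resp = urllib3.request("GET", "https://example.org/api/items")
            items = resp.json()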
528 """
529 data = self.data.decode("utf-8")
530 return _json.loads(data)
531
532 @property
533 def url(self) -> str | None:
534 raise NotImplementedError()
535
536 @url.setter
537 def url(self, url: str | None) -> None:
538 raise NotImplementedError()
539
540 @property
541 def connection(self) -> BaseHTTPConnection | None:
542 raise NotImplementedError()
543
544 @property
545 def retries(self) -> Retry | None:
546 return self._retries
547
548 @retries.setter
549 def retries(self, retries: Retry | None) -> None:
550 # Override the request_url if retries has a redirect location.
551 if retries is not None and retries.history:
552 self.url = retries.history[-1].redirect_location
553 self._retries = retries
554
555 def stream(
556 self, amt: int | None = 2**16, decode_content: bool | None = None
557 ) -> typing.Iterator[bytes]:
558 raise NotImplementedError()
559
560 def read(
561 self,
562 amt: int | None = None,
563 decode_content: bool | None = None,
564 cache_content: bool = False,
565 ) -> bytes:
566 raise NotImplementedError()
567
568 def read1(
569 self,
570 amt: int | None = None,
571 decode_content: bool | None = None,
572 ) -> bytes:
573 raise NotImplementedError()
574
575 def read_chunked(
576 self,
577 amt: int | None = None,
578 decode_content: bool | None = None,
579 ) -> typing.Iterator[bytes]:
580 raise NotImplementedError()
581
582 def release_conn(self) -> None:
583 raise NotImplementedError()
584
585 def drain_conn(self) -> None:
586 raise NotImplementedError()
587
588 def shutdown(self) -> None:
589 raise NotImplementedError()
590
591 def close(self) -> None:
592 raise NotImplementedError()
593
594 def _init_decoder(self) -> None:
595 """
596 Set-up the _decoder attribute if necessary.
597 """
598 # Note: content-encoding value should be case-insensitive, per RFC 7230
599 # Section 3.2
600 content_encoding = self.headers.get("content-encoding", "").lower()
601 if self._decoder is None:
602 if content_encoding in self.CONTENT_DECODERS:
603 self._decoder = _get_decoder(content_encoding)
604 elif "," in content_encoding:
605 encodings = [
606 e.strip()
607 for e in content_encoding.split(",")
608 if e.strip() in self.CONTENT_DECODERS
609 ]
610 if encodings:
611 self._decoder = _get_decoder(content_encoding)
612
613 def _decode(
614 self,
615 data: bytes,
616 decode_content: bool | None,
617 flush_decoder: bool,
618 max_length: int | None = None,
619 ) -> bytes:
620 """
621 Decode the data passed in and potentially flush the decoder.
622 """
623 if not decode_content:
624 if self._has_decoded_content:
625 raise RuntimeError(
626 "Calling read(decode_content=False) is not supported after "
627 "read(decode_content=True) was called."
628 )
629 return data
630
631 if max_length is None or flush_decoder:
632 max_length = -1
633
634 try:
635 if self._decoder:
636 data = self._decoder.decompress(data, max_length=max_length)
637 self._has_decoded_content = True
638 except self.DECODER_ERROR_CLASSES as e:
639 content_encoding = self.headers.get("content-encoding", "").lower()
640 raise DecodeError(
641 "Received response with content-encoding: %s, but "
642 "failed to decode it." % content_encoding,
643 e,
644 ) from e
645 if flush_decoder:
646 data += self._flush_decoder()
647
648 return data
649
650 def _flush_decoder(self) -> bytes:
651 """
652 Flushes the decoder. Should only be called if the decoder is actually
653 being used.
654 """
655 if self._decoder:
656 return self._decoder.decompress(b"") + self._decoder.flush()
657 return b""
658
659 # Compatibility methods for `io` module
660 def readinto(self, b: bytearray) -> int:
661 temp = self.read(len(b))
662 if len(temp) == 0:
663 return 0
664 else:
665 b[: len(temp)] = temp
666 return len(temp)
667
668 # Methods used by dependent libraries
669 def getheaders(self) -> HTTPHeaderDict:
670 return self.headers
671
672 def getheader(self, name: str, default: str | None = None) -> str | None:
673 return self.headers.get(name, default)
674
675 # Compatibility method for http.cookiejar
676 def info(self) -> HTTPHeaderDict:
677 return self.headers
678
679 def geturl(self) -> str | None:
680 return self.url
681
682
683class HTTPResponse(BaseHTTPResponse):
684 """
685 HTTP Response container.
686
687 Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
688 loaded and decoded on-demand when the ``data`` property is accessed. This
689 class is also compatible with the Python standard library's :mod:`io`
690 module, and can hence be treated as a readable object in the context of that
691 framework.
692
693 Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:
694
695 :param preload_content:
696 If True, the response's body will be preloaded during construction.
697
698 :param decode_content:
699 If True, will attempt to decode the body based on the
700 'content-encoding' header.
701
702 :param original_response:
703 When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
704 object, it's convenient to include the original for debug purposes. It's
705 otherwise unused.
706
707 :param retries:
708 The retries contains the last :class:`~urllib3.util.retry.Retry` that
709 was used during the request.
710
711 :param enforce_content_length:
712 Enforce content length checking. Body returned by server must match
713 value of Content-Length header, if present. Otherwise, raise error.
714 """
715
716 def __init__(
717 self,
718 body: _TYPE_BODY = "",
719 headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
720 status: int = 0,
721 version: int = 0,
722 version_string: str = "HTTP/?",
723 reason: str | None = None,
724 preload_content: bool = True,
725 decode_content: bool = True,
726 original_response: _HttplibHTTPResponse | None = None,
727 pool: HTTPConnectionPool | None = None,
728 connection: HTTPConnection | None = None,
729 msg: _HttplibHTTPMessage | None = None,
730 retries: Retry | None = None,
731 enforce_content_length: bool = True,
732 request_method: str | None = None,
733 request_url: str | None = None,
734 auto_close: bool = True,
735 sock_shutdown: typing.Callable[[int], None] | None = None,
736 ) -> None:
737 super().__init__(
738 headers=headers,
739 status=status,
740 version=version,
741 version_string=version_string,
742 reason=reason,
743 decode_content=decode_content,
744 request_url=request_url,
745 retries=retries,
746 )
747
748 self.enforce_content_length = enforce_content_length
749 self.auto_close = auto_close
750
751 self._body = None
752 self._fp: _HttplibHTTPResponse | None = None
753 self._original_response = original_response
754 self._fp_bytes_read = 0
755 self.msg = msg
756
757 if body and isinstance(body, (str, bytes)):
758 self._body = body
759
760 self._pool = pool
761 self._connection = connection
762
763 if hasattr(body, "read"):
764 self._fp = body # type: ignore[assignment]
765 self._sock_shutdown = sock_shutdown
766
767 # Are we using the chunked-style of transfer encoding?
768 self.chunk_left: int | None = None
769
770 # Determine length of response
771 self.length_remaining = self._init_length(request_method)
772
773 # Used to return the correct amount of bytes for partial read()s
774 self._decoded_buffer = BytesQueueBuffer()
775
776 # If requested, preload the body.
777 if preload_content and not self._body:
778 self._body = self.read(decode_content=decode_content)
779
780 def release_conn(self) -> None:
781 if not self._pool or not self._connection:
782 return None
783
784 self._pool._put_conn(self._connection)
785 self._connection = None
786
787 def drain_conn(self) -> None:
788 """
789 Read and discard any remaining HTTP response data in the response connection.
790
791 Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
792 """
793 try:
794 self.read(
795 # Do not spend resources decoding the content unless
796 # decoding has already been initiated.
797 decode_content=self._has_decoded_content,
798 )
799 except (HTTPError, OSError, BaseSSLError, HTTPException):
800 pass
801
802 @property
803 def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
825 """
826 return self._fp_bytes_read
827
828 def _init_length(self, request_method: str | None) -> int | None:
829 """
830 Set initial length value for Response content if available.
831 """
832 length: int | None
833 content_length: str | None = self.headers.get("content-length")
834
835 if content_length is not None:
836 if self.chunked:
837 # This Response will fail with an IncompleteRead if it can't be
838 # received as chunked. This method falls back to attempt reading
839 # the response before raising an exception.
840 log.warning(
841 "Received response with both Content-Length and "
842 "Transfer-Encoding set. This is expressly forbidden "
843 "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
844 "attempting to process response as Transfer-Encoding: "
845 "chunked."
846 )
847 return None
848
849 try:
850 # RFC 7230 section 3.3.2 specifies multiple content lengths can
851 # be sent in a single Content-Length header
852 # (e.g. Content-Length: 42, 42). This line ensures the values
853 # are all valid ints and that as long as the `set` length is 1,
854 # all values are the same. Otherwise, the header is invalid.
855 lengths = {int(val) for val in content_length.split(",")}
856 if len(lengths) > 1:
857 raise InvalidHeader(
858 "Content-Length contained multiple "
859 "unmatching values (%s)" % content_length
860 )
861 length = lengths.pop()
862 except ValueError:
863 length = None
864 else:
865 if length < 0:
866 length = None
867
868 else: # if content_length is None
869 length = None
870
871 # Convert status to int for comparison
872 # In some cases, httplib returns a status of "_UNKNOWN"
873 try:
874 status = int(self.status)
875 except ValueError:
876 status = 0
877
878 # Check for responses that shouldn't include a body
879 if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
880 length = 0
881
882 return length
883
884 @contextmanager
885 def _error_catcher(self) -> typing.Generator[None]:
886 """
887 Catch low-level python exceptions, instead re-raising urllib3
888 variants, so that low-level exceptions are not leaked in the
889 high-level api.
890
891 On exit, release the connection back to the pool.
892 """
893 clean_exit = False
894
895 try:
896 try:
897 yield
898
899 except SocketTimeout as e:
900 # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
901 # there is yet no clean way to get at it from this context.
902 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]
903
904 except BaseSSLError as e:
905 # FIXME: Is there a better way to differentiate between SSLErrors?
906 if "read operation timed out" not in str(e):
907 # SSL errors related to framing/MAC get wrapped and reraised here
908 raise SSLError(e) from e
909
910 raise ReadTimeoutError(self._pool, None, "Read timed out.") from e # type: ignore[arg-type]
911
912 except IncompleteRead as e:
913 if (
914 e.expected is not None
915 and e.partial is not None
916 and e.expected == -e.partial
917 ):
918 arg = "Response may not contain content."
919 else:
920 arg = f"Connection broken: {e!r}"
921 raise ProtocolError(arg, e) from e
922
923 except (HTTPException, OSError) as e:
924 raise ProtocolError(f"Connection broken: {e!r}", e) from e
925
926 # If no exception is thrown, we should avoid cleaning up
927 # unnecessarily.
928 clean_exit = True
929 finally:
930 # If we didn't terminate cleanly, we need to throw away our
931 # connection.
932 if not clean_exit:
933 # The response may not be closed but we're not going to use it
934 # anymore so close it now to ensure that the connection is
935 # released back to the pool.
936 if self._original_response:
937 self._original_response.close()
938
939 # Closing the response may not actually be sufficient to close
940 # everything, so if we have a hold of the connection close that
941 # too.
942 if self._connection:
943 self._connection.close()
944
945 # If we hold the original response but it's closed now, we should
946 # return the connection back to the pool.
947 if self._original_response and self._original_response.isclosed():
948 self.release_conn()
949
950 def _fp_read(
951 self,
952 amt: int | None = None,
953 *,
954 read1: bool = False,
955 ) -> bytes:
956 """
        Read a response, working around the fact that in some known cases
        reading more bytes than fit in a 32-bit int at a time via SSL leads to
        an overflow error. The chunked workaround is applied when `amt` or
        `self.length_remaining` indicate that a problem may happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` bytes from the socket.
1015 """
1016 if self._fp is None:
1017 return None # type: ignore[return-value]
1018
1019 fp_closed = getattr(self._fp, "closed", False)
1020
1021 with self._error_catcher():
1022 data = self._fp_read(amt, read1=read1) if not fp_closed else b""
1023 if amt is not None and amt != 0 and not data:
1024 # Platform-specific: Buggy versions of Python.
1025 # Close the connection when no data is returned
1026 #
1027 # This is redundant to what httplib/http.client _should_
1028 # already do. However, versions of python released before
1029 # December 15, 2012 (http://bugs.python.org/issue16298) do
1030 # not properly close the connection in all cases. There is
1031 # no harm in redundantly calling close.
1032 self._fp.close()
1033 if (
1034 self.enforce_content_length
1035 and self.length_remaining is not None
1036 and self.length_remaining != 0
1037 ):
1038 # This is an edge case that httplib failed to cover due
1039 # to concerns of backward compatibility. We're
1040 # addressing it here to make sure IncompleteRead is
1041 # raised during streaming, so all calls with incorrect
1042 # Content-Length are caught.
1043 raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
1044 elif read1 and (
1045 (amt != 0 and not data) or self.length_remaining == len(data)
1046 ):
1047 # All data has been read, but `self._fp.read1` in
1048 # CPython 3.12 and older doesn't always close
1049 # `http.client.HTTPResponse`, so we close it here.
1050 # See https://github.com/python/cpython/issues/113199
1051 self._fp.close()
1052
1053 if data:
1054 self._fp_bytes_read += len(data)
1055 if self.length_remaining is not None:
1056 self.length_remaining -= len(data)
1057 return data
1058
1059 def read(
1060 self,
1061 amt: int | None = None,
1062 decode_content: bool | None = None,
1063 cache_content: bool = False,
1064 ) -> bytes:
1065 """
1066 Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
1067 parameters: ``decode_content`` and ``cache_content``.
1068
1069 :param amt:
1070 How much of the content to read. If specified, caching is skipped
1071 because it doesn't make sense to cache partial content as the full
1072 response.
1073
1074 :param decode_content:
1075 If True, will attempt to decode the body based on the
1076 'content-encoding' header.
1077
1078 :param cache_content:
1079 If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
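
        Example (illustrative; assumes ``resp`` was obtained with
        ``preload_content=False``)::

            chunk = resp.read(1024)  # up to 1024 bytes of decoded body
            rest = resp.read()       # the remainder of the body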
1084 """
1085 self._init_decoder()
1086 if decode_content is None:
1087 decode_content = self.decode_content
1088
1089 if amt and amt < 0:
1090 # Negative numbers and `None` should be treated the same.
1091 amt = None
1092 elif amt is not None:
1093 cache_content = False
1094
1095 if self._decoder and self._decoder.has_unconsumed_tail:
1096 decoded_data = self._decode(
1097 b"",
1098 decode_content,
1099 flush_decoder=False,
1100 max_length=amt - len(self._decoded_buffer),
1101 )
1102 self._decoded_buffer.put(decoded_data)
1103 if len(self._decoded_buffer) >= amt:
1104 return self._decoded_buffer.get(amt)
1105
1106 data = self._raw_read(amt)
1107
1108 flush_decoder = amt is None or (amt != 0 and not data)
1109
1110 if (
1111 not data
1112 and len(self._decoded_buffer) == 0
1113 and not (self._decoder and self._decoder.has_unconsumed_tail)
1114 ):
1115 return data
1116
1117 if amt is None:
1118 data = self._decode(data, decode_content, flush_decoder)
1119 if cache_content:
1120 self._body = data
1121 else:
1122 # do not waste memory on buffer when not decoding
1123 if not decode_content:
1124 if self._has_decoded_content:
1125 raise RuntimeError(
1126 "Calling read(decode_content=False) is not supported after "
1127 "read(decode_content=True) was called."
1128 )
1129 return data
1130
1131 decoded_data = self._decode(
1132 data,
1133 decode_content,
1134 flush_decoder,
1135 max_length=amt - len(self._decoded_buffer),
1136 )
1137 self._decoded_buffer.put(decoded_data)
1138
1139 while len(self._decoded_buffer) < amt and data:
1140 # TODO make sure to initially read enough data to get past the headers
1141 # For example, the GZ file header takes 10 bytes, we don't want to read
1142 # it one byte at a time
1143 data = self._raw_read(amt)
1144 decoded_data = self._decode(
1145 data,
1146 decode_content,
1147 flush_decoder,
1148 max_length=amt - len(self._decoded_buffer),
1149 )
1150 self._decoded_buffer.put(decoded_data)
1151 data = self._decoded_buffer.get(amt)
1152
1153 return data
1154
1155 def read1(
1156 self,
1157 amt: int | None = None,
1158 decode_content: bool | None = None,
1159 ) -> bytes:
1160 """
1161 Similar to ``http.client.HTTPResponse.read1`` and documented
1162 in :meth:`io.BufferedReader.read1`, but with an additional parameter:
1163 ``decode_content``.
1164
1165 :param amt:
1166 How much of the content to read.
1167
1168 :param decode_content:
1169 If True, will attempt to decode the body based on the
1170 'content-encoding' header.
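
        Example (illustrative; assumes ``resp`` was obtained with
        ``preload_content=False``)::

            chunk = resp.read1(4096)  # at most 4096 bytes from a single read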
1171 """
1172 if decode_content is None:
1173 decode_content = self.decode_content
1174 if amt and amt < 0:
1175 # Negative numbers and `None` should be treated the same.
1176 amt = None
1177 # try and respond without going to the network
1178 if self._has_decoded_content:
1179 if not decode_content:
1180 raise RuntimeError(
1181 "Calling read1(decode_content=False) is not supported after "
1182 "read1(decode_content=True) was called."
1183 )
1184 if (
1185 self._decoder
1186 and self._decoder.has_unconsumed_tail
1187 and (amt is None or len(self._decoded_buffer) < amt)
1188 ):
1189 decoded_data = self._decode(
1190 b"",
1191 decode_content,
1192 flush_decoder=False,
1193 max_length=(
1194 amt - len(self._decoded_buffer) if amt is not None else None
1195 ),
1196 )
1197 self._decoded_buffer.put(decoded_data)
1198 if len(self._decoded_buffer) > 0:
1199 if amt is None:
1200 return self._decoded_buffer.get_all()
1201 return self._decoded_buffer.get(amt)
1202 if amt == 0:
1203 return b""
1204
1205 # FIXME, this method's type doesn't say returning None is possible
1206 data = self._raw_read(amt, read1=True)
1207 if not decode_content or data is None:
1208 return data
1209
1210 self._init_decoder()
1211 while True:
1212 flush_decoder = not data
1213 decoded_data = self._decode(
1214 data, decode_content, flush_decoder, max_length=amt
1215 )
1216 self._decoded_buffer.put(decoded_data)
1217 if decoded_data or flush_decoder:
1218 break
1219 data = self._raw_read(8192, read1=True)
1220
1221 if amt is None:
1222 return self._decoded_buffer.get_all()
1223 return self._decoded_buffer.get(amt)
1224
1225 def stream(
1226 self, amt: int | None = 2**16, decode_content: bool | None = None
1227 ) -> typing.Generator[bytes]:
1228 """
1229 A generator wrapper for the read() method. A call will block until
1230 ``amt`` bytes have been read from the connection or until the
1231 connection is closed.
1232
1233 :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is particularly
            likely when using compressed data. However, the empty string will
            never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
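
        Example (illustrative; assumes ``resp`` was obtained with
        ``preload_content=False``)::

            for chunk in resp.stream(2**16):
                handle(chunk)  # ``handle`` is a placeholder for your own code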
1242 """
1243 if self.chunked and self.supports_chunked_reads():
1244 yield from self.read_chunked(amt, decode_content=decode_content)
1245 else:
1246 while (
1247 not is_fp_closed(self._fp)
1248 or len(self._decoded_buffer) > 0
1249 or (self._decoder and self._decoder.has_unconsumed_tail)
1250 ):
1251 data = self.read(amt=amt, decode_content=decode_content)
1252
1253 if data:
1254 yield data
1255
1256 # Overrides from io.IOBase
1257 def readable(self) -> bool:
1258 return True
1259
1260 def shutdown(self) -> None:
1261 if not self._sock_shutdown:
1262 raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
1263 if self._connection is None:
1264 raise RuntimeError(
1265 "Cannot shutdown as connection has already been released to the pool"
1266 )
1267 self._sock_shutdown(socket.SHUT_RD)
1268
1269 def close(self) -> None:
1270 self._sock_shutdown = None
1271
1272 if not self.closed and self._fp:
1273 self._fp.close()
1274
1275 if self._connection:
1276 self._connection.close()
1277
1278 if not self.auto_close:
1279 io.IOBase.close(self)
1280
1281 @property
1282 def closed(self) -> bool:
1283 if not self.auto_close:
1284 return io.IOBase.closed.__get__(self) # type: ignore[no-any-return]
1285 elif self._fp is None:
1286 return True
1287 elif hasattr(self._fp, "isclosed"):
1288 return self._fp.isclosed()
1289 elif hasattr(self._fp, "closed"):
1290 return self._fp.closed
1291 else:
1292 return True
1293
1294 def fileno(self) -> int:
1295 if self._fp is None:
1296 raise OSError("HTTPResponse has no file to get a fileno from")
1297 elif hasattr(self._fp, "fileno"):
1298 return self._fp.fileno()
1299 else:
1300 raise OSError(
1301 "The file-like object this HTTPResponse is wrapped "
1302 "around has no file descriptor"
1303 )
1304
1305 def flush(self) -> None:
1306 if (
1307 self._fp is not None
1308 and hasattr(self._fp, "flush")
1309 and not getattr(self._fp, "closed", False)
1310 ):
1311 return self._fp.flush()
1312
1313 def supports_chunked_reads(self) -> bool:
1314 """
1315 Checks if the underlying file-like object looks like a
1316 :class:`http.client.HTTPResponse` object. We do this by testing for
1317 the fp attribute. If it is present we assume it returns raw chunks as
1318 processed by read_chunked().
1319 """
1320 return hasattr(self._fp, "fp")
1321
1322 def _update_chunk_length(self) -> None:
1323 # First, we'll figure out length of a chunk and then
1324 # we'll try to read it from socket.
1325 if self.chunk_left is not None:
1326 return None
1327 line = self._fp.fp.readline() # type: ignore[union-attr]
1328 line = line.split(b";", 1)[0]
1329 try:
1330 self.chunk_left = int(line, 16)
1331 except ValueError:
1332 self.close()
1333 if line:
1334 # Invalid chunked protocol response, abort.
1335 raise InvalidChunkLength(self, line) from None
1336 else:
1337 # Truncated at start of next chunk
1338 raise ProtocolError("Response ended prematurely") from None
1339
1340 def _handle_chunk(self, amt: int | None) -> bytes:
1341 returned_chunk = None
1342 if amt is None:
1343 chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1344 returned_chunk = chunk
1345 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1346 self.chunk_left = None
1347 elif self.chunk_left is not None and amt < self.chunk_left:
1348 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1349 self.chunk_left = self.chunk_left - amt
1350 returned_chunk = value
1351 elif amt == self.chunk_left:
1352 value = self._fp._safe_read(amt) # type: ignore[union-attr]
1353 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1354 self.chunk_left = None
1355 returned_chunk = value
1356 else: # amt > self.chunk_left
1357 returned_chunk = self._fp._safe_read(self.chunk_left) # type: ignore[union-attr]
1358 self._fp._safe_read(2) # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
1359 self.chunk_left = None
1360 return returned_chunk # type: ignore[no-any-return]
1361
1362 def read_chunked(
1363 self, amt: int | None = None, decode_content: bool | None = None
1364 ) -> typing.Generator[bytes]:
1365 """
1366 Similar to :meth:`HTTPResponse.read`, but with an additional
1367 parameter: ``decode_content``.
1368
1369 :param amt:
1370 How much of the content to read. If specified, caching is skipped
1371 because it doesn't make sense to cache partial content as the full
1372 response.
1373
1374 :param decode_content:
1375 If True, will attempt to decode the body based on the
1376 'content-encoding' header.
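
        Example (illustrative; assumes a ``Transfer-Encoding: chunked`` response
        read with ``preload_content=False``)::

            for chunk in resp.read_chunked(8192, decode_content=True):
                handle(chunk)  # ``handle`` is a placeholder for your own code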
1377 """
1378 self._init_decoder()
1379 # FIXME: Rewrite this method and make it a class with a better structured logic.
1380 if not self.chunked:
1381 raise ResponseNotChunked(
1382 "Response is not chunked. "
1383 "Header 'transfer-encoding: chunked' is missing."
1384 )
1385 if not self.supports_chunked_reads():
1386 raise BodyNotHttplibCompatible(
1387 "Body should be http.client.HTTPResponse like. "
1388 "It should have have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                # First, check if any data is left in the decoder's buffer.
                if self._decoder and self._decoder.has_unconsumed_tail:
                    chunk = b""
                else:
                    self._update_chunk_length()
                    if self.chunk_left == 0:
                        break
                    chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk,
                    decode_content=decode_content,
                    flush_decoder=False,
                    max_length=amt,
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str | None) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)