from __future__ import annotations

import collections
import io
import json as _json
import logging
import re
import socket
import sys
import typing
import warnings
import zlib
from contextlib import contextmanager
from http.client import HTTPMessage as _HttplibHTTPMessage
from http.client import HTTPResponse as _HttplibHTTPResponse
from socket import timeout as SocketTimeout

if typing.TYPE_CHECKING:
    from ._base_connection import BaseHTTPConnection

try:
    try:
        import brotlicffi as brotli  # type: ignore[import-not-found]
    except ImportError:
        import brotli  # type: ignore[import-not-found]
except ImportError:
    brotli = None

from . import util
from ._base_connection import _TYPE_BODY
from ._collections import HTTPHeaderDict
from .connection import BaseSSLError, HTTPConnection, HTTPException
from .exceptions import (
    BodyNotHttplibCompatible,
    DecodeError,
    HTTPError,
    IncompleteRead,
    InvalidChunkLength,
    InvalidHeader,
    ProtocolError,
    ReadTimeoutError,
    ResponseNotChunked,
    SSLError,
)
from .util.response import is_fp_closed, is_response_to_head
from .util.retry import Retry

if typing.TYPE_CHECKING:
    from .connectionpool import HTTPConnectionPool

log = logging.getLogger(__name__)


class ContentDecoder:
    def decompress(self, data: bytes) -> bytes:
        raise NotImplementedError()

    def flush(self) -> bytes:
        raise NotImplementedError()


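# The HTTP "deflate" coding is ambiguous in the wild: RFC 9110 defines it as
# zlib-wrapped (RFC 1950) DEFLATE data, but some servers send a raw DEFLATE
# (RFC 1951) stream instead. DeflateDecoder below first tries the zlib wrapper
# and transparently falls back to a raw-deflate decompressor (-zlib.MAX_WBITS)
# on the first error.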
class DeflateDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._first_try = True
        self._data = b""
        self._obj = zlib.decompressobj()

    def decompress(self, data: bytes) -> bytes:
        if not data:
            return data

        if not self._first_try:
            return self._obj.decompress(data)

        self._data += data
        try:
            decompressed = self._obj.decompress(data)
            if decompressed:
                self._first_try = False
                self._data = None  # type: ignore[assignment]
            return decompressed
        except zlib.error:
            self._first_try = False
            self._obj = zlib.decompressobj(-zlib.MAX_WBITS)
            try:
                return self.decompress(self._data)
            finally:
                self._data = None  # type: ignore[assignment]

    def flush(self) -> bytes:
        return self._obj.flush()


class GzipDecoderState:
    FIRST_MEMBER = 0
    OTHER_MEMBERS = 1
    SWALLOW_DATA = 2


class GzipDecoder(ContentDecoder):
    def __init__(self) -> None:
        self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)
        self._state = GzipDecoderState.FIRST_MEMBER

    def decompress(self, data: bytes) -> bytes:
        ret = bytearray()
        if self._state == GzipDecoderState.SWALLOW_DATA or not data:
            return bytes(ret)
        while True:
            try:
                ret += self._obj.decompress(data)
            except zlib.error:
                previous_state = self._state
                # Ignore data after the first error
                self._state = GzipDecoderState.SWALLOW_DATA
                if previous_state == GzipDecoderState.OTHER_MEMBERS:
                    # Allow trailing garbage acceptable in other gzip clients
                    return bytes(ret)
                raise
            data = self._obj.unused_data
            if not data:
                return bytes(ret)
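            # Leftover data after a complete member means the body contains
            # multiple concatenated gzip members (allowed by RFC 1952), so
            # reset state and start a fresh decompressor for the next member.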
            self._state = GzipDecoderState.OTHER_MEMBERS
            self._obj = zlib.decompressobj(16 + zlib.MAX_WBITS)

    def flush(self) -> bytes:
        return self._obj.flush()


if brotli is not None:

    class BrotliDecoder(ContentDecoder):
        # Supports both 'brotlipy' and 'Brotli' packages
        # since they share an import name. The top branches
        # are for 'brotlipy' and bottom branches for 'Brotli'
        def __init__(self) -> None:
            self._obj = brotli.Decompressor()
            if hasattr(self._obj, "decompress"):
                setattr(self, "decompress", self._obj.decompress)
            else:
                setattr(self, "decompress", self._obj.process)

        def flush(self) -> bytes:
            if hasattr(self._obj, "flush"):
                return self._obj.flush()  # type: ignore[no-any-return]
            return b""


try:
    # Python 3.14+
    from compression import zstd  # type: ignore[import-not-found] # noqa: F401

    HAS_ZSTD = True

    class ZstdDecoder(ContentDecoder):
        def __init__(self) -> None:
            self._obj = zstd.ZstdDecompressor()

        def decompress(self, data: bytes) -> bytes:
            if not data:
                return b""
            data_parts = [self._obj.decompress(data)]
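            # A finished frame followed by unused data means the body holds
            # multiple concatenated zstd frames; decode each one with a fresh
            # decompressor.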
            while self._obj.eof and self._obj.unused_data:
                unused_data = self._obj.unused_data
                self._obj = zstd.ZstdDecompressor()
                data_parts.append(self._obj.decompress(unused_data))
            return b"".join(data_parts)

        def flush(self) -> bytes:
            if not self._obj.eof:
                raise DecodeError("Zstandard data is incomplete")
            return b""

except ImportError:
    try:
        # Python 3.13 and earlier require the 'zstandard' module.
        import zstandard as zstd

        # The package 'zstandard' added the 'eof' property starting
        # in v0.18.0 which we require to ensure a complete and
        # valid zstd stream was fed into the ZstdDecoder.
        # See: https://github.com/urllib3/urllib3/pull/2624
        _zstd_version = tuple(
            map(int, re.search(r"^([0-9]+)\.([0-9]+)", zstd.__version__).groups())  # type: ignore[union-attr]
        )
        if _zstd_version < (0, 18):  # Defensive:
            raise ImportError("zstandard module doesn't have eof")
    except (AttributeError, ImportError, ValueError):  # Defensive:
        HAS_ZSTD = False
    else:
        HAS_ZSTD = True

        class ZstdDecoder(ContentDecoder):  # type: ignore[no-redef]
            def __init__(self) -> None:
                self._obj = zstd.ZstdDecompressor().decompressobj()

            def decompress(self, data: bytes) -> bytes:
                if not data:
                    return b""
                data_parts = [self._obj.decompress(data)]
                while self._obj.eof and self._obj.unused_data:
                    unused_data = self._obj.unused_data
                    self._obj = zstd.ZstdDecompressor().decompressobj()
                    data_parts.append(self._obj.decompress(unused_data))
                return b"".join(data_parts)

            def flush(self) -> bytes:
                ret = self._obj.flush()  # note: this is a no-op
                if not self._obj.eof:
                    raise DecodeError("Zstandard data is incomplete")
                return ret  # type: ignore[no-any-return]


class MultiDecoder(ContentDecoder):
    """
    From RFC7231:
        If one or more encodings have been applied to a representation, the
        sender that applied the encodings MUST generate a Content-Encoding
        header field that lists the content codings in the order in which
        they were applied.
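
    For example, ``Content-Encoding: gzip, br`` means the body was gzipped
    first and then brotli-compressed, so decoding must apply the decoders in
    reverse header order: brotli first, then gzip (see ``reversed()`` in
    ``decompress()`` below).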
    """

    def __init__(self, modes: str) -> None:
        self._decoders = [_get_decoder(m.strip()) for m in modes.split(",")]

    def flush(self) -> bytes:
        return self._decoders[0].flush()

    def decompress(self, data: bytes) -> bytes:
        for d in reversed(self._decoders):
            data = d.decompress(data)
        return data


def _get_decoder(mode: str) -> ContentDecoder:
    if "," in mode:
        return MultiDecoder(mode)

    # According to RFC 9110 section 8.4.1.3, recipients should
    # consider x-gzip equivalent to gzip
    if mode in ("gzip", "x-gzip"):
        return GzipDecoder()

    if brotli is not None and mode == "br":
        return BrotliDecoder()

    if HAS_ZSTD and mode == "zstd":
        return ZstdDecoder()

    return DeflateDecoder()


class BytesQueueBuffer:
    """Memory-efficient bytes buffer

    To return decoded data in read() and still follow the BufferedIOBase API, we need a
    buffer to always return the correct amount of bytes.

    This buffer should be filled using calls to put().

    Our maximum memory usage is determined by the sum of the size of:

     * self.buffer, which contains the full data
     * the largest chunk that we will copy in get()

    The worst case scenario is a single chunk, in which case we'll make a full copy of
    the data inside get().
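
    A minimal sketch of the intended usage::

        buf = BytesQueueBuffer()
        buf.put(b"hello")
        buf.put(b"world")
        assert buf.get(7) == b"hellowo"  # copies and dequeues 7 bytes
        assert buf.get_all() == b"rld"   # drains what's left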
    """

    def __init__(self) -> None:
        self.buffer: typing.Deque[bytes] = collections.deque()
        self._size: int = 0

    def __len__(self) -> int:
        return self._size

    def put(self, data: bytes) -> None:
        self.buffer.append(data)
        self._size += len(data)

    def get(self, n: int) -> bytes:
        if n == 0:
            return b""
        elif not self.buffer:
            raise RuntimeError("buffer is empty")
        elif n < 0:
            raise ValueError("n should be > 0")

        fetched = 0
        ret = io.BytesIO()
        while fetched < n:
            remaining = n - fetched
            chunk = self.buffer.popleft()
            chunk_length = len(chunk)
            if remaining < chunk_length:
                left_chunk, right_chunk = chunk[:remaining], chunk[remaining:]
                ret.write(left_chunk)
                self.buffer.appendleft(right_chunk)
                self._size -= remaining
                break
            else:
                ret.write(chunk)
                self._size -= chunk_length
            fetched += chunk_length

            if not self.buffer:
                break

        return ret.getvalue()

    def get_all(self) -> bytes:
        buffer = self.buffer
        if not buffer:
            assert self._size == 0
            return b""
        if len(buffer) == 1:
            result = buffer.pop()
        else:
            ret = io.BytesIO()
            ret.writelines(buffer.popleft() for _ in range(len(buffer)))
            result = ret.getvalue()
        self._size = 0
        return result


class BaseHTTPResponse(io.IOBase):
    CONTENT_DECODERS = ["gzip", "x-gzip", "deflate"]
    if brotli is not None:
        CONTENT_DECODERS += ["br"]
    if HAS_ZSTD:
        CONTENT_DECODERS += ["zstd"]
    REDIRECT_STATUSES = [301, 302, 303, 307, 308]

    DECODER_ERROR_CLASSES: tuple[type[Exception], ...] = (IOError, zlib.error)
    if brotli is not None:
        DECODER_ERROR_CLASSES += (brotli.error,)

    if HAS_ZSTD:
        DECODER_ERROR_CLASSES += (zstd.ZstdError,)

    def __init__(
        self,
        *,
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int,
        version: int,
        version_string: str,
        reason: str | None,
        decode_content: bool,
        request_url: str | None,
        retries: Retry | None = None,
    ) -> None:
        if isinstance(headers, HTTPHeaderDict):
            self.headers = headers
        else:
            self.headers = HTTPHeaderDict(headers)  # type: ignore[arg-type]
        self.status = status
        self.version = version
        self.version_string = version_string
        self.reason = reason
        self.decode_content = decode_content
        self._has_decoded_content = False
        self._request_url: str | None = request_url
        self.retries = retries

        self.chunked = False
        tr_enc = self.headers.get("transfer-encoding", "").lower()
        # Don't incur the penalty of creating a list and then discarding it
        encodings = (enc.strip() for enc in tr_enc.split(","))
        if "chunked" in encodings:
            self.chunked = True

        self._decoder: ContentDecoder | None = None
        self.length_remaining: int | None

    def get_redirect_location(self) -> str | None | typing.Literal[False]:
        """
        Should we redirect and where to?

        :returns: Truthy redirect location string if we got a redirect status
            code and valid location. ``None`` if redirect status and no
            location. ``False`` if not a redirect status code.
        """
        if self.status in self.REDIRECT_STATUSES:
            return self.headers.get("location")
        return False

    @property
    def data(self) -> bytes:
        raise NotImplementedError()

    def json(self) -> typing.Any:
        """
        Deserializes the body of the HTTP response as a Python object.

        The body of the HTTP response must be encoded using UTF-8, as per
        `RFC 8259 Section 8.1 <https://www.rfc-editor.org/rfc/rfc8259#section-8.1>`_.

        To use a custom JSON decoder pass the result of :attr:`HTTPResponse.data` to
        your custom decoder instead.

        If the body of the HTTP response is not decodable to UTF-8, a
        `UnicodeDecodeError` will be raised. If the body of the HTTP response is not a
        valid JSON document, a `json.JSONDecodeError` will be raised.

        Read more :ref:`here <json_content>`.

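        A minimal usage sketch (``https://example.com/api.json`` is a
        placeholder endpoint; assumes the top-level :func:`urllib3.request`
        helper)::

            import urllib3

            resp = urllib3.request("GET", "https://example.com/api.json")
            payload = resp.json()
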
        :returns: The body of the HTTP response as a Python object.
        """
        data = self.data.decode("utf-8")
        return _json.loads(data)

    @property
    def url(self) -> str | None:
        raise NotImplementedError()

    @url.setter
    def url(self, url: str | None) -> None:
        raise NotImplementedError()

    @property
    def connection(self) -> BaseHTTPConnection | None:
        raise NotImplementedError()

    @property
    def retries(self) -> Retry | None:
        return self._retries

    @retries.setter
    def retries(self, retries: Retry | None) -> None:
        # Override the request_url if retries has a redirect location.
        if retries is not None and retries.history:
            self.url = retries.history[-1].redirect_location
        self._retries = retries

    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        raise NotImplementedError()

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        raise NotImplementedError()

    def read_chunked(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> typing.Iterator[bytes]:
        raise NotImplementedError()

    def release_conn(self) -> None:
        raise NotImplementedError()

    def drain_conn(self) -> None:
        raise NotImplementedError()

    def shutdown(self) -> None:
        raise NotImplementedError()

    def close(self) -> None:
        raise NotImplementedError()

    def _init_decoder(self) -> None:
        """
        Set-up the _decoder attribute if necessary.
        """
        # Note: content-encoding value should be case-insensitive, per RFC 7230
        # Section 3.2
        content_encoding = self.headers.get("content-encoding", "").lower()
        if self._decoder is None:
            if content_encoding in self.CONTENT_DECODERS:
                self._decoder = _get_decoder(content_encoding)
            elif "," in content_encoding:
                encodings = [
                    e.strip()
                    for e in content_encoding.split(",")
                    if e.strip() in self.CONTENT_DECODERS
                ]
                if encodings:
                    self._decoder = _get_decoder(content_encoding)

    def _decode(
        self, data: bytes, decode_content: bool | None, flush_decoder: bool
    ) -> bytes:
        """
        Decode the data passed in and potentially flush the decoder.
        """
        if not decode_content:
            if self._has_decoded_content:
                raise RuntimeError(
                    "Calling read(decode_content=False) is not supported after "
                    "read(decode_content=True) was called."
                )
            return data

        try:
            if self._decoder:
                data = self._decoder.decompress(data)
                self._has_decoded_content = True
        except self.DECODER_ERROR_CLASSES as e:
            content_encoding = self.headers.get("content-encoding", "").lower()
            raise DecodeError(
                "Received response with content-encoding: %s, but "
                "failed to decode it." % content_encoding,
                e,
            ) from e
        if flush_decoder:
            data += self._flush_decoder()

        return data

    def _flush_decoder(self) -> bytes:
        """
        Flushes the decoder. Should only be called if the decoder is actually
        being used.
        """
        if self._decoder:
            return self._decoder.decompress(b"") + self._decoder.flush()
        return b""

    # Compatibility methods for `io` module
    def readinto(self, b: bytearray) -> int:
        temp = self.read(len(b))
        if len(temp) == 0:
            return 0
        else:
            b[: len(temp)] = temp
            return len(temp)

    # Compatibility methods for http.client.HTTPResponse
    def getheaders(self) -> HTTPHeaderDict:
        warnings.warn(
            "HTTPResponse.getheaders() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead access HTTPResponse.headers directly.",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers

    def getheader(self, name: str, default: str | None = None) -> str | None:
        warnings.warn(
            "HTTPResponse.getheader() is deprecated and will be removed "
            "in urllib3 v2.6.0. Instead use HTTPResponse.headers.get(name, default).",
            category=DeprecationWarning,
            stacklevel=2,
        )
        return self.headers.get(name, default)

    # Compatibility method for http.cookiejar
    def info(self) -> HTTPHeaderDict:
        return self.headers

    def geturl(self) -> str | None:
        return self.url


class HTTPResponse(BaseHTTPResponse):
    """
    HTTP Response container.

    Backwards-compatible with :class:`http.client.HTTPResponse` but the response ``body`` is
    loaded and decoded on-demand when the ``data`` property is accessed. This
    class is also compatible with the Python standard library's :mod:`io`
    module, and can hence be treated as a readable object in the context of that
    framework.

    Extra parameters for behaviour not present in :class:`http.client.HTTPResponse`:

    :param preload_content:
        If True, the response's body will be preloaded during construction.

    :param decode_content:
        If True, will attempt to decode the body based on the
        'content-encoding' header.

    :param original_response:
        When this HTTPResponse wrapper is generated from an :class:`http.client.HTTPResponse`
        object, it's convenient to include the original for debug purposes. It's
        otherwise unused.

    :param retries:
        The retries contains the last :class:`~urllib3.util.retry.Retry` that
        was used during the request.

    :param enforce_content_length:
        Enforce content length checking. Body returned by server must match
        value of Content-Length header, if present. Otherwise, raise error.
    """

    def __init__(
        self,
        body: _TYPE_BODY = "",
        headers: typing.Mapping[str, str] | typing.Mapping[bytes, bytes] | None = None,
        status: int = 0,
        version: int = 0,
        version_string: str = "HTTP/?",
        reason: str | None = None,
        preload_content: bool = True,
        decode_content: bool = True,
        original_response: _HttplibHTTPResponse | None = None,
        pool: HTTPConnectionPool | None = None,
        connection: HTTPConnection | None = None,
        msg: _HttplibHTTPMessage | None = None,
        retries: Retry | None = None,
        enforce_content_length: bool = True,
        request_method: str | None = None,
        request_url: str | None = None,
        auto_close: bool = True,
        sock_shutdown: typing.Callable[[int], None] | None = None,
    ) -> None:
        super().__init__(
            headers=headers,
            status=status,
            version=version,
            version_string=version_string,
            reason=reason,
            decode_content=decode_content,
            request_url=request_url,
            retries=retries,
        )

        self.enforce_content_length = enforce_content_length
        self.auto_close = auto_close

        self._body = None
        self._fp: _HttplibHTTPResponse | None = None
        self._original_response = original_response
        self._fp_bytes_read = 0
        self.msg = msg

        if body and isinstance(body, (str, bytes)):
            self._body = body

        self._pool = pool
        self._connection = connection

        if hasattr(body, "read"):
            self._fp = body  # type: ignore[assignment]
        self._sock_shutdown = sock_shutdown

        # Are we using the chunked-style of transfer encoding?
        self.chunk_left: int | None = None

        # Determine length of response
        self.length_remaining = self._init_length(request_method)

        # Used to return the correct amount of bytes for partial read()s
        self._decoded_buffer = BytesQueueBuffer()

        # If requested, preload the body.
        if preload_content and not self._body:
            self._body = self.read(decode_content=decode_content)

    def release_conn(self) -> None:
        if not self._pool or not self._connection:
            return None

        self._pool._put_conn(self._connection)
        self._connection = None

    def drain_conn(self) -> None:
        """
        Read and discard any remaining HTTP response data in the response connection.

        Unread data in the HTTPResponse connection blocks the connection from being released back to the pool.
        """
        try:
            self.read()
        except (HTTPError, OSError, BaseSSLError, HTTPException):
            pass

    @property
    def data(self) -> bytes:
        # For backwards-compat with urllib3 0.4 and earlier.
        if self._body:
            return self._body  # type: ignore[return-value]

        if self._fp:
            return self.read(cache_content=True)

        return None  # type: ignore[return-value]

    @property
    def connection(self) -> HTTPConnection | None:
        return self._connection

    def isclosed(self) -> bool:
        return is_fp_closed(self._fp)

    def tell(self) -> int:
        """
        Obtain the number of bytes pulled over the wire so far. May differ from
        the amount of content returned by :meth:`urllib3.response.HTTPResponse.read`
        if bytes are encoded on the wire (e.g., compressed).
        """
        return self._fp_bytes_read

    def _init_length(self, request_method: str | None) -> int | None:
        """
        Set initial length value for Response content if available.
        """
        length: int | None
        content_length: str | None = self.headers.get("content-length")

        if content_length is not None:
            if self.chunked:
                # This Response will fail with an IncompleteRead if it can't be
                # received as chunked. This method falls back to attempt reading
                # the response before raising an exception.
                log.warning(
                    "Received response with both Content-Length and "
                    "Transfer-Encoding set. This is expressly forbidden "
                    "by RFC 7230 sec 3.3.2. Ignoring Content-Length and "
                    "attempting to process response as Transfer-Encoding: "
                    "chunked."
                )
                return None

            try:
                # RFC 7230 section 3.3.2 specifies multiple content lengths can
                # be sent in a single Content-Length header
                # (e.g. Content-Length: 42, 42). This line ensures the values
                # are all valid ints and that as long as the `set` length is 1,
                # all values are the same. Otherwise, the header is invalid.
                lengths = {int(val) for val in content_length.split(",")}
                if len(lengths) > 1:
                    raise InvalidHeader(
                        "Content-Length contained multiple "
                        "unmatching values (%s)" % content_length
                    )
                length = lengths.pop()
            except ValueError:
                length = None
            else:
                if length < 0:
                    length = None

        else:  # if content_length is None
            length = None

        # Convert status to int for comparison
        # In some cases, httplib returns a status of "_UNKNOWN"
        try:
            status = int(self.status)
        except ValueError:
            status = 0

        # Check for responses that shouldn't include a body
        if status in (204, 304) or 100 <= status < 200 or request_method == "HEAD":
            length = 0

        return length

    @contextmanager
    def _error_catcher(self) -> typing.Generator[None]:
        """
        Catch low-level python exceptions, instead re-raising urllib3
        variants, so that low-level exceptions are not leaked in the
        high-level api.

        On exit, release the connection back to the pool.
        """
        clean_exit = False

        try:
            try:
                yield

            except SocketTimeout as e:
                # FIXME: Ideally we'd like to include the url in the ReadTimeoutError but
                # there is yet no clean way to get at it from this context.
                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except BaseSSLError as e:
                # FIXME: Is there a better way to differentiate between SSLErrors?
                if "read operation timed out" not in str(e):
                    # SSL errors related to framing/MAC get wrapped and reraised here
                    raise SSLError(e) from e

                raise ReadTimeoutError(self._pool, None, "Read timed out.") from e  # type: ignore[arg-type]

            except IncompleteRead as e:
                if (
                    e.expected is not None
                    and e.partial is not None
                    and e.expected == -e.partial
                ):
                    arg = "Response may not contain content."
                else:
                    arg = f"Connection broken: {e!r}"
                raise ProtocolError(arg, e) from e

            except (HTTPException, OSError) as e:
                raise ProtocolError(f"Connection broken: {e!r}", e) from e

            # If no exception is thrown, we should avoid cleaning up
            # unnecessarily.
            clean_exit = True
        finally:
            # If we didn't terminate cleanly, we need to throw away our
            # connection.
            if not clean_exit:
                # The response may not be closed but we're not going to use it
                # anymore so close it now to ensure that the connection is
                # released back to the pool.
                if self._original_response:
                    self._original_response.close()

                # Closing the response may not actually be sufficient to close
                # everything, so if we have a hold of the connection close that
                # too.
                if self._connection:
                    self._connection.close()

            # If we hold the original response but it's closed now, we should
            # return the connection back to the pool.
            if self._original_response and self._original_response.isclosed():
                self.release_conn()

    def _fp_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Read a response with the thought that reading the number of bytes
        larger than can fit in a 32-bit int at a time via SSL in some
        known cases leads to an overflow error that has to be prevented
        if `amt` or `self.length_remaining` indicate that a problem may
        happen.

        The known cases:
          * CPython < 3.9.7 because of a bug
            https://github.com/urllib3/urllib3/issues/2513#issuecomment-1152559900.
          * urllib3 injected with pyOpenSSL-backed SSL-support.
          * CPython < 3.10 only when `amt` does not fit 32-bit int.
        """
        assert self._fp
        c_int_max = 2**31 - 1
        if (
            (amt and amt > c_int_max)
            or (
                amt is None
                and self.length_remaining
                and self.length_remaining > c_int_max
            )
        ) and (util.IS_PYOPENSSL or sys.version_info < (3, 10)):
            if read1:
                return self._fp.read1(c_int_max)
            buffer = io.BytesIO()
            # Besides `max_chunk_amt` being a maximum chunk size, it
            # affects memory overhead of reading a response by this
            # method in CPython.
            # `c_int_max` equal to 2 GiB - 1 byte is the actual maximum
            # chunk size that does not lead to an overflow error, but
            # 256 MiB is a compromise.
            max_chunk_amt = 2**28
            while amt is None or amt != 0:
                if amt is not None:
                    chunk_amt = min(amt, max_chunk_amt)
                    amt -= chunk_amt
                else:
                    chunk_amt = max_chunk_amt
                data = self._fp.read(chunk_amt)
                if not data:
                    break
                buffer.write(data)
                del data  # to reduce peak memory usage by `max_chunk_amt`.
            return buffer.getvalue()
        elif read1:
            return self._fp.read1(amt) if amt is not None else self._fp.read1()
        else:
            # StringIO doesn't like amt=None
            return self._fp.read(amt) if amt is not None else self._fp.read()

    def _raw_read(
        self,
        amt: int | None = None,
        *,
        read1: bool = False,
    ) -> bytes:
        """
        Reads `amt` of bytes from the socket.
        """
        if self._fp is None:
            return None  # type: ignore[return-value]

        fp_closed = getattr(self._fp, "closed", False)

        with self._error_catcher():
            data = self._fp_read(amt, read1=read1) if not fp_closed else b""
            if amt is not None and amt != 0 and not data:
                # Platform-specific: Buggy versions of Python.
                # Close the connection when no data is returned
                #
                # This is redundant to what httplib/http.client _should_
                # already do. However, versions of python released before
                # December 15, 2012 (http://bugs.python.org/issue16298) do
                # not properly close the connection in all cases. There is
                # no harm in redundantly calling close.
                self._fp.close()
                if (
                    self.enforce_content_length
                    and self.length_remaining is not None
                    and self.length_remaining != 0
                ):
                    # This is an edge case that httplib failed to cover due
                    # to concerns of backward compatibility. We're
                    # addressing it here to make sure IncompleteRead is
                    # raised during streaming, so all calls with incorrect
                    # Content-Length are caught.
                    raise IncompleteRead(self._fp_bytes_read, self.length_remaining)
            elif read1 and (
                (amt != 0 and not data) or self.length_remaining == len(data)
            ):
                # All data has been read, but `self._fp.read1` in
                # CPython 3.12 and older doesn't always close
                # `http.client.HTTPResponse`, so we close it here.
                # See https://github.com/python/cpython/issues/113199
                self._fp.close()

            if data:
                self._fp_bytes_read += len(data)
                if self.length_remaining is not None:
                    self.length_remaining -= len(data)
        return data
    def read(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
        cache_content: bool = False,
    ) -> bytes:
        """
        Similar to :meth:`http.client.HTTPResponse.read`, but with two additional
        parameters: ``decode_content`` and ``cache_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.

        :param cache_content:
            If True, will save the returned data such that the same result is
            returned regardless of the state of the underlying file object. This
            is useful if you want the ``.data`` property to continue working
            after having ``.read()`` the file object. (Overridden if ``amt`` is
            set.)
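
        A minimal usage sketch (``url`` is a placeholder; assumes the
        top-level :func:`urllib3.request` helper)::

            resp = urllib3.request("GET", url, preload_content=False)
            first_kb = resp.read(1024)  # returns up to 1024 decoded bytes
            rest = resp.read()  # reads and decodes the remainder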
        """
        self._init_decoder()
        if decode_content is None:
            decode_content = self.decode_content

        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        elif amt is not None:
            cache_content = False

            if len(self._decoded_buffer) >= amt:
                return self._decoded_buffer.get(amt)

        data = self._raw_read(amt)

        flush_decoder = amt is None or (amt != 0 and not data)

        if not data and len(self._decoded_buffer) == 0:
            return data

        if amt is None:
            data = self._decode(data, decode_content, flush_decoder)
            if cache_content:
                self._body = data
        else:
            # do not waste memory on buffer when not decoding
            if not decode_content:
                if self._has_decoded_content:
                    raise RuntimeError(
                        "Calling read(decode_content=False) is not supported after "
                        "read(decode_content=True) was called."
                    )
                return data

            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)

            while len(self._decoded_buffer) < amt and data:
                # TODO make sure to initially read enough data to get past the headers
                # For example, the GZ file header takes 10 bytes, we don't want to read
                # it one byte at a time
                data = self._raw_read(amt)
                decoded_data = self._decode(data, decode_content, flush_decoder)
                self._decoded_buffer.put(decoded_data)
            data = self._decoded_buffer.get(amt)

        return data

    def read1(
        self,
        amt: int | None = None,
        decode_content: bool | None = None,
    ) -> bytes:
        """
        Similar to ``http.client.HTTPResponse.read1`` and documented
        in :meth:`io.BufferedReader.read1`, but with an additional parameter:
        ``decode_content``.

        :param amt:
            How much of the content to read.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        if decode_content is None:
            decode_content = self.decode_content
        if amt and amt < 0:
            # Negative numbers and `None` should be treated the same.
            amt = None
        # try and respond without going to the network
        if self._has_decoded_content:
            if not decode_content:
                raise RuntimeError(
                    "Calling read1(decode_content=False) is not supported after "
                    "read1(decode_content=True) was called."
                )
            if len(self._decoded_buffer) > 0:
                if amt is None:
                    return self._decoded_buffer.get_all()
                return self._decoded_buffer.get(amt)
        if amt == 0:
            return b""

        # FIXME, this method's type doesn't say returning None is possible
        data = self._raw_read(amt, read1=True)
        if not decode_content or data is None:
            return data

        self._init_decoder()
        while True:
            flush_decoder = not data
            decoded_data = self._decode(data, decode_content, flush_decoder)
            self._decoded_buffer.put(decoded_data)
            if decoded_data or flush_decoder:
                break
            data = self._raw_read(8192, read1=True)

        if amt is None:
            return self._decoded_buffer.get_all()
        return self._decoded_buffer.get(amt)
    def stream(
        self, amt: int | None = 2**16, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        A generator wrapper for the read() method. A call will block until
        ``amt`` bytes have been read from the connection or until the
        connection is closed.

        :param amt:
            How much of the content to read. The generator will return up to
            this much data per iteration, but may return less. This is
            particularly likely when using compressed data. However, the
            empty string will never be returned.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
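
        A minimal usage sketch (``url`` is a placeholder; assumes the
        top-level :func:`urllib3.request` helper)::

            resp = urllib3.request("GET", url, preload_content=False)
            for chunk in resp.stream(2**16):
                ...  # up to 64 KiB of decoded data per iteration
            resp.release_conn()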
        """
        if self.chunked and self.supports_chunked_reads():
            yield from self.read_chunked(amt, decode_content=decode_content)
        else:
            while not is_fp_closed(self._fp) or len(self._decoded_buffer) > 0:
                data = self.read(amt=amt, decode_content=decode_content)

                if data:
                    yield data

    # Overrides from io.IOBase
    def readable(self) -> bool:
        return True

    def shutdown(self) -> None:
        if not self._sock_shutdown:
            raise ValueError("Cannot shutdown socket as self._sock_shutdown is not set")
        if self._connection is None:
            raise RuntimeError(
                "Cannot shutdown as connection has already been released to the pool"
            )
        self._sock_shutdown(socket.SHUT_RD)

    def close(self) -> None:
        self._sock_shutdown = None

        if not self.closed and self._fp:
            self._fp.close()

        if self._connection:
            self._connection.close()

        if not self.auto_close:
            io.IOBase.close(self)

    @property
    def closed(self) -> bool:
        if not self.auto_close:
            return io.IOBase.closed.__get__(self)  # type: ignore[no-any-return]
        elif self._fp is None:
            return True
        elif hasattr(self._fp, "isclosed"):
            return self._fp.isclosed()
        elif hasattr(self._fp, "closed"):
            return self._fp.closed
        else:
            return True

    def fileno(self) -> int:
        if self._fp is None:
            raise OSError("HTTPResponse has no file to get a fileno from")
        elif hasattr(self._fp, "fileno"):
            return self._fp.fileno()
        else:
            raise OSError(
                "The file-like object this HTTPResponse is wrapped "
                "around has no file descriptor"
            )

    def flush(self) -> None:
        if (
            self._fp is not None
            and hasattr(self._fp, "flush")
            and not getattr(self._fp, "closed", False)
        ):
            return self._fp.flush()

    def supports_chunked_reads(self) -> bool:
        """
        Checks if the underlying file-like object looks like a
        :class:`http.client.HTTPResponse` object. We do this by testing for
        the fp attribute. If it is present we assume it returns raw chunks as
        processed by read_chunked().
        """
        return hasattr(self._fp, "fp")

    def _update_chunk_length(self) -> None:
        # First, we'll figure out the length of the next chunk and then
        # we'll try to read it from the socket.
        if self.chunk_left is not None:
            return None
        line = self._fp.fp.readline()  # type: ignore[union-attr]
        line = line.split(b";", 1)[0]
        try:
            self.chunk_left = int(line, 16)
        except ValueError:
            self.close()
            if line:
                # Invalid chunked protocol response, abort.
                raise InvalidChunkLength(self, line) from None
            else:
                # Truncated at start of next chunk
                raise ProtocolError("Response ended prematurely") from None

    def _handle_chunk(self, amt: int | None) -> bytes:
        returned_chunk = None
        if amt is None:
            chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            returned_chunk = chunk
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        elif self.chunk_left is not None and amt < self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self.chunk_left = self.chunk_left - amt
            returned_chunk = value
        elif amt == self.chunk_left:
            value = self._fp._safe_read(amt)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
            returned_chunk = value
        else:  # amt > self.chunk_left
            returned_chunk = self._fp._safe_read(self.chunk_left)  # type: ignore[union-attr]
            self._fp._safe_read(2)  # type: ignore[union-attr] # Toss the CRLF at the end of the chunk.
            self.chunk_left = None
        return returned_chunk  # type: ignore[no-any-return]

    def read_chunked(
        self, amt: int | None = None, decode_content: bool | None = None
    ) -> typing.Generator[bytes]:
        """
        Similar to :meth:`HTTPResponse.read`, but with an additional
        parameter: ``decode_content``.

        :param amt:
            How much of the content to read. If specified, caching is skipped
            because it doesn't make sense to cache partial content as the full
            response.

        :param decode_content:
            If True, will attempt to decode the body based on the
            'content-encoding' header.
        """
        self._init_decoder()
        # FIXME: Rewrite this method and make it a class with a better structured logic.
        if not self.chunked:
            raise ResponseNotChunked(
                "Response is not chunked. "
                "Header 'transfer-encoding: chunked' is missing."
            )
        if not self.supports_chunked_reads():
            raise BodyNotHttplibCompatible(
                "Body should be http.client.HTTPResponse like. "
                "It should have an fp attribute which returns raw chunks."
            )

        with self._error_catcher():
            # Don't bother reading the body of a HEAD request.
            if self._original_response and is_response_to_head(self._original_response):
                self._original_response.close()
                return None

            # If a response is already read and closed
            # then return immediately.
            if self._fp.fp is None:  # type: ignore[union-attr]
                return None

            if amt and amt < 0:
                # Negative numbers and `None` should be treated the same,
                # but httplib handles only `None` correctly.
                amt = None

            while True:
                self._update_chunk_length()
                if self.chunk_left == 0:
                    break
                chunk = self._handle_chunk(amt)
                decoded = self._decode(
                    chunk, decode_content=decode_content, flush_decoder=False
                )
                if decoded:
                    yield decoded

            if decode_content:
                # On CPython and PyPy, we should never need to flush the
                # decoder. However, on Jython we *might* need to, so
                # lets defensively do it anyway.
                decoded = self._flush_decoder()
                if decoded:  # Platform-specific: Jython.
                    yield decoded

            # Chunk content ends with \r\n: discard it.
            while self._fp is not None:
                line = self._fp.fp.readline()
                if not line:
                    # Some sites may not end with '\r\n'.
                    break
                if line == b"\r\n":
                    break

            # We read everything; close the "file".
            if self._original_response:
                self._original_response.close()

    @property
    def url(self) -> str | None:
        """
        Returns the URL that was the source of this response.
        If the request that generated this response redirected, this method
        will return the final redirect location.
        """
        return self._request_url

    @url.setter
    def url(self, url: str) -> None:
        self._request_url = url

    def __iter__(self) -> typing.Iterator[bytes]:
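        # Iterate over the decoded body line by line: split each streamed
        # chunk on b"\n", buffering any trailing partial line until a later
        # chunk (or end of stream) completes it.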
        buffer: list[bytes] = []
        for chunk in self.stream(decode_content=True):
            if b"\n" in chunk:
                chunks = chunk.split(b"\n")
                yield b"".join(buffer) + chunks[0] + b"\n"
                for x in chunks[1:-1]:
                    yield x + b"\n"
                if chunks[-1]:
                    buffer = [chunks[-1]]
                else:
                    buffer = []
            else:
                buffer.append(chunk)
        if buffer:
            yield b"".join(buffer)