Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/aiohttp/http_parser.py: 20%
497 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 DEBUG,
32 NO_EXTENSIONS,
33 BaseTimerContext,
34 method_must_be_empty_body,
35 status_code_must_be_empty_body,
36)
37from .http_exceptions import (
38 BadHttpMessage,
39 BadStatusLine,
40 ContentEncodingError,
41 ContentLengthError,
42 InvalidHeader,
43 InvalidURLError,
44 LineTooLong,
45 TransferEncodingError,
46)
47from .http_writer import HttpVersion, HttpVersion10
48from .log import internal_logger
49from .streams import EMPTY_PAYLOAD, StreamReader
50from .typedefs import RawHeaders
52__all__ = (
53 "HeadersParser",
54 "HttpParser",
55 "HttpRequestParser",
56 "HttpResponseParser",
57 "RawRequestMessage",
58 "RawResponseMessage",
59)
61_SEP = Literal[b"\r\n", b"\n"]
63ASCIISET: Final[Set[str]] = set(string.printable)
65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
67#
68# method = token
69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
71# token = 1*tchar
72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
74HDRRE: Final[Pattern[bytes]] = re.compile(
75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]"
76)
77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
    """Immutable result of parsing an HTTP request line plus headers."""

    method: str
    path: str  # request-target exactly as received (still percent-encoded)
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"
    raw_headers: RawHeaders  # (name, value) byte pairs, undecoded
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade was requested
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # path parsed into a yarl.URL
class RawResponseMessage(NamedTuple):
    """Immutable result of parsing an HTTP status line plus headers."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase, possibly empty
    headers: CIMultiDictProxy[str]
    raw_headers: RawHeaders  # (name, value) byte pairs, undecoded
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade was signalled
    chunked: bool  # Transfer-Encoding: chunked
# Message type produced by a concrete parser: request or response message.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the payload (message body) is delimited."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # body bounded by Content-Length
    PARSE_CHUNKED = 2  # body uses chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes
class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-coding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]\r\n"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF after chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after the 0-size chunk; trailers may follow
    PARSE_TRAILERS = 4  # discarding trailer lines until blank line
class HeadersParser:
    """Parse a block of RFC 9112 header field lines into a CIMultiDict.

    ``lines`` given to :meth:`parse_headers` holds the start line at index 0
    (ignored here), one header line per entry, and a terminating empty line.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        # NOTE(review): max_headers is stored but not enforced in this parser.
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Return ``(headers, raw_headers)`` parsed from *lines*.

        Raises InvalidHeader for malformed fields and LineTooLong when a
        name or (folded) value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # BUGFIX: an empty header name (a line like b":value") previously
            # raised IndexError from bname[0] below; reject it explicitly.
            if not bname:
                raise InvalidHeader(line)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
class HttpParser(abc.ABC, Generic[_MsgT]):
    """Incremental HTTP/1.x message parser shared by request/response parsers.

    Feed raw bytes via :meth:`feed_data`; completed ``(message, payload)``
    pairs are returned together with an upgrade flag and any unconsumed tail.
    """

    # Lax mode relaxes line-ending handling; enabled on the response
    # parser subclass only.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated start/header lines
        self._tail = b""  # bytes carried over between feed_data calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Turn accumulated start-line + header lines into a message."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a final partial message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # BUGFIX: this previously compared bytes to the *str* "\r\n",
                # which is always unequal in Python 3.  Compare bytes to
                # bytes instead (behavior is unchanged in practice because
                # split lines never contain the separator).
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Parse *data*, returning ``(messages, upgraded, leftover_bytes)``."""
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels bytes until EOF once established.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # No separator yet: stash the partial line for next call.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line plus header lines into RawRequestMessage."""
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        # NOTE(review): only the request-target is length-checked here,
        # although the error message says "Status line".
        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 closes by default
                close = True
            else:  # HTTP 1.1 keeps the connection alive by default
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default to bare-LF separators outside DEBUG mode so servers that
        # emit LF-only line endings can still be parsed (lax mode).
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line plus header lines into RawResponseMessage."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # No reason phrase present.
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit number
        if len(status) != 3 or not status.isdecimal():
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # HTTP/1.0 and below close by default; HTTP/1.1 keeps alive.
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )
class HttpPayloadParser:
    """Incremental parser for an HTTP message body.

    Feeds parsed body bytes into *payload* (optionally through a
    DeflateBuffer decompression wrapper).  The framing mode is chosen in
    ``__init__`` from the message metadata: Content-Length, chunked
    transfer coding, read-until-EOF, or no body at all.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk framing kept between feeds
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True once the whole body has been consumed

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raise if the body framing is incomplete."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Consume body bytes; return ``(body_complete, leftover_bytes)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream reader receiving decompressed bytes
        self.size = 0  # total compressed bytes fed so far
        self.encoding = encoding  # "gzip", "deflate" or "br"
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        else:
            self.decompressor = ZLibDecompressor(encoding=encoding)

    def set_exception(self, exc: BaseException) -> None:
        # Propagate a parsing error to the downstream reader.
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result downstream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and finish the downstream reader."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # A deflate stream that never reached its end marker is truncated.
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        # Pass chunk boundaries through to the underlying reader.
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Keep the pure-Python implementations reachable under the *Py aliases.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

# Prefer the C-accelerated parser when extensions are enabled and built;
# fall back silently to the Python implementations otherwise.
try:
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    pass