Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 59%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 _EXC_SENTINEL,
32 DEBUG,
33 EMPTY_BODY_METHODS,
34 EMPTY_BODY_STATUS_CODES,
35 NO_EXTENSIONS,
36 BaseTimerContext,
37 set_exception,
38)
39from .http_exceptions import (
40 BadHttpMessage,
41 BadHttpMethod,
42 BadStatusLine,
43 ContentEncodingError,
44 ContentLengthError,
45 InvalidHeader,
46 InvalidURLError,
47 LineTooLong,
48 TransferEncodingError,
49)
50from .http_writer import HttpVersion, HttpVersion10
51from .streams import EMPTY_PAYLOAD, StreamReader
52from .typedefs import RawHeaders
# Public API of this module; the C-accelerated implementations may shadow
# the pure-Python parsers at the bottom of the file.
__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

# Line separators accepted by feed_data(): strict CRLF, or bare LF in lax mode.
_SEP = Literal[b"\r\n", b"\n"]

# Characters from string.printable (digits, letters, punctuation, whitespace).
ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#         "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
# RFC 9110 "token" (used for methods and header field names).
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# HTTP-version, e.g. "HTTP/1.1"; ASCII-only digits.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Unsigned decimal number (status codes, Content-Length values).
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hexadecimal chunk-size field of the chunked transfer coding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
    """Immutable result of parsing a request line and headers (no body)."""

    method: str  # request method token, e.g. "GET"
    path: str  # request-target exactly as received on the request line
    version: HttpVersion
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # (name, value) pairs as bytes
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" from Content-Encoding
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding indicates chunked framing
    url: URL  # request-target parsed into a yarl.URL
class RawResponseMessage(NamedTuple):
    """Immutable result of parsing a status line and headers (no body)."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # (name, value) pairs as bytes
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" from Content-Encoding
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding indicates chunked framing
106_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the message body is framed (used by HttpPayloadParser)."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # fixed-size body per Content-Length
    PARSE_CHUNKED = 2  # chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body ends when the stream ends
class ChunkState(IntEnum):
    """Position within the chunked transfer-coding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;extensions]" + SEP
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the SEP that follows chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after last-chunk; either final SEP or trailers
    PARSE_TRAILERS = 4  # skipping a trailer line up to SEP
class HeadersParser:
    """Parse a block of header lines into a CIMultiDict plus raw bytes pairs.

    In lax mode, deprecated obs-fold line continuations (leading SP/HTAB)
    are accepted and joined into a single field value.
    """

    def __init__(
        self, max_line_size: int = 8190, max_field_size: int = 8190, lax: bool = False
    ) -> None:
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self._lax = lax

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse ``lines[1:]`` (index 0 is the start line) as header fields.

        Raises InvalidHeader for malformed names/values and LineTooLong
        when a name or field exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers: List[Tuple[bytes, bytes]] = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        # An empty line terminates the loop (end of the header block).
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            # Field names must be RFC 9110 tokens.
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Check if the upgrade header is supported."""
    upgrade_value = headers.get(hdrs.UPGRADE, "")
    return upgrade_value.lower() in {"tcp", "websocket"}
class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP/1.x message parser.

    feed_data() splits incoming bytes into start-line + header lines,
    delegates interpretation to the subclass parse_message(), and wires an
    HttpPayloadParser to a StreamReader for any body that follows.
    """

    # Lax parsing is only enabled by the response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated start-line/header lines
        self._tail = b""  # undelimited bytes carried between feed_data() calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size, self.lax)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT: ...

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool: ...

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # FIX: this previously compared against the *str* literal
                # "\r\n", which can never equal a bytes element, so the
                # terminating empty line was appended unconditionally.
                # Compare against bytes as intended.  (Stored lines are
                # SEP-stripped, so behavior is preserved in practice.)
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Consume ``data``; return (messages+payloads, upgraded, leftover)."""
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        should_close = False
        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels: treat the rest as opaque payload.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    # No full line yet; stash the remainder for the next call.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line plus headers into a RawRequestMessage."""
        # request line: "METHOD request-target HTTP/x.y"
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must be an RFC 9110 token
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 defaults to closing
                close = True
            else:  # HTTP 1.1 defaults to keep-alive
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Return True if the last transfer-coding is "chunked"; else reject."""
        if te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked":
            return True
        # For requests, any other final coding is a hard error:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Choose the default separator lazily: strict CRLF when debugging,
        # bare LF (stray CRs are stripped later) otherwise.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line plus headers into a RawResponseMessage."""
        status_line = lines[0].decode("utf-8", "surrogateescape")

        # Split "HTTP/x.y 200 Reason" into version and the remainder.
        first_split = status_line.split(maxsplit=1)
        if len(first_split) != 2:
            raise BadStatusLine(status_line) from None
        version, status = first_split

        # Separate the status code from the (optional) reason phrase.
        code_and_reason = status.split(maxsplit=1)
        if len(code_and_reason) == 2:
            status, reason = code_and_reason
        else:
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # HTTP version
        version_match = VERSRE.fullmatch(version)
        if version_match is None:
            raise BadStatusLine(status_line)
        version_o = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(status_line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            else:
                # With explicit framing the connection can be reused;
                # otherwise the body runs until EOF and must close.
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = not (
                    hdrs.CONTENT_LENGTH in headers
                    or hdrs.TRANSFER_ENCODING in headers
                )

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        final_coding = te.rsplit(",", maxsplit=1)[-1]
        return final_coding.strip(" \t").lower() == "chunked"
class HttpPayloadParser:
    """Parse an HTTP message body and feed it into a StreamReader.

    Supports three framing modes (ParseState): fixed Content-Length,
    chunked transfer coding, and read-until-EOF.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes in PARSE_LENGTH mode
        self._type = ParseState.PARSE_UNTIL_EOF
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk header/trailer between feeds
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True once the entire body has been consumed

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raise if the body is known to be truncated."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Consume body bytes; return (eof, bytes left over after the body)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            self._length = max(required - len(chunk), 0)
            self.payload.feed_data(chunk[:required])
            if self._length == 0:
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Prepend bytes saved from the previous incomplete feed.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = BadHttpMessage(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        # chunk-size must be pure hex digits
                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Size line incomplete; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    self._chunk_size = max(required - len(chunk), 0)
                    self.payload.feed_data(chunk[:required])

                    if self._chunk_size:
                        return False, b""
                    chunk = chunk[required:]
                    self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                    self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk)

        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # zlib handles both gzip and deflate content codings.
            self.decompressor = ZLibDecompressor(encoding=encoding)
        elif HAS_BROTLI:
            self.decompressor = BrotliDecompressor()
        else:
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )

    def set_exception(
        self,
        exc: Union[Type[BaseException], BaseException],
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        # Propagate the failure to the wrapped output stream.
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes) -> None:
        """Decompress ``chunk`` and forward the result to the output stream."""
        if not chunk:
            return

        self.size += len(chunk)

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        looks_like_zlib = chunk[0] & 0xF == 8
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and not looks_like_zlib
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk)

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        trailing = self.decompressor.flush()

        if trailing or self.size > 0:
            self.out.feed_data(trailing)
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Pure-Python implementations remain importable under the *Py aliases.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

# If the C-accelerated extension is available (and extensions are not
# disabled), shadow the public names with it; the accelerated classes are
# also exposed under the *C aliases.
with suppress(ImportError):
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage