Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 83%
494 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-26 06:16 +0000
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 DEBUG,
32 NO_EXTENSIONS,
33 BaseTimerContext,
34 method_must_be_empty_body,
35 status_code_must_be_empty_body,
36)
37from .http_exceptions import (
38 BadHttpMessage,
39 BadStatusLine,
40 ContentEncodingError,
41 ContentLengthError,
42 InvalidHeader,
43 InvalidURLError,
44 LineTooLong,
45 TransferEncodingError,
46)
47from .http_writer import HttpVersion, HttpVersion10
48from .log import internal_logger
49from .streams import EMPTY_PAYLOAD, StreamReader
50from .typedefs import RawHeaders
52__all__ = (
53 "HeadersParser",
54 "HttpParser",
55 "HttpRequestParser",
56 "HttpResponseParser",
57 "RawRequestMessage",
58 "RawResponseMessage",
59)
61_SEP = Literal[b"\r\n", b"\n"]
63ASCIISET: Final[Set[str]] = set(string.printable)
65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
67#
68# method = token
69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
71# token = 1*tchar
72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
74HDRRE: Final[Pattern[bytes]] = re.compile(
75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]"
76)
77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
    """Parsed HTTP request line and headers (no payload)."""

    method: str
    path: str  # request-target exactly as received
    version: HttpVersion
    headers: CIMultiDictProxy[str]
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool
    compression: Optional[str]  # "gzip"/"deflate"/"br" or None
    upgrade: bool
    chunked: bool
    url: URL  # request-target parsed into a yarl.URL
class RawResponseMessage(NamedTuple):
    """Parsed HTTP response status line and headers (no payload)."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase, may be empty
    headers: CIMultiDictProxy[str]
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool
    compression: Optional[str]  # "gzip"/"deflate"/"br" or None
    upgrade: bool
    chunked: bool
# Message type produced by a concrete parser subclass (request or response).
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the payload length of the current message is determined."""

    PARSE_NONE = 0  # no payload expected
    PARSE_LENGTH = 1  # fixed Content-Length
    PARSE_CHUNKED = 2  # chunked transfer coding
    PARSE_UNTIL_EOF = 3  # read until the connection closes
class ChunkState(IntEnum):
    """Sub-state of the chunked transfer-coding parser."""

    PARSE_CHUNKED_SIZE = 0  # expecting a chunk-size line
    PARSE_CHUNKED_CHUNK = 1  # reading chunk data
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF after chunk data
    PARSE_MAYBE_TRAILERS = 3  # after last-chunk; CRLF or trailers next
    PARSE_TRAILERS = 4  # skipping a trailer line
class HeadersParser:
    """Parser for a block of RFC 9112 header lines.

    ``parse_headers`` expects ``lines[0]`` to be the start-line (it is
    skipped), followed by raw header lines and a terminating empty line.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
    ) -> None:
        # Exceeding either limit raises LineTooLong so oversized headers
        # cannot exhaust memory.
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse header lines into a case-insensitive multidict.

        Returns the decoded headers and the raw ``(name, value)`` byte
        pairs.  Raises InvalidHeader or LineTooLong on malformed input.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # An empty field-name (e.g. b": value") is invalid; reject it
            # explicitly — otherwise bname[0] below raises IndexError
            # instead of the expected InvalidHeader.
            if not bname:
                raise InvalidHeader(line)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP message parser: start-line + headers + optional payload.

    ``feed_data`` accumulates raw bytes, splits them into header lines,
    delegates start-line parsing to the concrete subclass via
    ``parse_message`` and attaches an ``HttpPayloadParser`` whenever a
    message body is expected.
    """

    # Lax parsing (tolerate bare "\n" separators) — enabled only on the
    # response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # header lines of the in-flight message
        self._tail = b""  # bytes carried over between feed_data() calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the start-line plus header lines into a message object."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Handle end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Stored lines never include the separator, so compare to
                # b"" — the previous comparison against the *str* "\r\n"
                # was always true due to the bytes/str type mismatch.
                # Ensure a terminating empty line so parse_message() sees
                # a complete header block.
                if self._lines[-1] != b"":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes; return ``(messages, upgraded, unprocessed_tail)``."""
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        # Parentheses make the `and`-binds-tighter-than-`or`
                        # intent explicit (no behavior change).
                        if not empty_body and (
                            (length is not None and length > 0)
                            or (msg.chunked and not msg.upgrade)
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            # CONNECT tunnels the connection: treat the rest
                            # of the stream as opaque payload.
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # Request line, e.g. b"GET /path HTTP/1.1".
        start_line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = start_line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(start_line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # Validate the method token (RFC 9110).
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # Validate and decode the HTTP version.
        version_match = VERSRE.fullmatch(version)
        if version_match is None:
            raise BadStatusLine(start_line)
        parsed_version = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _, fragment = path.partition("#")
            path_part, _, query = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=query,
                fragment=fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # Headers follow the request line.
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # No Connection header: HTTP/1.0 closes by default,
            # HTTP/1.1 keeps the connection alive.
            close = parsed_version <= HttpVersion10

        return RawRequestMessage(
            method,
            path,
            parsed_version,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default to the lax separator (bare b"\n") outside of debug runs.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        # Status line, e.g. b"HTTP/1.1 200 OK".
        status_line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = status_line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(status_line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # The reason phrase is optional.
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # Validate and decode the HTTP version.
        version_match = VERSRE.fullmatch(version)
        if version_match is None:
            raise BadStatusLine(status_line)
        parsed_version = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit number
        if len(status) != 3 or not status.isdecimal():
            raise BadStatusLine(status_line)
        status_code = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            close = parsed_version <= HttpVersion10

        return RawResponseMessage(
            parsed_version,
            status_code,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )
class HttpPayloadParser:
    """Incremental parser for an HTTP message body.

    Depending on the headers, the body is read as a fixed Content-Length,
    as chunked transfer coding, or until EOF; data is optionally routed
    through a DeflateBuffer for decompression before reaching ``payload``.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes for PARSE_LENGTH mode
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # incomplete chunk-framing bytes held over
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True once the full body has been delivered

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Signal end of stream; raise if the body is known to be incomplete."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return ``(eof, unparsed_tail)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Prepend framing bytes left over from the previous call.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # No complete size line yet; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # zlib handles both gzip and deflate.
            self.decompressor = ZLibDecompressor(encoding=encoding)
        elif HAS_BROTLI:
            self.decompressor = BrotliDecompressor()
        else:  # pragma: no cover
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )

    def set_exception(self, exc: BaseException) -> None:
        # Propagate errors straight to the wrapped stream.
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result to the output stream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        is_first_chunk = not self._started_decoding
        if is_first_chunk and self.encoding == "deflate" and chunk[0] & 0xF != 8:
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        tail = self.decompressor.flush()

        if tail or self.size > 0:
            self.out.feed_data(tail, len(tail))
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Keep the pure-Python implementations importable under explicit *Py names
# even when the C-accelerated versions below replace the public ones.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the Cython parser when available; it shadows the Python
        # names defined above.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Fall back silently to the pure-Python implementations.
    pass