Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 84%
497 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-09 06:47 +0000
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 DEBUG,
32 NO_EXTENSIONS,
33 BaseTimerContext,
34 method_must_be_empty_body,
35 status_code_must_be_empty_body,
36)
37from .http_exceptions import (
38 BadHttpMessage,
39 BadStatusLine,
40 ContentEncodingError,
41 ContentLengthError,
42 InvalidHeader,
43 InvalidURLError,
44 LineTooLong,
45 TransferEncodingError,
46)
47from .http_writer import HttpVersion, HttpVersion10
48from .log import internal_logger
49from .streams import EMPTY_PAYLOAD, StreamReader
50from .typedefs import RawHeaders
52__all__ = (
53 "HeadersParser",
54 "HttpParser",
55 "HttpRequestParser",
56 "HttpResponseParser",
57 "RawRequestMessage",
58 "RawResponseMessage",
59)
61_SEP = Literal[b"\r\n", b"\n"]
63ASCIISET: Final[Set[str]] = set(string.printable)
65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
67#
68# method = token
69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
71# token = 1*tchar
72_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
73TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
74VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
75DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
76HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
79class RawRequestMessage(NamedTuple):
80 method: str
81 path: str
82 version: HttpVersion
83 headers: CIMultiDictProxy[str]
84 raw_headers: RawHeaders
85 should_close: bool
86 compression: Optional[str]
87 upgrade: bool
88 chunked: bool
89 url: URL
92class RawResponseMessage(NamedTuple):
93 version: HttpVersion
94 code: int
95 reason: str
96 headers: CIMultiDictProxy[str]
97 raw_headers: RawHeaders
98 should_close: bool
99 compression: Optional[str]
100 upgrade: bool
101 chunked: bool
104_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
107class ParseState(IntEnum):
108 PARSE_NONE = 0
109 PARSE_LENGTH = 1
110 PARSE_CHUNKED = 2
111 PARSE_UNTIL_EOF = 3
114class ChunkState(IntEnum):
115 PARSE_CHUNKED_SIZE = 0
116 PARSE_CHUNKED_CHUNK = 1
117 PARSE_CHUNKED_CHUNK_EOF = 2
118 PARSE_MAYBE_TRAILERS = 3
119 PARSE_TRAILERS = 4
122class HeadersParser:
123 def __init__(
124 self,
125 max_line_size: int = 8190,
126 max_field_size: int = 8190,
127 ) -> None:
128 self.max_line_size = max_line_size
129 self.max_field_size = max_field_size
131 def parse_headers(
132 self, lines: List[bytes]
133 ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
134 headers: CIMultiDict[str] = CIMultiDict()
135 # note: "raw" does not mean inclusion of OWS before/after the field value
136 raw_headers = []
138 lines_idx = 1
139 line = lines[1]
140 line_count = len(lines)
142 while line:
143 # Parse initial header name : value pair.
144 try:
145 bname, bvalue = line.split(b":", 1)
146 except ValueError:
147 raise InvalidHeader(line) from None
149 if len(bname) == 0:
150 raise InvalidHeader(bname)
152 # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
153 if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
154 raise InvalidHeader(line)
156 bvalue = bvalue.lstrip(b" \t")
157 if len(bname) > self.max_field_size:
158 raise LineTooLong(
159 "request header name {}".format(
160 bname.decode("utf8", "backslashreplace")
161 ),
162 str(self.max_field_size),
163 str(len(bname)),
164 )
165 name = bname.decode("utf-8", "surrogateescape")
166 if not TOKENRE.fullmatch(name):
167 raise InvalidHeader(bname)
169 header_length = len(bvalue)
171 # next line
172 lines_idx += 1
173 line = lines[lines_idx]
175 # consume continuation lines
176 continuation = line and line[0] in (32, 9) # (' ', '\t')
178 # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
179 if continuation:
180 bvalue_lst = [bvalue]
181 while continuation:
182 header_length += len(line)
183 if header_length > self.max_field_size:
184 raise LineTooLong(
185 "request header field {}".format(
186 bname.decode("utf8", "backslashreplace")
187 ),
188 str(self.max_field_size),
189 str(header_length),
190 )
191 bvalue_lst.append(line)
193 # next line
194 lines_idx += 1
195 if lines_idx < line_count:
196 line = lines[lines_idx]
197 if line:
198 continuation = line[0] in (32, 9) # (' ', '\t')
199 else:
200 line = b""
201 break
202 bvalue = b"".join(bvalue_lst)
203 else:
204 if header_length > self.max_field_size:
205 raise LineTooLong(
206 "request header field {}".format(
207 bname.decode("utf8", "backslashreplace")
208 ),
209 str(self.max_field_size),
210 str(header_length),
211 )
213 bvalue = bvalue.strip(b" \t")
214 value = bvalue.decode("utf-8", "surrogateescape")
216 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
217 if "\n" in value or "\r" in value or "\x00" in value:
218 raise InvalidHeader(bvalue)
220 headers.add(name, value)
221 raw_headers.append((bname, bvalue))
223 return (CIMultiDictProxy(headers), tuple(raw_headers))
226class HttpParser(abc.ABC, Generic[_MsgT]):
227 lax: ClassVar[bool] = False
229 def __init__(
230 self,
231 protocol: BaseProtocol,
232 loop: asyncio.AbstractEventLoop,
233 limit: int,
234 max_line_size: int = 8190,
235 max_field_size: int = 8190,
236 timer: Optional[BaseTimerContext] = None,
237 code: Optional[int] = None,
238 method: Optional[str] = None,
239 readall: bool = False,
240 payload_exception: Optional[Type[BaseException]] = None,
241 response_with_body: bool = True,
242 read_until_eof: bool = False,
243 auto_decompress: bool = True,
244 ) -> None:
245 self.protocol = protocol
246 self.loop = loop
247 self.max_line_size = max_line_size
248 self.max_field_size = max_field_size
249 self.timer = timer
250 self.code = code
251 self.method = method
252 self.readall = readall
253 self.payload_exception = payload_exception
254 self.response_with_body = response_with_body
255 self.read_until_eof = read_until_eof
257 self._lines: List[bytes] = []
258 self._tail = b""
259 self._upgraded = False
260 self._payload = None
261 self._payload_parser: Optional[HttpPayloadParser] = None
262 self._auto_decompress = auto_decompress
263 self._limit = limit
264 self._headers_parser = HeadersParser(max_line_size, max_field_size)
266 @abc.abstractmethod
267 def parse_message(self, lines: List[bytes]) -> _MsgT:
268 pass
270 def feed_eof(self) -> Optional[_MsgT]:
271 if self._payload_parser is not None:
272 self._payload_parser.feed_eof()
273 self._payload_parser = None
274 else:
275 # try to extract partial message
276 if self._tail:
277 self._lines.append(self._tail)
279 if self._lines:
280 if self._lines[-1] != "\r\n":
281 self._lines.append(b"")
282 with suppress(Exception):
283 return self.parse_message(self._lines)
284 return None
286 def feed_data(
287 self,
288 data: bytes,
289 SEP: _SEP = b"\r\n",
290 EMPTY: bytes = b"",
291 CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
292 METH_CONNECT: str = hdrs.METH_CONNECT,
293 SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
294 ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
295 messages = []
297 if self._tail:
298 data, self._tail = self._tail + data, b""
300 data_len = len(data)
301 start_pos = 0
302 loop = self.loop
304 while start_pos < data_len:
305 # read HTTP message (request/response line + headers), \r\n\r\n
306 # and split by lines
307 if self._payload_parser is None and not self._upgraded:
308 pos = data.find(SEP, start_pos)
309 # consume \r\n
310 if pos == start_pos and not self._lines:
311 start_pos = pos + len(SEP)
312 continue
314 if pos >= start_pos:
315 # line found
316 line = data[start_pos:pos]
317 if SEP == b"\n": # For lax response parsing
318 line = line.rstrip(b"\r")
319 self._lines.append(line)
320 start_pos = pos + len(SEP)
322 # \r\n\r\n found
323 if self._lines[-1] == EMPTY:
324 try:
325 msg: _MsgT = self.parse_message(self._lines)
326 finally:
327 self._lines.clear()
329 def get_content_length() -> Optional[int]:
330 # payload length
331 length_hdr = msg.headers.get(CONTENT_LENGTH)
332 if length_hdr is None:
333 return None
335 # Shouldn't allow +/- or other number formats.
336 # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
337 # msg.headers is already stripped of leading/trailing wsp
338 if not DIGITS.fullmatch(length_hdr):
339 raise InvalidHeader(CONTENT_LENGTH)
341 return int(length_hdr)
343 length = get_content_length()
344 # do not support old websocket spec
345 if SEC_WEBSOCKET_KEY1 in msg.headers:
346 raise InvalidHeader(SEC_WEBSOCKET_KEY1)
348 self._upgraded = msg.upgrade
350 method = getattr(msg, "method", self.method)
351 # code is only present on responses
352 code = getattr(msg, "code", 0)
354 assert self.protocol is not None
355 # calculate payload
356 empty_body = status_code_must_be_empty_body(code) or bool(
357 method and method_must_be_empty_body(method)
358 )
359 if not empty_body and (
360 (length is not None and length > 0)
361 or msg.chunked
362 and not msg.upgrade
363 ):
364 payload = StreamReader(
365 self.protocol,
366 timer=self.timer,
367 loop=loop,
368 limit=self._limit,
369 )
370 payload_parser = HttpPayloadParser(
371 payload,
372 length=length,
373 chunked=msg.chunked,
374 method=method,
375 compression=msg.compression,
376 code=self.code,
377 readall=self.readall,
378 response_with_body=self.response_with_body,
379 auto_decompress=self._auto_decompress,
380 lax=self.lax,
381 )
382 if not payload_parser.done:
383 self._payload_parser = payload_parser
384 elif method == METH_CONNECT:
385 assert isinstance(msg, RawRequestMessage)
386 payload = StreamReader(
387 self.protocol,
388 timer=self.timer,
389 loop=loop,
390 limit=self._limit,
391 )
392 self._upgraded = True
393 self._payload_parser = HttpPayloadParser(
394 payload,
395 method=msg.method,
396 compression=msg.compression,
397 readall=True,
398 auto_decompress=self._auto_decompress,
399 lax=self.lax,
400 )
401 elif not empty_body and length is None and self.read_until_eof:
402 payload = StreamReader(
403 self.protocol,
404 timer=self.timer,
405 loop=loop,
406 limit=self._limit,
407 )
408 payload_parser = HttpPayloadParser(
409 payload,
410 length=length,
411 chunked=msg.chunked,
412 method=method,
413 compression=msg.compression,
414 code=self.code,
415 readall=True,
416 response_with_body=self.response_with_body,
417 auto_decompress=self._auto_decompress,
418 lax=self.lax,
419 )
420 if not payload_parser.done:
421 self._payload_parser = payload_parser
422 else:
423 payload = EMPTY_PAYLOAD
425 messages.append((msg, payload))
426 else:
427 self._tail = data[start_pos:]
428 data = EMPTY
429 break
431 # no parser, just store
432 elif self._payload_parser is None and self._upgraded:
433 assert not self._lines
434 break
436 # feed payload
437 elif data and start_pos < data_len:
438 assert not self._lines
439 assert self._payload_parser is not None
440 try:
441 eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
442 except BaseException as exc:
443 if self.payload_exception is not None:
444 self._payload_parser.payload.set_exception(
445 self.payload_exception(str(exc))
446 )
447 else:
448 self._payload_parser.payload.set_exception(exc)
450 eof = True
451 data = b""
453 if eof:
454 start_pos = 0
455 data_len = len(data)
456 self._payload_parser = None
457 continue
458 else:
459 break
461 if data and start_pos < data_len:
462 data = data[start_pos:]
463 else:
464 data = EMPTY
466 return messages, self._upgraded, data
468 def parse_headers(
469 self, lines: List[bytes]
470 ) -> Tuple[
471 "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
472 ]:
473 """Parses RFC 5322 headers from a stream.
475 Line continuations are supported. Returns list of header name
476 and value pairs. Header name is in upper case.
477 """
478 headers, raw_headers = self._headers_parser.parse_headers(lines)
479 close_conn = None
480 encoding = None
481 upgrade = False
482 chunked = False
484 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
485 # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
486 singletons = (
487 hdrs.CONTENT_LENGTH,
488 hdrs.CONTENT_LOCATION,
489 hdrs.CONTENT_RANGE,
490 hdrs.CONTENT_TYPE,
491 hdrs.ETAG,
492 hdrs.HOST,
493 hdrs.MAX_FORWARDS,
494 hdrs.SERVER,
495 hdrs.TRANSFER_ENCODING,
496 hdrs.USER_AGENT,
497 )
498 bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
499 if bad_hdr is not None:
500 raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
502 # keep-alive
503 conn = headers.get(hdrs.CONNECTION)
504 if conn:
505 v = conn.lower()
506 if v == "close":
507 close_conn = True
508 elif v == "keep-alive":
509 close_conn = False
510 # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
511 elif v == "upgrade" and headers.get(hdrs.UPGRADE):
512 upgrade = True
514 # encoding
515 enc = headers.get(hdrs.CONTENT_ENCODING)
516 if enc:
517 enc = enc.lower()
518 if enc in ("gzip", "deflate", "br"):
519 encoding = enc
521 # chunking
522 te = headers.get(hdrs.TRANSFER_ENCODING)
523 if te is not None:
524 if "chunked" == te.lower():
525 chunked = True
526 else:
527 raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
529 if hdrs.CONTENT_LENGTH in headers:
530 raise BadHttpMessage(
531 "Transfer-Encoding can't be present with Content-Length",
532 )
534 return (headers, raw_headers, close_conn, encoding, upgrade, chunked)
536 def set_upgraded(self, val: bool) -> None:
537 """Set connection upgraded (to websocket) mode.
539 :param bool val: new state.
540 """
541 self._upgraded = val
544class HttpRequestParser(HttpParser[RawRequestMessage]):
545 """Read request status line.
547 Exception .http_exceptions.BadStatusLine
548 could be raised in case of any errors in status line.
549 Returns RawRequestMessage.
550 """
552 def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
553 # request line
554 line = lines[0].decode("utf-8", "surrogateescape")
555 try:
556 method, path, version = line.split(" ", maxsplit=2)
557 except ValueError:
558 raise BadStatusLine(line) from None
560 if len(path) > self.max_line_size:
561 raise LineTooLong(
562 "Status line is too long", str(self.max_line_size), str(len(path))
563 )
565 # method
566 if not TOKENRE.fullmatch(method):
567 raise BadStatusLine(method)
569 # version
570 match = VERSRE.fullmatch(version)
571 if match is None:
572 raise BadStatusLine(line)
573 version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
575 if method == "CONNECT":
576 # authority-form,
577 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
578 url = URL.build(authority=path, encoded=True)
579 elif path.startswith("/"):
580 # origin-form,
581 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
582 path_part, _hash_separator, url_fragment = path.partition("#")
583 path_part, _question_mark_separator, qs_part = path_part.partition("?")
585 # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
586 # NOTE: parser does, otherwise it results into the same
587 # NOTE: HTTP Request-Line input producing different
588 # NOTE: `yarl.URL()` objects
589 url = URL.build(
590 path=path_part,
591 query_string=qs_part,
592 fragment=url_fragment,
593 encoded=True,
594 )
595 elif path == "*" and method == "OPTIONS":
596 # asterisk-form,
597 url = URL(path, encoded=True)
598 else:
599 # absolute-form for proxy maybe,
600 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
601 url = URL(path, encoded=True)
602 if url.scheme == "":
603 # not absolute-form
604 raise InvalidURLError(
605 path.encode(errors="surrogateescape").decode("latin1")
606 )
608 # read headers
609 (
610 headers,
611 raw_headers,
612 close,
613 compression,
614 upgrade,
615 chunked,
616 ) = self.parse_headers(lines)
618 if close is None: # then the headers weren't set in the request
619 if version_o <= HttpVersion10: # HTTP 1.0 must asks to not close
620 close = True
621 else: # HTTP 1.1 must ask to close.
622 close = False
624 return RawRequestMessage(
625 method,
626 path,
627 version_o,
628 headers,
629 raw_headers,
630 close,
631 compression,
632 upgrade,
633 chunked,
634 url,
635 )
638class HttpResponseParser(HttpParser[RawResponseMessage]):
639 """Read response status line and headers.
641 BadStatusLine could be raised in case of any errors in status line.
642 Returns RawResponseMessage.
643 """
645 # Lax mode should only be enabled on response parser.
646 lax = not DEBUG
648 def feed_data(
649 self,
650 data: bytes,
651 SEP: Optional[_SEP] = None,
652 *args: Any,
653 **kwargs: Any,
654 ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
655 if SEP is None:
656 SEP = b"\r\n" if DEBUG else b"\n"
657 return super().feed_data(data, SEP, *args, **kwargs)
659 def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
660 line = lines[0].decode("utf-8", "surrogateescape")
661 try:
662 version, status = line.split(maxsplit=1)
663 except ValueError:
664 raise BadStatusLine(line) from None
666 try:
667 status, reason = status.split(maxsplit=1)
668 except ValueError:
669 status = status.strip()
670 reason = ""
672 if len(reason) > self.max_line_size:
673 raise LineTooLong(
674 "Status line is too long", str(self.max_line_size), str(len(reason))
675 )
677 # version
678 match = VERSRE.fullmatch(version)
679 if match is None:
680 raise BadStatusLine(line)
681 version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
683 # The status code is a three-digit ASCII number, no padding
684 if len(status) != 3 or not DIGITS.fullmatch(status):
685 raise BadStatusLine(line)
686 status_i = int(status)
688 # read headers
689 (
690 headers,
691 raw_headers,
692 close,
693 compression,
694 upgrade,
695 chunked,
696 ) = self.parse_headers(lines)
698 if close is None:
699 close = version_o <= HttpVersion10
701 return RawResponseMessage(
702 version_o,
703 status_i,
704 reason.strip(),
705 headers,
706 raw_headers,
707 close,
708 compression,
709 upgrade,
710 chunked,
711 )
714class HttpPayloadParser:
715 def __init__(
716 self,
717 payload: StreamReader,
718 length: Optional[int] = None,
719 chunked: bool = False,
720 compression: Optional[str] = None,
721 code: Optional[int] = None,
722 method: Optional[str] = None,
723 readall: bool = False,
724 response_with_body: bool = True,
725 auto_decompress: bool = True,
726 lax: bool = False,
727 ) -> None:
728 self._length = 0
729 self._type = ParseState.PARSE_NONE
730 self._chunk = ChunkState.PARSE_CHUNKED_SIZE
731 self._chunk_size = 0
732 self._chunk_tail = b""
733 self._auto_decompress = auto_decompress
734 self._lax = lax
735 self.done = False
737 # payload decompression wrapper
738 if response_with_body and compression and self._auto_decompress:
739 real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
740 payload, compression
741 )
742 else:
743 real_payload = payload
745 # payload parser
746 if not response_with_body:
747 # don't parse payload if it's not expected to be received
748 self._type = ParseState.PARSE_NONE
749 real_payload.feed_eof()
750 self.done = True
752 elif chunked:
753 self._type = ParseState.PARSE_CHUNKED
754 elif length is not None:
755 self._type = ParseState.PARSE_LENGTH
756 self._length = length
757 if self._length == 0:
758 real_payload.feed_eof()
759 self.done = True
760 else:
761 if readall and code != 204:
762 self._type = ParseState.PARSE_UNTIL_EOF
763 elif method in ("PUT", "POST"):
764 internal_logger.warning( # pragma: no cover
765 "Content-Length or Transfer-Encoding header is required"
766 )
767 self._type = ParseState.PARSE_NONE
768 real_payload.feed_eof()
769 self.done = True
771 self.payload = real_payload
773 def feed_eof(self) -> None:
774 if self._type == ParseState.PARSE_UNTIL_EOF:
775 self.payload.feed_eof()
776 elif self._type == ParseState.PARSE_LENGTH:
777 raise ContentLengthError(
778 "Not enough data for satisfy content length header."
779 )
780 elif self._type == ParseState.PARSE_CHUNKED:
781 raise TransferEncodingError(
782 "Not enough data for satisfy transfer length header."
783 )
785 def feed_data(
786 self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
787 ) -> Tuple[bool, bytes]:
788 # Read specified amount of bytes
789 if self._type == ParseState.PARSE_LENGTH:
790 required = self._length
791 chunk_len = len(chunk)
793 if required >= chunk_len:
794 self._length = required - chunk_len
795 self.payload.feed_data(chunk, chunk_len)
796 if self._length == 0:
797 self.payload.feed_eof()
798 return True, b""
799 else:
800 self._length = 0
801 self.payload.feed_data(chunk[:required], required)
802 self.payload.feed_eof()
803 return True, chunk[required:]
805 # Chunked transfer encoding parser
806 elif self._type == ParseState.PARSE_CHUNKED:
807 if self._chunk_tail:
808 chunk = self._chunk_tail + chunk
809 self._chunk_tail = b""
811 while chunk:
812 # read next chunk size
813 if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
814 pos = chunk.find(SEP)
815 if pos >= 0:
816 i = chunk.find(CHUNK_EXT, 0, pos)
817 if i >= 0:
818 size_b = chunk[:i] # strip chunk-extensions
819 else:
820 size_b = chunk[:pos]
822 if self._lax: # Allow whitespace in lax mode.
823 size_b = size_b.strip()
825 if not re.fullmatch(HEXDIGITS, size_b):
826 exc = TransferEncodingError(
827 chunk[:pos].decode("ascii", "surrogateescape")
828 )
829 self.payload.set_exception(exc)
830 raise exc
831 size = int(bytes(size_b), 16)
833 chunk = chunk[pos + len(SEP) :]
834 if size == 0: # eof marker
835 self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
836 if self._lax and chunk.startswith(b"\r"):
837 chunk = chunk[1:]
838 else:
839 self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
840 self._chunk_size = size
841 self.payload.begin_http_chunk_receiving()
842 else:
843 self._chunk_tail = chunk
844 return False, b""
846 # read chunk and feed buffer
847 if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
848 required = self._chunk_size
849 chunk_len = len(chunk)
851 if required > chunk_len:
852 self._chunk_size = required - chunk_len
853 self.payload.feed_data(chunk, chunk_len)
854 return False, b""
855 else:
856 self._chunk_size = 0
857 self.payload.feed_data(chunk[:required], required)
858 chunk = chunk[required:]
859 if self._lax and chunk.startswith(b"\r"):
860 chunk = chunk[1:]
861 self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
862 self.payload.end_http_chunk_receiving()
864 # toss the CRLF at the end of the chunk
865 if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
866 if chunk[: len(SEP)] == SEP:
867 chunk = chunk[len(SEP) :]
868 self._chunk = ChunkState.PARSE_CHUNKED_SIZE
869 else:
870 self._chunk_tail = chunk
871 return False, b""
873 # if stream does not contain trailer, after 0\r\n
874 # we should get another \r\n otherwise
875 # trailers needs to be skipped until \r\n\r\n
876 if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
877 head = chunk[: len(SEP)]
878 if head == SEP:
879 # end of stream
880 self.payload.feed_eof()
881 return True, chunk[len(SEP) :]
882 # Both CR and LF, or only LF may not be received yet. It is
883 # expected that CRLF or LF will be shown at the very first
884 # byte next time, otherwise trailers should come. The last
885 # CRLF which marks the end of response might not be
886 # contained in the same TCP segment which delivered the
887 # size indicator.
888 if not head:
889 return False, b""
890 if head == SEP[:1]:
891 self._chunk_tail = head
892 return False, b""
893 self._chunk = ChunkState.PARSE_TRAILERS
895 # read and discard trailer up to the CRLF terminator
896 if self._chunk == ChunkState.PARSE_TRAILERS:
897 pos = chunk.find(SEP)
898 if pos >= 0:
899 chunk = chunk[pos + len(SEP) :]
900 self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
901 else:
902 self._chunk_tail = chunk
903 return False, b""
905 # Read all bytes until eof
906 elif self._type == ParseState.PARSE_UNTIL_EOF:
907 self.payload.feed_data(chunk, len(chunk))
909 return False, b""
912class DeflateBuffer:
913 """DeflateStream decompress stream and feed data into specified stream."""
915 def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
916 self.out = out
917 self.size = 0
918 self.encoding = encoding
919 self._started_decoding = False
921 self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
922 if encoding == "br":
923 if not HAS_BROTLI: # pragma: no cover
924 raise ContentEncodingError(
925 "Can not decode content-encoding: brotli (br). "
926 "Please install `Brotli`"
927 )
928 self.decompressor = BrotliDecompressor()
929 else:
930 self.decompressor = ZLibDecompressor(encoding=encoding)
932 def set_exception(self, exc: BaseException) -> None:
933 self.out.set_exception(exc)
935 def feed_data(self, chunk: bytes, size: int) -> None:
936 if not size:
937 return
939 self.size += size
941 # RFC1950
942 # bits 0..3 = CM = 0b1000 = 8 = "deflate"
943 # bits 4..7 = CINFO = 1..7 = windows size.
944 if (
945 not self._started_decoding
946 and self.encoding == "deflate"
947 and chunk[0] & 0xF != 8
948 ):
949 # Change the decoder to decompress incorrectly compressed data
950 # Actually we should issue a warning about non-RFC-compliant data.
951 self.decompressor = ZLibDecompressor(
952 encoding=self.encoding, suppress_deflate_header=True
953 )
955 try:
956 chunk = self.decompressor.decompress_sync(chunk)
957 except Exception:
958 raise ContentEncodingError(
959 "Can not decode content-encoding: %s" % self.encoding
960 )
962 self._started_decoding = True
964 if chunk:
965 self.out.feed_data(chunk, len(chunk))
967 def feed_eof(self) -> None:
968 chunk = self.decompressor.flush()
970 if chunk or self.size > 0:
971 self.out.feed_data(chunk, len(chunk))
972 # decompressor is not brotli unless encoding is "br"
973 if self.encoding == "deflate" and not self.decompressor.eof: # type: ignore[union-attr]
974 raise ContentEncodingError("deflate")
976 self.out.feed_eof()
978 def begin_http_chunk_receiving(self) -> None:
979 self.out.begin_http_chunk_receiving()
981 def end_http_chunk_receiving(self) -> None:
982 self.out.end_http_chunk_receiving()
985HttpRequestParserPy = HttpRequestParser
986HttpResponseParserPy = HttpResponseParser
987RawRequestMessagePy = RawRequestMessage
988RawResponseMessagePy = RawResponseMessage
990try:
991 if not NO_EXTENSIONS:
992 from ._http_parser import ( # type: ignore[import-not-found,no-redef]
993 HttpRequestParser,
994 HttpResponseParser,
995 RawRequestMessage,
996 RawResponseMessage,
997 )
999 HttpRequestParserC = HttpRequestParser
1000 HttpResponseParserC = HttpResponseParser
1001 RawRequestMessageC = RawRequestMessage
1002 RawResponseMessageC = RawResponseMessage
1003except ImportError: # pragma: no cover
1004 pass