Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/aiohttp/http_parser.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

527 statements  

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import ( 

30 DEFAULT_MAX_DECOMPRESS_SIZE, 

31 HAS_BROTLI, 

32 HAS_ZSTD, 

33 BrotliDecompressor, 

34 ZLibDecompressor, 

35 ZSTDDecompressor, 

36) 

37from .helpers import ( 

38 _EXC_SENTINEL, 

39 DEBUG, 

40 EMPTY_BODY_METHODS, 

41 EMPTY_BODY_STATUS_CODES, 

42 NO_EXTENSIONS, 

43 BaseTimerContext, 

44 set_exception, 

45) 

46from .http_exceptions import ( 

47 BadHttpMessage, 

48 BadHttpMethod, 

49 BadStatusLine, 

50 ContentEncodingError, 

51 ContentLengthError, 

52 DecompressSizeError, 

53 InvalidHeader, 

54 InvalidURLError, 

55 LineTooLong, 

56 TransferEncodingError, 

57) 

58from .http_writer import HttpVersion, HttpVersion10 

59from .streams import EMPTY_PAYLOAD, StreamReader 

60from .typedefs import RawHeaders 

61 

__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

# Line separators accepted by the parsers: strict CRLF, or bare LF in lax mode.
_SEP = Literal[b"\r\n", b"\n"]

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
#     method = token
#     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
#     token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
# Matches an RFC 9110 token (used for methods and header field names).
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# Matches "HTTP/<major>.<minor>" with single ASCII digits.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# ASCII-only digits (Content-Length / status codes must not accept "+", "-", etc.).
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hex digits for chunk-size lines in chunked transfer encoding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")

87 

88 

class RawRequestMessage(NamedTuple):
    """Parsed HTTP request line plus headers, as produced by HttpRequestParser."""

    method: str  # request method token, e.g. "GET"
    path: str  # request target exactly as received (still encoded)
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"  # case-insensitive, decoded headers
    raw_headers: RawHeaders  # (name, value) byte pairs as received
    should_close: bool  # connection should be closed after this message
    compression: Optional[str]  # recognized Content-Encoding, if any
    upgrade: bool  # Connection: upgrade with a supported Upgrade header
    chunked: bool  # Transfer-Encoding ends with "chunked"
    url: URL  # parsed form of `path`

100 

101 

class RawResponseMessage(NamedTuple):
    """Parsed HTTP status line plus headers, as produced by HttpResponseParser."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # case-insensitive, decoded headers
    raw_headers: RawHeaders  # (name, value) byte pairs as received
    should_close: bool  # connection should be closed after this message
    compression: Optional[str]  # recognized Content-Encoding, if any
    upgrade: bool  # Connection: upgrade with a supported Upgrade header
    chunked: bool  # Transfer-Encoding ends with "chunked"

112 

113 

114_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage) 

115 

116 

class ParseState(IntEnum):
    """How the payload body length is determined."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # body delimited by Content-Length
    PARSE_CHUNKED = 2  # body uses chunked transfer encoding
    PARSE_UNTIL_EOF = 3  # body runs until connection close

123 

124 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-encoding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting a chunk-size line
    PARSE_CHUNKED_CHUNK = 1  # reading chunk data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF terminating chunk data
    PARSE_MAYBE_TRAILERS = 3  # after 0-size chunk; trailers may follow
    PARSE_TRAILERS = 4  # reading trailer header lines

131 

132 

class HeadersParser:
    """Parse a block of RFC 9112 header field lines into a CIMultiDict.

    Enforces per-field size limits, rejects malformed names/values, and in
    lax mode accepts deprecated obs-fold continuation lines (SP/HTAB prefix).
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        lax: bool = False,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self._lax = lax

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse header lines (terminated by an empty line) into
        (decoded multidict proxy, raw (name, value) byte tuples).

        Raises InvalidHeader / LineTooLong on malformed or oversized fields.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers = []

        lines_idx = 0
        line = lines[lines_idx]
        line_count = len(lines)

        # Loop ends at the first empty line (the header-block terminator).
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            # Leading/trailing whitespace around the field name is forbidden.
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        # Ran off the end of the supplied lines.
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            # Bare CR/LF/NUL in a value enables request smuggling; reject.
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

239 

240 

def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Check if the upgrade header is supported."""
    upgrade_value = headers.get(hdrs.UPGRADE, "")
    # .lower() can transform non-ascii characters, so verify ASCII first.
    if not upgrade_value.isascii():
        return False
    return upgrade_value.lower() in {"tcp", "websocket"}

246 

247 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP/1.x message parser.

    Accumulates bytes fed via :meth:`feed_data`, splits the start line and
    header block into lines, and delegates start-line parsing to the
    concrete subclass via :meth:`parse_message`.  Also constructs the
    matching payload reader (Content-Length / chunked / until-EOF) for each
    parsed message.
    """

    # Lax parsing (bare-LF separators, obs-fold) — enabled only by the
    # response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # start line + header lines seen so far
        self._tail = b""  # bytes carried over between feed_data calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(
            max_line_size, max_headers, max_field_size, self.lax
        )

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the start line + header lines into a raw message tuple."""
        ...

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool:
        """Return True if Transfer-Encoding value *te* ends with chunked."""
        ...

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a final partially-fed message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Ensure the header block is terminated by an empty line so
                # parse_message() sees a complete block.  (The previous
                # comparison `!= "\r\n"` matched bytes against str and was
                # therefore always True; the terminator line is b"".)
                if self._lines[-1] != b"":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes to the parser.

        Returns (messages, upgraded, tail) where *messages* is a list of
        (raw message, payload reader) pairs completed by this call,
        *upgraded* reports whether the connection switched protocols, and
        *tail* is any unconsumed bytes.
        """

        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        should_close = False
        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            # Always reset, even on parse error, so the next
                            # message starts clean.
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            # Delimited body: Content-Length > 0 or chunked.
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels the connection: treat the rest
                            # of the stream as opaque payload.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            # No framing info: read until connection close.
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    # No separator yet: stash the partial line for next call.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    eof = True
                    data = b""
                    # Framing errors are unrecoverable for the connection.
                    if isinstance(
                        underlying_exc, (InvalidHeader, TransferEncodingError)
                    ):
                        raise

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        # These fields must appear at most once in a message.
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING, "")
        if enc.isascii() and enc.lower() in {"gzip", "deflate", "br", "zstd"}:
            encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            # Both framing mechanisms present is a smuggling vector; reject.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

579 

580 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line and headers into a RawRequestMessage."""
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines[1:])

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Validate Transfer-Encoding on requests: final coding must be chunked."""
        te = te.rsplit(",", maxsplit=1)[-1].strip(" \t")
        # .lower() transforms some non-ascii chars, so must check first.
        if te.isascii() and te.lower() == "chunked":
            return True
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

681 

682 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default to bare-LF separators in lax (non-DEBUG) mode.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line and headers into a RawResponseMessage."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # Reason phrase is optional.
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines[1:])

        if close is None:
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            elif hdrs.CONTENT_LENGTH in headers or hdrs.TRANSFER_ENCODING in headers:
                close = False
            else:
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = True

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        # Unlike requests, an unrecognized final coding is tolerated here.
        return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked"

770 

771 

class HttpPayloadParser:
    """Parse a message body and feed it into a StreamReader.

    Supports Content-Length framing, chunked transfer encoding (including
    trailers), and read-until-EOF, with optional transparent decompression.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
        *,
        headers_parser: HeadersParser,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_UNTIL_EOF
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk line carried between feeds
        self._auto_decompress = auto_decompress
        self._lax = lax
        self._headers_parser = headers_parser  # reused for trailer parsing
        self._trailer_lines: list[bytes] = []
        self.done = False  # True when no body bytes are expected at all

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raises if framing promised more bytes."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; returns (body complete, leftover bytes)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                # More data than the body needs: return the excess.
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = TransferEncodingError(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Incomplete size line: wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos < 0:  # No line found
                        self._chunk_tail = chunk
                        return False, b""

                    line = chunk[:pos]
                    chunk = chunk[pos + len(SEP) :]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._trailer_lines.append(line)

                    # \r\n\r\n found, end of stream
                    if self._trailer_lines[-1] == b"":
                        # Headers and trailers are defined the same way,
                        # so we reuse the HeadersParser here.
                        try:
                            trailers, raw_trailers = self._headers_parser.parse_headers(
                                self._trailer_lines
                            )
                        finally:
                            self._trailer_lines.clear()
                        self.payload.feed_eof()
                        return True, chunk

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

961 

962 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(
        self,
        out: StreamReader,
        encoding: Optional[str],
        max_decompress_size: int = DEFAULT_MAX_DECOMPRESS_SIZE,
    ) -> None:
        self.out = out
        self.size = 0  # total compressed bytes seen so far
        out.total_compressed_bytes = self.size
        self.encoding = encoding
        self._started_decoding = False

        # Pick a decompressor matching the Content-Encoding.
        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor, ZSTDDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        elif encoding == "zstd":
            if not HAS_ZSTD:
                raise ContentEncodingError(
                    "Can not decode content-encoding: zstandard (zstd). "
                    "Please install `backports.zstd`"
                )
            self.decompressor = ZSTDDecompressor()
        else:
            # gzip/deflate are handled by zlib.
            self.decompressor = ZLibDecompressor(encoding=encoding)

        self._max_decompress_size = max_decompress_size

    def set_exception(
        self,
        exc: BaseException,
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and feed the result to the output stream."""
        if not size:
            return

        self.size += size
        self.out.total_compressed_bytes = self.size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            # Decompress with limit + 1 so we can detect if output exceeds limit
            chunk = self.decompressor.decompress_sync(
                chunk, max_length=self._max_decompress_size + 1
            )
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        # Check if decompression limit was exceeded
        if len(chunk) > self._max_decompress_size:
            raise DecompressSizeError(
                "Decompressed data exceeds the configured limit of %d bytes"
                % self._max_decompress_size
            )

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # An incomplete deflate stream at EOF means truncated data.
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

1065 

1066 

# Keep the pure-Python implementations reachable under *Py aliases even when
# the C extension below overrides the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the C-accelerated parser when extensions are enabled.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Fall back silently to the pure-Python implementations.
    pass