Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 59%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 _EXC_SENTINEL,
32 DEBUG,
33 EMPTY_BODY_METHODS,
34 EMPTY_BODY_STATUS_CODES,
35 NO_EXTENSIONS,
36 BaseTimerContext,
37 set_exception,
38)
39from .http_exceptions import (
40 BadHttpMessage,
41 BadHttpMethod,
42 BadStatusLine,
43 ContentEncodingError,
44 ContentLengthError,
45 InvalidHeader,
46 InvalidURLError,
47 LineTooLong,
48 TransferEncodingError,
49)
50from .http_writer import HttpVersion, HttpVersion10
51from .streams import EMPTY_PAYLOAD, StreamReader
52from .typedefs import RawHeaders
# Public API of this module; the C-accelerated implementations may shadow
# the pure-Python parsers at the bottom of the file.
__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

# Line separators accepted by feed_data(): strict CRLF, or bare LF in lax mode.
_SEP = Literal[b"\r\n", b"\n"]

# Characters from string.printable (digits, letters, punctuation, whitespace).
ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#         "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
# RFC 9110 "token" (used for methods and header field names).
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# HTTP-version, e.g. "HTTP/1.1"; ASCII-only digits.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Unsigned decimal number (status codes, Content-Length values).
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hexadecimal chunk-size field of the chunked transfer coding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
    """Immutable result of parsing a request line and headers (no body)."""

    method: str  # request method token, e.g. "GET"
    path: str  # request-target exactly as received on the request line
    version: HttpVersion
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # (name, value) pairs as bytes
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" from Content-Encoding
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding indicates chunked framing
    url: URL  # request-target parsed into a yarl.URL
class RawResponseMessage(NamedTuple):
    """Immutable result of parsing a status line and headers (no body)."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # (name, value) pairs as bytes
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" from Content-Encoding
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding indicates chunked framing
106_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the message body is framed (used by HttpPayloadParser)."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # fixed-size body per Content-Length
    PARSE_CHUNKED = 2  # chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body ends when the stream ends
class ChunkState(IntEnum):
    """Position within the chunked transfer-coding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;extensions]" + SEP
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the SEP that follows chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after last-chunk; either final SEP or trailers
    PARSE_TRAILERS = 4  # skipping a trailer line up to SEP
class HeadersParser:
    """Parse a block of header lines into a CIMultiDict plus raw bytes pairs.

    In lax mode, deprecated obs-fold line continuations (leading SP/HTAB)
    are accepted and joined into a single field value.
    """

    def __init__(
        self, max_line_size: int = 8190, max_field_size: int = 8190, lax: bool = False
    ) -> None:
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self._lax = lax

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse ``lines[1:]`` (index 0 is the start line) as header fields.

        Raises InvalidHeader for malformed names/values and LineTooLong
        when a name or field exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers: List[Tuple[bytes, bytes]] = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        # An empty line terminates the loop (end of the header block).
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            # Field names must be RFC 9110 tokens.
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Check if the upgrade header is supported."""
    upgrade_value = headers.get(hdrs.UPGRADE, "")
    return upgrade_value.lower() in {"tcp", "websocket"}
class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP/1.x message parser.

    feed_data() splits incoming bytes into start-line + header lines,
    delegates interpretation to the subclass parse_message(), and wires an
    HttpPayloadParser to a StreamReader for any body that follows.
    """

    # Lax parsing is only enabled by the response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated start-line/header lines
        self._tail = b""  # undelimited bytes carried between feed_data() calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size, self.lax)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT: ...

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool: ...

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # FIX: this previously compared against the *str* literal
                # "\r\n", which can never equal a bytes element, so the
                # terminating empty line was appended unconditionally.
                # Compare against bytes as intended.  (Stored lines are
                # SEP-stripped, so behavior is preserved in practice.)
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Consume ``data``; return (messages+payloads, upgraded, leftover)."""
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        should_close = False
        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels: treat the rest as opaque payload.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    # No full line yet; stash the remainder for the next call.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line plus headers into a RawRequestMessage."""
        # request line: "METHOD request-target HTTP/x.y"
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must be an RFC 9110 token
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 defaults to closing
                close = True
            else:  # HTTP 1.1 defaults to keep-alive
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Return True if the last transfer-coding is "chunked"; else reject."""
        if te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked":
            return True
        # For requests, any other final coding is a hard error:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Choose the default separator lazily: strict CRLF when debugging,
        # bare LF (stray CRs are stripped later) otherwise.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line plus headers into a RawResponseMessage."""
        status_line = lines[0].decode("utf-8", "surrogateescape")

        # Split "HTTP/x.y 200 Reason" into version and the remainder.
        first_split = status_line.split(maxsplit=1)
        if len(first_split) != 2:
            raise BadStatusLine(status_line) from None
        version, status = first_split

        # Separate the status code from the (optional) reason phrase.
        code_and_reason = status.split(maxsplit=1)
        if len(code_and_reason) == 2:
            status, reason = code_and_reason
        else:
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # HTTP version
        version_match = VERSRE.fullmatch(version)
        if version_match is None:
            raise BadStatusLine(status_line)
        version_o = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(status_line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            else:
                # With explicit framing the connection can be reused;
                # otherwise the body runs until EOF and must close.
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = not (
                    hdrs.CONTENT_LENGTH in headers
                    or hdrs.TRANSFER_ENCODING in headers
                )

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        final_coding = te.rsplit(",", maxsplit=1)[-1]
        return final_coding.strip(" \t").lower() == "chunked"
class HttpPayloadParser:
    """Parse an HTTP message body and feed it into a StreamReader.

    Supports three framing modes (ParseState): fixed Content-Length,
    chunked transfer coding, and read-until-EOF.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes in PARSE_LENGTH mode
        self._type = ParseState.PARSE_UNTIL_EOF
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk header/trailer between feeds
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True once the entire body has been consumed

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raise if the body is known to be truncated."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Consume body bytes; return (eof, bytes left over after the body)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            self._length = max(required - len(chunk), 0)
            self.payload.feed_data(chunk[:required])
            if self._length == 0:
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Prepend bytes saved from the previous incomplete feed.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = BadHttpMessage(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        # chunk-size must be pure hex digits
                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Size line incomplete; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    self._chunk_size = max(required - len(chunk), 0)
                    self.payload.feed_data(chunk[:required])

                    if self._chunk_size:
                        return False, b""
                    chunk = chunk[required:]
                    self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                    self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk)

        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # zlib handles both gzip and deflate content codings.
            self.decompressor = ZLibDecompressor(encoding=encoding)
        elif HAS_BROTLI:
            self.decompressor = BrotliDecompressor()
        else:
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )

    def set_exception(
        self,
        exc: Union[Type[BaseException], BaseException],
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        # Propagate the failure to the wrapped output stream.
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes) -> None:
        """Decompress ``chunk`` and forward the result to the output stream."""
        if not chunk:
            return

        self.size += len(chunk)

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        looks_like_zlib = chunk[0] & 0xF == 8
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and not looks_like_zlib
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk)

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        trailing = self.decompressor.flush()

        if trailing or self.size > 0:
            self.out.feed_data(trailing)
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Pure-Python implementations remain importable under the *Py aliases.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

# If the C-accelerated extension is available (and extensions are not
# disabled), shadow the public names with it; the accelerated classes are
# also exposed under the *C aliases.
with suppress(ImportError):
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage