Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 84%
497 statements
« prev ^ index » next coverage.py v7.4.1, created at 2024-02-09 06:47 +0000
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 DEBUG,
32 NO_EXTENSIONS,
33 BaseTimerContext,
34 method_must_be_empty_body,
35 status_code_must_be_empty_body,
36)
37from .http_exceptions import (
38 BadHttpMessage,
39 BadStatusLine,
40 ContentEncodingError,
41 ContentLengthError,
42 InvalidHeader,
43 InvalidURLError,
44 LineTooLong,
45 TransferEncodingError,
46)
47from .http_writer import HttpVersion, HttpVersion10
48from .log import internal_logger
49from .streams import EMPTY_PAYLOAD, StreamReader
50from .typedefs import RawHeaders
52__all__ = (
53 "HeadersParser",
54 "HttpParser",
55 "HttpRequestParser",
56 "HttpResponseParser",
57 "RawRequestMessage",
58 "RawResponseMessage",
59)
61_SEP = Literal[b"\r\n", b"\n"]
63ASCIISET: Final[Set[str]] = set(string.printable)
65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
67#
68# method = token
69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
71# token = 1*tchar
72_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
73TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
74VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
75DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
76HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
79class RawRequestMessage(NamedTuple):
80 method: str
81 path: str
82 version: HttpVersion
83 headers: CIMultiDictProxy[str]
84 raw_headers: RawHeaders
85 should_close: bool
86 compression: Optional[str]
87 upgrade: bool
88 chunked: bool
89 url: URL
92class RawResponseMessage(NamedTuple):
93 version: HttpVersion
94 code: int
95 reason: str
96 headers: CIMultiDictProxy[str]
97 raw_headers: RawHeaders
98 should_close: bool
99 compression: Optional[str]
100 upgrade: bool
101 chunked: bool
104_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
107class ParseState(IntEnum):
108 PARSE_NONE = 0
109 PARSE_LENGTH = 1
110 PARSE_CHUNKED = 2
111 PARSE_UNTIL_EOF = 3
114class ChunkState(IntEnum):
115 PARSE_CHUNKED_SIZE = 0
116 PARSE_CHUNKED_CHUNK = 1
117 PARSE_CHUNKED_CHUNK_EOF = 2
118 PARSE_MAYBE_TRAILERS = 3
119 PARSE_TRAILERS = 4
122class HeadersParser:
123 def __init__(
124 self,
125 max_line_size: int = 8190,
126 max_field_size: int = 8190,
127 ) -> None:
128 self.max_line_size = max_line_size
129 self.max_field_size = max_field_size
131 def parse_headers(
132 self, lines: List[bytes]
133 ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
134 headers: CIMultiDict[str] = CIMultiDict()
135 # note: "raw" does not mean inclusion of OWS before/after the field value
136 raw_headers = []
138 lines_idx = 1
139 line = lines[1]
140 line_count = len(lines)
142 while line:
143 # Parse initial header name : value pair.
144 try:
145 bname, bvalue = line.split(b":", 1)
146 except ValueError:
147 raise InvalidHeader(line) from None
149 if len(bname) == 0:
150 raise InvalidHeader(bname)
152 # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
153 if {bname[0], bname[-1]} & {32, 9}: # {" ", "\t"}
154 raise InvalidHeader(line)
156 bvalue = bvalue.lstrip(b" \t")
157 if len(bname) > self.max_field_size:
158 raise LineTooLong(
159 "request header name {}".format(
160 bname.decode("utf8", "backslashreplace")
161 ),
162 str(self.max_field_size),
163 str(len(bname)),
164 )
165 name = bname.decode("utf-8", "surrogateescape")
166 if not TOKENRE.fullmatch(name):
167 raise InvalidHeader(bname)
169 header_length = len(bvalue)
171 # next line
172 lines_idx += 1
173 line = lines[lines_idx]
175 # consume continuation lines
176 continuation = line and line[0] in (32, 9) # (' ', '\t')
178 # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
179 if continuation:
180 bvalue_lst = [bvalue]
181 while continuation:
182 header_length += len(line)
183 if header_length > self.max_field_size:
184 raise LineTooLong(
185 "request header field {}".format(
186 bname.decode("utf8", "backslashreplace")
187 ),
188 str(self.max_field_size),
189 str(header_length),
190 )
191 bvalue_lst.append(line)
193 # next line
194 lines_idx += 1
195 if lines_idx < line_count:
196 line = lines[lines_idx]
197 if line:
198 continuation = line[0] in (32, 9) # (' ', '\t')
199 else:
200 line = b""
201 break
202 bvalue = b"".join(bvalue_lst)
203 else:
204 if header_length > self.max_field_size:
205 raise LineTooLong(
206 "request header field {}".format(
207 bname.decode("utf8", "backslashreplace")
208 ),
209 str(self.max_field_size),
210 str(header_length),
211 )
213 bvalue = bvalue.strip(b" \t")
214 value = bvalue.decode("utf-8", "surrogateescape")
216 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
217 if "\n" in value or "\r" in value or "\x00" in value:
218 raise InvalidHeader(bvalue)
220 headers.add(name, value)
221 raw_headers.append((bname, bvalue))
223 return (CIMultiDictProxy(headers), tuple(raw_headers))
226class HttpParser(abc.ABC, Generic[_MsgT]):
227 lax: ClassVar[bool] = False
229 def __init__(
230 self,
231 protocol: BaseProtocol,
232 loop: asyncio.AbstractEventLoop,
233 limit: int,
234 max_line_size: int = 8190,
235 max_field_size: int = 8190,
236 timer: Optional[BaseTimerContext] = None,
237 code: Optional[int] = None,
238 method: Optional[str] = None,
239 readall: bool = False,
240 payload_exception: Optional[Type[BaseException]] = None,
241 response_with_body: bool = True,
242 read_until_eof: bool = False,
243 auto_decompress: bool = True,
244 ) -> None:
245 self.protocol = protocol
246 self.loop = loop
247 self.max_line_size = max_line_size
248 self.max_field_size = max_field_size
249 self.timer = timer
250 self.code = code
251 self.method = method
252 self.readall = readall
253 self.payload_exception = payload_exception
254 self.response_with_body = response_with_body
255 self.read_until_eof = read_until_eof
257 self._lines: List[bytes] = []
258 self._tail = b""
259 self._upgraded = False
260 self._payload = None
261 self._payload_parser: Optional[HttpPayloadParser] = None
262 self._auto_decompress = auto_decompress
263 self._limit = limit
264 self._headers_parser = HeadersParser(max_line_size, max_field_size)
266 @abc.abstractmethod
267 def parse_message(self, lines: List[bytes]) -> _MsgT:
268 pass
270 def feed_eof(self) -> Optional[_MsgT]:
271 if self._payload_parser is not None:
272 self._payload_parser.feed_eof()
273 self._payload_parser = None
274 else:
275 # try to extract partial message
276 if self._tail:
277 self._lines.append(self._tail)
279 if self._lines:
280 if self._lines[-1] != "\r\n":
281 self._lines.append(b"")
282 with suppress(Exception):
283 return self.parse_message(self._lines)
284 return None
286 def feed_data(
287 self,
288 data: bytes,
289 SEP: _SEP = b"\r\n",
290 EMPTY: bytes = b"",
291 CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
292 METH_CONNECT: str = hdrs.METH_CONNECT,
293 SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
294 ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
295 messages = []
297 if self._tail:
298 data, self._tail = self._tail + data, b""
300 data_len = len(data)
301 start_pos = 0
302 loop = self.loop
304 while start_pos < data_len:
305 # read HTTP message (request/response line + headers), \r\n\r\n
306 # and split by lines
307 if self._payload_parser is None and not self._upgraded:
308 pos = data.find(SEP, start_pos)
309 # consume \r\n
310 if pos == start_pos and not self._lines:
311 start_pos = pos + len(SEP)
312 continue
314 if pos >= start_pos:
315 # line found
316 line = data[start_pos:pos]
317 if SEP == b"\n": # For lax response parsing
318 line = line.rstrip(b"\r")
319 self._lines.append(line)
320 start_pos = pos + len(SEP)
322 # \r\n\r\n found
323 if self._lines[-1] == EMPTY:
324 try:
325 msg: _MsgT = self.parse_message(self._lines)
326 finally:
327 self._lines.clear()
329 def get_content_length() -> Optional[int]:
330 # payload length
331 length_hdr = msg.headers.get(CONTENT_LENGTH)
332 if length_hdr is None:
333 return None
335 # Shouldn't allow +/- or other number formats.
336 # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
337 # msg.headers is already stripped of leading/trailing wsp
338 if not DIGITS.fullmatch(length_hdr):
339 raise InvalidHeader(CONTENT_LENGTH)
341 return int(length_hdr)
343 length = get_content_length()
344 # do not support old websocket spec
345 if SEC_WEBSOCKET_KEY1 in msg.headers:
346 raise InvalidHeader(SEC_WEBSOCKET_KEY1)
348 self._upgraded = msg.upgrade
350 method = getattr(msg, "method", self.method)
351 # code is only present on responses
352 code = getattr(msg, "code", 0)
354 assert self.protocol is not None
355 # calculate payload
356 empty_body = status_code_must_be_empty_body(code) or bool(
357 method and method_must_be_empty_body(method)
358 )
359 if not empty_body and (
360 (length is not None and length > 0)
361 or msg.chunked
362 and not msg.upgrade
363 ):
364 payload = StreamReader(
365 self.protocol,
366 timer=self.timer,
367 loop=loop,
368 limit=self._limit,
369 )
370 payload_parser = HttpPayloadParser(
371 payload,
372 length=length,
373 chunked=msg.chunked,
374 method=method,
375 compression=msg.compression,
376 code=self.code,
377 readall=self.readall,
378 response_with_body=self.response_with_body,
379 auto_decompress=self._auto_decompress,
380 lax=self.lax,
381 )
382 if not payload_parser.done:
383 self._payload_parser = payload_parser
384 elif method == METH_CONNECT:
385 assert isinstance(msg, RawRequestMessage)
386 payload = StreamReader(
387 self.protocol,
388 timer=self.timer,
389 loop=loop,
390 limit=self._limit,
391 )
392 self._upgraded = True
393 self._payload_parser = HttpPayloadParser(
394 payload,
395 method=msg.method,
396 compression=msg.compression,
397 readall=True,
398 auto_decompress=self._auto_decompress,
399 lax=self.lax,
400 )
401 elif not empty_body and length is None and self.read_until_eof:
402 payload = StreamReader(
403 self.protocol,
404 timer=self.timer,
405 loop=loop,
406 limit=self._limit,
407 )
408 payload_parser = HttpPayloadParser(
409 payload,
410 length=length,
411 chunked=msg.chunked,
412 method=method,
413 compression=msg.compression,
414 code=self.code,
415 readall=True,
416 response_with_body=self.response_with_body,
417 auto_decompress=self._auto_decompress,
418 lax=self.lax,
419 )
420 if not payload_parser.done:
421 self._payload_parser = payload_parser
422 else:
423 payload = EMPTY_PAYLOAD
425 messages.append((msg, payload))
426 else:
427 self._tail = data[start_pos:]
428 data = EMPTY
429 break
431 # no parser, just store
432 elif self._payload_parser is None and self._upgraded:
433 assert not self._lines
434 break
436 # feed payload
437 elif data and start_pos < data_len:
438 assert not self._lines
439 assert self._payload_parser is not None
440 try:
441 eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
442 except BaseException as exc:
443 if self.payload_exception is not None:
444 self._payload_parser.payload.set_exception(
445 self.payload_exception(str(exc))
446 )
447 else:
448 self._payload_parser.payload.set_exception(exc)
450 eof = True
451 data = b""
453 if eof:
454 start_pos = 0
455 data_len = len(data)
456 self._payload_parser = None
457 continue
458 else:
459 break
461 if data and start_pos < data_len:
462 data = data[start_pos:]
463 else:
464 data = EMPTY
466 return messages, self._upgraded, data
468 def parse_headers(
469 self, lines: List[bytes]
470 ) -> Tuple[
471 "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
472 ]:
473 """Parses RFC 5322 headers from a stream.
475 Line continuations are supported. Returns list of header name
476 and value pairs. Header name is in upper case.
477 """
478 headers, raw_headers = self._headers_parser.parse_headers(lines)
479 close_conn = None
480 encoding = None
481 upgrade = False
482 chunked = False
484 # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
485 # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
486 singletons = (
487 hdrs.CONTENT_LENGTH,
488 hdrs.CONTENT_LOCATION,
489 hdrs.CONTENT_RANGE,
490 hdrs.CONTENT_TYPE,
491 hdrs.ETAG,
492 hdrs.HOST,
493 hdrs.MAX_FORWARDS,
494 hdrs.SERVER,
495 hdrs.TRANSFER_ENCODING,
496 hdrs.USER_AGENT,
497 )
498 bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
499 if bad_hdr is not None:
500 raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")
502 # keep-alive
503 conn = headers.get(hdrs.CONNECTION)
504 if conn:
505 v = conn.lower()
506 if v == "close":
507 close_conn = True
508 elif v == "keep-alive":
509 close_conn = False
510 # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
511 elif v == "upgrade" and headers.get(hdrs.UPGRADE):
512 upgrade = True
514 # encoding
515 enc = headers.get(hdrs.CONTENT_ENCODING)
516 if enc:
517 enc = enc.lower()
518 if enc in ("gzip", "deflate", "br"):
519 encoding = enc
521 # chunking
522 te = headers.get(hdrs.TRANSFER_ENCODING)
523 if te is not None:
524 if "chunked" == te.lower():
525 chunked = True
526 else:
527 raise BadHttpMessage("Request has invalid `Transfer-Encoding`")
529 if hdrs.CONTENT_LENGTH in headers:
530 raise BadHttpMessage(
531 "Transfer-Encoding can't be present with Content-Length",
532 )
534 return (headers, raw_headers, close_conn, encoding, upgrade, chunked)
536 def set_upgraded(self, val: bool) -> None:
537 """Set connection upgraded (to websocket) mode.
539 :param bool val: new state.
540 """
541 self._upgraded = val
544class HttpRequestParser(HttpParser[RawRequestMessage]):
545 """Read request status line.
547 Exception .http_exceptions.BadStatusLine
548 could be raised in case of any errors in status line.
549 Returns RawRequestMessage.
550 """
552 def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
553 # request line
554 line = lines[0].decode("utf-8", "surrogateescape")
555 try:
556 method, path, version = line.split(" ", maxsplit=2)
557 except ValueError:
558 raise BadStatusLine(line) from None
560 if len(path) > self.max_line_size:
561 raise LineTooLong(
562 "Status line is too long", str(self.max_line_size), str(len(path))
563 )
565 # method
566 if not TOKENRE.fullmatch(method):
567 raise BadStatusLine(method)
569 # version
570 match = VERSRE.fullmatch(version)
571 if match is None:
572 raise BadStatusLine(line)
573 version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
575 if method == "CONNECT":
576 # authority-form,
577 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
578 url = URL.build(authority=path, encoded=True)
579 elif path.startswith("/"):
580 # origin-form,
581 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
582 path_part, _hash_separator, url_fragment = path.partition("#")
583 path_part, _question_mark_separator, qs_part = path_part.partition("?")
585 # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
586 # NOTE: parser does, otherwise it results into the same
587 # NOTE: HTTP Request-Line input producing different
588 # NOTE: `yarl.URL()` objects
589 url = URL.build(
590 path=path_part,
591 query_string=qs_part,
592 fragment=url_fragment,
593 encoded=True,
594 )
595 elif path == "*" and method == "OPTIONS":
596 # asterisk-form,
597 url = URL(path, encoded=True)
598 else:
599 # absolute-form for proxy maybe,
600 # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
601 url = URL(path, encoded=True)
602 if url.scheme == "":
603 # not absolute-form
604 raise InvalidURLError(
605 path.encode(errors="surrogateescape").decode("latin1")
606 )
608 # read headers
609 (
610 headers,
611 raw_headers,
612 close,
613 compression,
614 upgrade,
615 chunked,
616 ) = self.parse_headers(lines)
618 if close is None: # then the headers weren't set in the request
619 if version_o <= HttpVersion10: # HTTP 1.0 must asks to not close
620 close = True
621 else: # HTTP 1.1 must ask to close.
622 close = False
624 return RawRequestMessage(
625 method,
626 path,
627 version_o,
628 headers,
629 raw_headers,
630 close,
631 compression,
632 upgrade,
633 chunked,
634 url,
635 )
638class HttpResponseParser(HttpParser[RawResponseMessage]):
639 """Read response status line and headers.
641 BadStatusLine could be raised in case of any errors in status line.
642 Returns RawResponseMessage.
643 """
645 # Lax mode should only be enabled on response parser.
646 lax = not DEBUG
648 def feed_data(
649 self,
650 data: bytes,
651 SEP: Optional[_SEP] = None,
652 *args: Any,
653 **kwargs: Any,
654 ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
655 if SEP is None:
656 SEP = b"\r\n" if DEBUG else b"\n"
657 return super().feed_data(data, SEP, *args, **kwargs)
659 def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
660 line = lines[0].decode("utf-8", "surrogateescape")
661 try:
662 version, status = line.split(maxsplit=1)
663 except ValueError:
664 raise BadStatusLine(line) from None
666 try:
667 status, reason = status.split(maxsplit=1)
668 except ValueError:
669 status = status.strip()
670 reason = ""
672 if len(reason) > self.max_line_size:
673 raise LineTooLong(
674 "Status line is too long", str(self.max_line_size), str(len(reason))
675 )
677 # version
678 match = VERSRE.fullmatch(version)
679 if match is None:
680 raise BadStatusLine(line)
681 version_o = HttpVersion(int(match.group(1)), int(match.group(2)))
683 # The status code is a three-digit ASCII number, no padding
684 if len(status) != 3 or not DIGITS.fullmatch(status):
685 raise BadStatusLine(line)
686 status_i = int(status)
688 # read headers
689 (
690 headers,
691 raw_headers,
692 close,
693 compression,
694 upgrade,
695 chunked,
696 ) = self.parse_headers(lines)
698 if close is None:
699 close = version_o <= HttpVersion10
701 return RawResponseMessage(
702 version_o,
703 status_i,
704 reason.strip(),
705 headers,
706 raw_headers,
707 close,
708 compression,
709 upgrade,
710 chunked,
711 )
714class HttpPayloadParser:
715 def __init__(
716 self,
717 payload: StreamReader,
718 length: Optional[int] = None,
719 chunked: bool = False,
720 compression: Optional[str] = None,
721 code: Optional[int] = None,
722 method: Optional[str] = None,
723 readall: bool = False,
724 response_with_body: bool = True,
725 auto_decompress: bool = True,
726 lax: bool = False,
727 ) -> None:
728 self._length = 0
729 self._type = ParseState.PARSE_NONE
730 self._chunk = ChunkState.PARSE_CHUNKED_SIZE
731 self._chunk_size = 0
732 self._chunk_tail = b""
733 self._auto_decompress = auto_decompress
734 self._lax = lax
735 self.done = False
737 # payload decompression wrapper
738 if response_with_body and compression and self._auto_decompress:
739 real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
740 payload, compression
741 )
742 else:
743 real_payload = payload
745 # payload parser
746 if not response_with_body:
747 # don't parse payload if it's not expected to be received
748 self._type = ParseState.PARSE_NONE
749 real_payload.feed_eof()
750 self.done = True
752 elif chunked:
753 self._type = ParseState.PARSE_CHUNKED
754 elif length is not None:
755 self._type = ParseState.PARSE_LENGTH
756 self._length = length
757 if self._length == 0:
758 real_payload.feed_eof()
759 self.done = True
760 else:
761 if readall and code != 204:
762 self._type = ParseState.PARSE_UNTIL_EOF
763 elif method in ("PUT", "POST"):
764 internal_logger.warning( # pragma: no cover
765 "Content-Length or Transfer-Encoding header is required"
766 )
767 self._type = ParseState.PARSE_NONE
768 real_payload.feed_eof()
769 self.done = True
771 self.payload = real_payload
773 def feed_eof(self) -> None:
774 if self._type == ParseState.PARSE_UNTIL_EOF:
775 self.payload.feed_eof()
776 elif self._type == ParseState.PARSE_LENGTH:
777 raise ContentLengthError(
778 "Not enough data for satisfy content length header."
779 )
780 elif self._type == ParseState.PARSE_CHUNKED:
781 raise TransferEncodingError(
782 "Not enough data for satisfy transfer length header."
783 )
785 def feed_data(
786 self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
787 ) -> Tuple[bool, bytes]:
788 # Read specified amount of bytes
789 if self._type == ParseState.PARSE_LENGTH:
790 required = self._length
791 chunk_len = len(chunk)
793 if required >= chunk_len:
794 self._length = required - chunk_len
795 self.payload.feed_data(chunk, chunk_len)
796 if self._length == 0:
797 self.payload.feed_eof()
798 return True, b""
799 else:
800 self._length = 0
801 self.payload.feed_data(chunk[:required], required)
802 self.payload.feed_eof()
803 return True, chunk[required:]
805 # Chunked transfer encoding parser
806 elif self._type == ParseState.PARSE_CHUNKED:
807 if self._chunk_tail:
808 chunk = self._chunk_tail + chunk
809 self._chunk_tail = b""
811 while chunk:
812 # read next chunk size
813 if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
814 pos = chunk.find(SEP)
815 if pos >= 0:
816 i = chunk.find(CHUNK_EXT, 0, pos)
817 if i >= 0:
818 size_b = chunk[:i] # strip chunk-extensions
819 else:
820 size_b = chunk[:pos]
822 if self._lax: # Allow whitespace in lax mode.
823 size_b = size_b.strip()
825 if not re.fullmatch(HEXDIGITS, size_b):
826 exc = TransferEncodingError(
827 chunk[:pos].decode("ascii", "surrogateescape")
828 )
829 self.payload.set_exception(exc)
830 raise exc
831 size = int(bytes(size_b), 16)
833 chunk = chunk[pos + len(SEP) :]
834 if size == 0: # eof marker
835 self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
836 if self._lax and chunk.startswith(b"\r"):
837 chunk = chunk[1:]
838 else:
839 self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
840 self._chunk_size = size
841 self.payload.begin_http_chunk_receiving()
842 else:
843 self._chunk_tail = chunk
844 return False, b""
846 # read chunk and feed buffer
847 if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
848 required = self._chunk_size
849 chunk_len = len(chunk)
851 if required > chunk_len:
852 self._chunk_size = required - chunk_len
853 self.payload.feed_data(chunk, chunk_len)
854 return False, b""
855 else:
856 self._chunk_size = 0
857 self.payload.feed_data(chunk[:required], required)
858 chunk = chunk[required:]
859 if self._lax and chunk.startswith(b"\r"):
860 chunk = chunk[1:]
861 self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
862 self.payload.end_http_chunk_receiving()
864 # toss the CRLF at the end of the chunk
865 if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
866 if chunk[: len(SEP)] == SEP:
867 chunk = chunk[len(SEP) :]
868 self._chunk = ChunkState.PARSE_CHUNKED_SIZE
869 else:
870 self._chunk_tail = chunk
871 return False, b""
873 # if stream does not contain trailer, after 0\r\n
874 # we should get another \r\n otherwise
875 # trailers needs to be skipped until \r\n\r\n
876 if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
877 head = chunk[: len(SEP)]
878 if head == SEP:
879 # end of stream
880 self.payload.feed_eof()
881 return True, chunk[len(SEP) :]
882 # Both CR and LF, or only LF may not be received yet. It is
883 # expected that CRLF or LF will be shown at the very first
884 # byte next time, otherwise trailers should come. The last
885 # CRLF which marks the end of response might not be
886 # contained in the same TCP segment which delivered the
887 # size indicator.
888 if not head:
889 return False, b""
890 if head == SEP[:1]:
891 self._chunk_tail = head
892 return False, b""
893 self._chunk = ChunkState.PARSE_TRAILERS
895 # read and discard trailer up to the CRLF terminator
896 if self._chunk == ChunkState.PARSE_TRAILERS:
897 pos = chunk.find(SEP)
898 if pos >= 0:
899 chunk = chunk[pos + len(SEP) :]
900 self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
901 else:
902 self._chunk_tail = chunk
903 return False, b""
905 # Read all bytes until eof
906 elif self._type == ParseState.PARSE_UNTIL_EOF:
907 self.payload.feed_data(chunk, len(chunk))
909 return False, b""
912class DeflateBuffer:
913 """DeflateStream decompress stream and feed data into specified stream."""
915 def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
916 self.out = out
917 self.size = 0
918 self.encoding = encoding
919 self._started_decoding = False
921 self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
922 if encoding == "br":
923 if not HAS_BROTLI: # pragma: no cover
924 raise ContentEncodingError(
925 "Can not decode content-encoding: brotli (br). "
926 "Please install `Brotli`"
927 )
928 self.decompressor = BrotliDecompressor()
929 else:
930 self.decompressor = ZLibDecompressor(encoding=encoding)
932 def set_exception(self, exc: BaseException) -> None:
933 self.out.set_exception(exc)
935 def feed_data(self, chunk: bytes, size: int) -> None:
936 if not size:
937 return
939 self.size += size
941 # RFC1950
942 # bits 0..3 = CM = 0b1000 = 8 = "deflate"
943 # bits 4..7 = CINFO = 1..7 = windows size.
944 if (
945 not self._started_decoding
946 and self.encoding == "deflate"
947 and chunk[0] & 0xF != 8
948 ):
949 # Change the decoder to decompress incorrectly compressed data
950 # Actually we should issue a warning about non-RFC-compliant data.
951 self.decompressor = ZLibDecompressor(
952 encoding=self.encoding, suppress_deflate_header=True
953 )
955 try:
956 chunk = self.decompressor.decompress_sync(chunk)
957 except Exception:
958 raise ContentEncodingError(
959 "Can not decode content-encoding: %s" % self.encoding
960 )
962 self._started_decoding = True
964 if chunk:
965 self.out.feed_data(chunk, len(chunk))
967 def feed_eof(self) -> None:
968 chunk = self.decompressor.flush()
970 if chunk or self.size > 0:
971 self.out.feed_data(chunk, len(chunk))
972 # decompressor is not brotli unless encoding is "br"
973 if self.encoding == "deflate" and not self.decompressor.eof: # type: ignore[union-attr]
974 raise ContentEncodingError("deflate")
976 self.out.feed_eof()
978 def begin_http_chunk_receiving(self) -> None:
979 self.out.begin_http_chunk_receiving()
981 def end_http_chunk_receiving(self) -> None:
982 self.out.end_http_chunk_receiving()
985HttpRequestParserPy = HttpRequestParser
986HttpResponseParserPy = HttpResponseParser
987RawRequestMessagePy = RawRequestMessage
988RawResponseMessagePy = RawResponseMessage
990try:
991 if not NO_EXTENSIONS:
992 from ._http_parser import ( # type: ignore[import-not-found,no-redef]
993 HttpRequestParser,
994 HttpResponseParser,
995 RawRequestMessage,
996 RawResponseMessage,
997 )
999 HttpRequestParserC = HttpRequestParser
1000 HttpResponseParserC = HttpResponseParser
1001 RawRequestMessageC = RawRequestMessage
1002 RawResponseMessageC = RawResponseMessage
1003except ImportError: # pragma: no cover
1004 pass