Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 83%

494 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-26 06:16 +0000

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor 

30from .helpers import ( 

31 DEBUG, 

32 NO_EXTENSIONS, 

33 BaseTimerContext, 

34 method_must_be_empty_body, 

35 status_code_must_be_empty_body, 

36) 

37from .http_exceptions import ( 

38 BadHttpMessage, 

39 BadStatusLine, 

40 ContentEncodingError, 

41 ContentLengthError, 

42 InvalidHeader, 

43 InvalidURLError, 

44 LineTooLong, 

45 TransferEncodingError, 

46) 

47from .http_writer import HttpVersion, HttpVersion10 

48from .log import internal_logger 

49from .streams import EMPTY_PAYLOAD, StreamReader 

50from .typedefs import RawHeaders 

51 

52__all__ = ( 

53 "HeadersParser", 

54 "HttpParser", 

55 "HttpRequestParser", 

56 "HttpResponseParser", 

57 "RawRequestMessage", 

58 "RawResponseMessage", 

59) 

60 

61_SEP = Literal[b"\r\n", b"\n"] 

62 

63ASCIISET: Final[Set[str]] = set(string.printable) 

64 

65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview 

66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens 

67# 

68# method = token 

69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 

70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 

71# token = 1*tchar 

72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") 

73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") 

74HDRRE: Final[Pattern[bytes]] = re.compile( 

75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]" 

76) 

77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+") 

78 

79 

class RawRequestMessage(NamedTuple):
    """Immutable result of parsing a request start line plus headers."""

    method: str  # request method token, e.g. "GET"
    path: str  # request-target exactly as received on the wire
    version: HttpVersion  # parsed HTTP-version, e.g. HttpVersion(1, 1)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool  # True if the connection must close after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # parsed form of `path`

91 

92 

class RawResponseMessage(NamedTuple):
    """Immutable result of parsing a response status line plus headers."""

    version: HttpVersion  # parsed HTTP-version from the status line
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool  # True if the connection must close after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked

103 

104 

# Message type produced by a concrete parser: raw request or raw response.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

106 

107 

class ParseState(IntEnum):
    """Body-framing strategy selected from the message headers."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # fixed-size body per Content-Length
    PARSE_CHUNKED = 2  # Transfer-Encoding: chunked
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes

113 

114 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-encoding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting a chunk-size line
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the CRLF terminating chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after last-chunk: either final CRLF or trailers
    PARSE_TRAILERS = 4  # discarding a trailer line

121 

122 

class HeadersParser:
    """Parses a block of RFC 9112 header lines into a ``CIMultiDict``.

    ``lines`` passed to :meth:`parse_headers` is the whole message head:
    index 0 is the start line (ignored here) and the list is terminated
    by an empty line.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
    ) -> None:
        # Size limits; exceeding either raises LineTooLong.
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Return (case-insensitive decoded headers, raw byte pairs).

        Raises InvalidHeader on malformed names/values and LineTooLong
        when a name or (folded) value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers: List[Tuple[bytes, bytes]] = []

        lines_idx = 1
        line = lines[1]  # first header line; an empty line ends the loop
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # No whitespace may surround the field name.
            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines (obsolete line folding)
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            # surrogateescape keeps arbitrary bytes round-trippable in str form
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # Reject embedded CR/LF/NUL in values (header injection guard).
            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

221 

222 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base incremental HTTP message parser.

    Raw bytes are fed via :meth:`feed_data`; start-line + header lines are
    accumulated until the blank line ending the header section, then
    :meth:`parse_message` (implemented by subclasses) produces a raw
    message and an appropriate payload reader is attached for the body.
    """

    # Lax parsing (LF-only line endings etc.); enabled on the response parser.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # buffered head lines of the current message
        self._tail = b""  # bytes held over between feed_data() calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the buffered start line + header lines into a raw message."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Handle end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # BUGFIX: this previously compared against the str "\r\n",
                # which can never equal a bytes element, so the terminator
                # was unconditionally appended; compare bytes as intended.
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes into the parser.

        Returns ``(messages, upgraded, tail)`` where *messages* is a list of
        ``(raw_message, payload_stream)`` pairs completed by this call,
        *upgraded* reports whether the connection switched protocols, and
        *tail* is any unconsumed data (non-empty only after an upgrade).
        """
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume leading \r\n before the start line
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found — header section complete
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels: everything after the headers is
                            # opaque data for the peer, treated as an upgrade.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # no separator yet — stash the partial line for next call
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # Headers that must appear at most once per message.
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            # Conflicting framing information is a request-smuggling vector.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

538 

539 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # request line; surrogateescape keeps arbitrary bytes representable
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must match the token grammar (RFC 9110)
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

632 

633 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Pick the default separator lazily: strict CRLF when debugging,
        # bare LF otherwise (some servers terminate lines with LF only).
        sep: _SEP = SEP if SEP is not None else (b"\r\n" if DEBUG else b"\n")
        return super().feed_data(data, sep, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        status_line = lines[0].decode("utf-8", "surrogateescape")

        # "HTTP-version SP status-code [SP reason-phrase]", split on
        # whitespace runs so sloppy spacing is tolerated.
        try:
            version_text, remainder = status_line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(status_line) from None

        pieces = remainder.split(maxsplit=1)
        if len(pieces) == 2:
            code_text, reason_text = pieces
        else:
            # No reason phrase present.
            code_text = remainder.strip()
            reason_text = ""

        if len(reason_text) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason_text))
            )

        # HTTP version
        version_match = VERSRE.fullmatch(version_text)
        if version_match is None:
            raise BadStatusLine(status_line)
        parsed_version = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit number
        if len(code_text) != 3 or not code_text.isdecimal():
            raise BadStatusLine(status_line)
        status_code = int(code_text)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # No Connection header: HTTP/1.0 and earlier default to close.
            close = parsed_version <= HttpVersion10

        return RawResponseMessage(
            parsed_version,
            status_code,
            reason_text.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

708 

709 

class HttpPayloadParser:
    """Incremental parser for a message body.

    Framing mode (fixed length, chunked, read-until-EOF, or no body) is
    chosen once in ``__init__`` from the message metadata; bytes are then
    pushed through :meth:`feed_data` into ``self.payload`` (optionally
    wrapped in a DeflateBuffer for decompression).
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes for PARSE_LENGTH mode
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk header/terminator held over
        self._auto_decompress = auto_decompress
        self._lax = lax
        # True once the whole body has been consumed (or none is expected).
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Signal end of stream; raises if the body is known to be truncated."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return (body complete, unconsumed remainder)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # prepend bytes held over from the previous call
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker (last-chunk)
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # incomplete size line — wait for more data
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

907 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream reader receiving decompressed bytes
        self.size = 0  # total compressed bytes seen so far
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # zlib handles both "gzip" and "deflate"
            self.decompressor = ZLibDecompressor(encoding=encoding)
            return
        if not HAS_BROTLI:  # pragma: no cover
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )
        self.decompressor = BrotliDecompressor()

    def set_exception(self, exc: BaseException) -> None:
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result downstream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            self.encoding == "deflate"
            and not self._started_decoding
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            decoded = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if decoded:
            self.out.feed_data(decoded, len(decoded))

    def feed_eof(self) -> None:
        """Flush the decompressor and propagate EOF downstream."""
        tail = self.decompressor.flush()

        if self.size > 0 or tail:
            self.out.feed_data(tail, len(tail))
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

979 

980 

# Pure-Python implementations stay importable under the *Py aliases even when
# the C accelerator below rebinds the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the Cython-based parser when extensions are enabled.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        # Accelerated implementations under the *C aliases.
        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Extension not built; keep the pure-Python implementations.
    pass