Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/aiohttp/http_parser.py: 20%
497 statements
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
« prev ^ index » next coverage.py v7.3.2, created at 2023-12-08 06:40 +0000
1import abc
2import asyncio
3import re
4import string
5from contextlib import suppress
6from enum import IntEnum
7from typing import (
8 Any,
9 ClassVar,
10 Final,
11 Generic,
12 List,
13 Literal,
14 NamedTuple,
15 Optional,
16 Pattern,
17 Set,
18 Tuple,
19 Type,
20 TypeVar,
21 Union,
22)
24from multidict import CIMultiDict, CIMultiDictProxy, istr
25from yarl import URL
27from . import hdrs
28from .base_protocol import BaseProtocol
29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
30from .helpers import (
31 DEBUG,
32 NO_EXTENSIONS,
33 BaseTimerContext,
34 method_must_be_empty_body,
35 status_code_must_be_empty_body,
36)
37from .http_exceptions import (
38 BadHttpMessage,
39 BadStatusLine,
40 ContentEncodingError,
41 ContentLengthError,
42 InvalidHeader,
43 InvalidURLError,
44 LineTooLong,
45 TransferEncodingError,
46)
47from .http_writer import HttpVersion, HttpVersion10
48from .log import internal_logger
49from .streams import EMPTY_PAYLOAD, StreamReader
50from .typedefs import RawHeaders
52__all__ = (
53 "HeadersParser",
54 "HttpParser",
55 "HttpRequestParser",
56 "HttpResponseParser",
57 "RawRequestMessage",
58 "RawResponseMessage",
59)
61_SEP = Literal[b"\r\n", b"\n"]
63ASCIISET: Final[Set[str]] = set(string.printable)
65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
67#
68# method = token
69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
71# token = 1*tchar
72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)")
74HDRRE: Final[Pattern[bytes]] = re.compile(
75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]"
76)
77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+")
class RawRequestMessage(NamedTuple):
    """Immutable result of parsing an HTTP request line plus headers."""

    method: str
    path: str  # request-target exactly as received (still percent-encoded)
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"
    raw_headers: RawHeaders  # (name, value) byte pairs, undecoded
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade was requested
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # path parsed into a yarl.URL
class RawResponseMessage(NamedTuple):
    """Immutable result of parsing an HTTP status line plus headers."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase, possibly empty
    headers: CIMultiDictProxy[str]
    raw_headers: RawHeaders  # (name, value) byte pairs, undecoded
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade was signalled
    chunked: bool  # Transfer-Encoding: chunked
# Message type produced by a concrete parser: request or response message.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)
class ParseState(IntEnum):
    """How the payload (message body) is delimited."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # body bounded by Content-Length
    PARSE_CHUNKED = 2  # body uses chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes
class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-coding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]\r\n"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF after chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after the 0-size chunk; trailers may follow
    PARSE_TRAILERS = 4  # discarding trailer lines until blank line
class HeadersParser:
    """Parse a block of RFC 9112 header field lines into a CIMultiDict.

    ``lines`` given to :meth:`parse_headers` holds the start line at index 0
    (ignored here), one header line per entry, and a terminating empty line.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        # NOTE(review): max_headers is stored but not enforced in this parser.
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Return ``(headers, raw_headers)`` parsed from *lines*.

        Raises InvalidHeader for malformed fields and LineTooLong when a
        name or (folded) value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # BUGFIX: an empty header name (a line like b":value") previously
            # raised IndexError from bname[0] below; reject it explicitly.
            if not bname:
                raise InvalidHeader(line)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
class HttpParser(abc.ABC, Generic[_MsgT]):
    """Incremental HTTP/1.x message parser shared by request/response parsers.

    Feed raw bytes via :meth:`feed_data`; completed ``(message, payload)``
    pairs are returned together with an upgrade flag and any unconsumed tail.
    """

    # Lax mode relaxes line-ending handling; enabled on the response
    # parser subclass only.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated start/header lines
        self._tail = b""  # bytes carried over between feed_data calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Turn accumulated start-line + header lines into a message."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a final partial message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # BUGFIX: this previously compared bytes to the *str* "\r\n",
                # which is always unequal in Python 3.  Compare bytes to
                # bytes instead (behavior is unchanged in practice because
                # split lines never contain the separator).
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Parse *data*, returning ``(messages, upgraded, leftover_bytes)``."""
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels bytes until EOF once established.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # No separator yet: stash the partial line for next call.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val
class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line plus header lines into RawRequestMessage."""
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        # NOTE(review): only the request-target is length-checked here,
        # although the error message says "Status line".
        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 closes by default
                close = True
            else:  # HTTP 1.1 keeps the connection alive by default
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )
class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default to bare-LF separators outside DEBUG mode so servers that
        # emit LF-only line endings can still be parsed (lax mode).
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line plus header lines into RawResponseMessage."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # No reason phrase present.
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit number
        if len(status) != 3 or not status.isdecimal():
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # HTTP/1.0 and below close by default; HTTP/1.1 keeps alive.
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )
class HttpPayloadParser:
    """Incremental parser for an HTTP message body.

    Feeds parsed body bytes into *payload* (optionally through a
    DeflateBuffer decompression wrapper).  The framing mode is chosen in
    ``__init__`` from the message metadata: Content-Length, chunked
    transfer coding, read-until-EOF, or no body at all.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk framing kept between feeds
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True once the whole body has been consumed

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raise if the body framing is incomplete."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Consume body bytes; return ``(body_complete, leftover_bytes)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""
class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream reader receiving decompressed bytes
        self.size = 0  # total compressed bytes fed so far
        self.encoding = encoding  # "gzip", "deflate" or "br"
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        else:
            self.decompressor = ZLibDecompressor(encoding=encoding)

    def set_exception(self, exc: BaseException) -> None:
        # Propagate a parsing error to the downstream reader.
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result downstream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and finish the downstream reader."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # A deflate stream that never reached its end marker is truncated.
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        # Pass chunk boundaries through to the underlying reader.
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()
# Keep the pure-Python implementations reachable under the *Py aliases.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

# Prefer the C-accelerated parser when extensions are enabled and built;
# fall back silently to the Python implementations otherwise.
try:
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    pass