Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/aiohttp/http_parser.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

551 statements  

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import ( 

30 DEFAULT_MAX_DECOMPRESS_SIZE, 

31 HAS_BROTLI, 

32 HAS_ZSTD, 

33 BrotliDecompressor, 

34 ZLibDecompressor, 

35 ZSTDDecompressor, 

36) 

37from .helpers import ( 

38 _EXC_SENTINEL, 

39 DEBUG, 

40 EMPTY_BODY_METHODS, 

41 EMPTY_BODY_STATUS_CODES, 

42 NO_EXTENSIONS, 

43 BaseTimerContext, 

44 set_exception, 

45) 

46from .http_exceptions import ( 

47 BadHttpMessage, 

48 BadHttpMethod, 

49 BadStatusLine, 

50 ContentEncodingError, 

51 ContentLengthError, 

52 DecompressSizeError, 

53 InvalidHeader, 

54 InvalidURLError, 

55 LineTooLong, 

56 TransferEncodingError, 

57) 

58from .http_writer import HttpVersion, HttpVersion10 

59from .streams import EMPTY_PAYLOAD, StreamReader 

60from .typedefs import RawHeaders 

61 

__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

# Line separator accepted by the parsers: strict CRLF, or bare LF in lax mode.
_SEP = Literal[b"\r\n", b"\n"]

# All printable ASCII characters (used elsewhere for sanity checks).
ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
# Matches an RFC 9110 token (method names, header field names).
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# https://www.rfc-editor.org/rfc/rfc9110#section-5.5-5
# Control characters forbidden in header field values (NUL, C0 minus HTAB, DEL).
_FIELD_VALUE_FORBIDDEN_CTL_RE: Final[Pattern[str]] = re.compile(
    r"[\x00-\x08\x0a-\x1f\x7f]"
)
# HTTP-version production: single digit major and minor.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Strict decimal digits (Content-Length); re.ASCII rejects Unicode digits.
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hexadecimal chunk-size digits for chunked transfer coding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")

91 

92 

class RawRequestMessage(NamedTuple):
    """Parsed HTTP request line plus headers (no body).

    Produced by HttpRequestParser.parse_message().
    """

    # Request method token, e.g. "GET" (validated against TOKENRE).
    method: str
    # Request target exactly as received (path / authority / absolute form).
    path: str
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"
    # Raw (name, value) byte pairs as received on the wire.
    raw_headers: RawHeaders
    # True if the connection must be closed after this message.
    should_close: bool
    # Content-Encoding value if it is a supported codec, else None.
    compression: Optional[str]
    # True when a supported protocol upgrade was requested.
    upgrade: bool
    # True when Transfer-Encoding ends with "chunked".
    chunked: bool
    # The request target parsed into a yarl.URL.
    url: URL

104 

105 

class RawResponseMessage(NamedTuple):
    """Parsed HTTP status line plus headers (no body).

    Produced by HttpResponseParser.parse_message().
    """

    version: HttpVersion
    # Three-digit status code.
    code: int
    # Reason phrase (may be empty).
    reason: str
    headers: CIMultiDictProxy[str]
    # Raw (name, value) byte pairs as received on the wire.
    raw_headers: RawHeaders
    # True if the connection must be closed after this message.
    should_close: bool
    # Content-Encoding value if it is a supported codec, else None.
    compression: Optional[str]
    # True when a supported protocol upgrade was signalled.
    upgrade: bool
    # True when Transfer-Encoding ends with "chunked".
    chunked: bool

116 

117 

# Message type produced by a concrete parser (request or response).
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

119 

120 

class ParseState(IntEnum):
    """How the length of a message body is determined."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # body delimited by Content-Length
    PARSE_CHUNKED = 2  # body uses chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes

127 

128 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-coding parser."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]CRLF"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk payload bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF after chunk payload
    PARSE_MAYBE_TRAILERS = 3  # zero-size chunk seen; trailers may follow
    PARSE_TRAILERS = 4  # reading trailer field lines

135 

136 

class HeadersParser:
    """Parse a block of raw header lines into a CIMultiDict.

    Enforces per-line and per-field size limits and, in lax mode only,
    accepts deprecated obs-fold line continuations.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        lax: bool = False,
    ) -> None:
        # Maximum length of a single header line.
        self.max_line_size = max_line_size
        # Maximum number of header lines accepted.
        self.max_headers = max_headers
        # Maximum total length of a (possibly folded) field value.
        self.max_field_size = max_field_size
        # Lax mode relaxes some checks (used for response parsing).
        self._lax = lax

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse *lines* (terminated by an empty line) into headers.

        Returns a read-only CIMultiDictProxy plus the raw (name, value)
        byte pairs.  Raises InvalidHeader or LineTooLong on malformed
        or oversized input.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers = []

        lines_idx = 0
        line = lines[lines_idx]
        line_count = len(lines)

        # Loop until the empty terminator line.
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            # Reject whitespace adjacent to the field name (request smuggling vector).
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            name = bname.decode("utf-8", "surrogateescape")
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                header_length = len(bvalue)
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        header_line = bname + b": " + b"".join(bvalue_lst)
                        raise LineTooLong(
                            header_line[:100] + b"...", self.max_field_size
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        # Ran out of input while folding; stop cleanly.
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            # Strict mode forbids all control chars; lax mode only the
            # characters that enable response splitting (CR, LF, NUL).
            if self._lax:
                if "\n" in value or "\r" in value or "\x00" in value:
                    raise InvalidHeader(bvalue)
            elif _FIELD_VALUE_FORBIDDEN_CTL_RE.search(value):
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

225 

226 

def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Return True if the Upgrade header names a protocol we can switch to."""
    upgrade_value = headers.get(hdrs.UPGRADE, "")
    # .lower() can fold some non-ASCII characters into ASCII, so the
    # ASCII check must happen before normalizing.
    if not upgrade_value.isascii():
        return False
    return upgrade_value.lower() in {"tcp", "websocket"}

232 

233 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Incremental HTTP/1.x message parser (shared request/response core).

    Subclasses implement :meth:`parse_message` (start line) and
    :meth:`_is_chunked_te` (Transfer-Encoding validation).  Feed raw
    bytes with :meth:`feed_data`; complete messages are returned along
    with a StreamReader for each payload.

    Fixes relative to the previous revision:
    * ``max_headers`` was assigned twice in ``__init__``.
    * ``feed_eof`` compared a ``bytes`` line to the ``str`` ``"\\r\\n"``,
      which is always unequal; it now checks for the empty terminator
      line, matching the evident intent.
    """

    # Lax parsing is only enabled on the response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 128,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        # Buffered header-block lines of the message currently being read.
        self._lines: List[bytes] = []
        # Bytes received after the last complete separator.
        self._tail = b""
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(
            max_line_size, max_headers, max_field_size, self.lax
        )

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the start line + header lines into a message object."""
        ...

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool:
        """Return True if Transfer-Encoding *te* denotes chunked coding."""
        ...

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a final partial message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Ensure the buffered block ends with the empty terminator
                # line that parse_message() expects.
                if self._lines[-1] != b"":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed *data* into the parser.

        Returns ``(messages, upgraded, tail)`` where *messages* is a list
        of ``(message, payload_stream)`` pairs completed by this call,
        *upgraded* indicates a protocol switch, and *tail* is unconsumed
        bytes (non-empty only after an upgrade).
        """
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop
        # The start line obeys max_line_size; subsequent header lines
        # obey max_field_size instead.
        max_line_length = self.max_line_size

        should_close = False
        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # A previous message on this connection demanded close;
                    # any further data is a smuggling attempt.
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    if len(line) > max_line_length:
                        raise LineTooLong(line[:100] + b"...", max_line_length)

                    self._lines.append(line)
                    # After processing the status/request line, everything is a header.
                    max_line_length = self.max_field_size

                    if len(self._lines) > self.max_headers:
                        raise BadHttpMessage("Too many headers received")

                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        # Trailers share the header budget.
                        max_trailers = self.max_headers - len(self._lines)
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            # Regular delimited payload (Content-Length/chunked).
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                                max_line_size=self.max_line_size,
                                max_field_size=self.max_field_size,
                                max_trailers=max_trailers,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT switches to a tunnel: treat the rest
                            # of the stream as opaque payload.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                                max_line_size=self.max_line_size,
                                max_field_size=self.max_field_size,
                                max_trailers=max_trailers,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            # No framing info: read until connection close.
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                                headers_parser=self._headers_parser,
                                max_line_size=self.max_line_size,
                                max_field_size=self.max_field_size,
                                max_trailers=max_trailers,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    # No separator yet: buffer the remainder as tail.
                    self._tail = data[start_pos:]
                    if len(self._tail) > self.max_line_size:
                        raise LineTooLong(self._tail[:100] + b"...", self.max_line_size)
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    eof = True
                    data = b""
                    # Framing errors must abort the connection immediately.
                    if isinstance(
                        underlying_exc, (InvalidHeader, TransferEncodingError)
                    ):
                        raise

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        # Fields that must not appear more than once in a message.
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive and protocol switching
        # RFC 9110 section 7.6.1 defines Connection as a comma-separated list.
        conn_values = headers.getall(hdrs.CONNECTION, ())
        if conn_values:
            conn_tokens = {
                token.lower()
                for conn_value in conn_values
                for token in (part.strip(" \t") for part in conn_value.split(","))
                if token and token.isascii()
            }

            if "close" in conn_tokens:
                close_conn = True
            elif "keep-alive" in conn_tokens:
                close_conn = False

            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            if "upgrade" in conn_tokens and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING, "")
        if enc.isascii() and enc.lower() in {"gzip", "deflate", "br", "zstd"}:
            encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            # Both framings present is a request-smuggling vector.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

596 

597 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line and headers into a RawRequestMessage.

        Raises BadHttpMethod, BadStatusLine, InvalidURLError or any
        header-validation exception from parse_headers().
        """
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        # method
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # Dispatch on the four request-target forms of RFC 7230 §5.3.
        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines[1:])

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Validate Transfer-Encoding; only a trailing "chunked" is accepted."""
        te = te.rsplit(",", maxsplit=1)[-1].strip(" \t")
        # .lower() transforms some non-ascii chars, so must check first.
        if te.isascii() and te.lower() == "chunked":
            return True
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

693 

694 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # In lax (non-DEBUG) mode split on bare LF and tolerate stray CRs.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line and headers into a RawResponseMessage."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        # The reason phrase is optional.
        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            status = status.strip()
            reason = ""

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines[1:])

        # Derive keep-alive default when no Connection header was present.
        if close is None:
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            elif hdrs.CONTENT_LENGTH in headers or hdrs.TRANSFER_ENCODING in headers:
                close = False
            else:
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = True

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        # Responses tolerate unknown codings; only the final one matters.
        return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked"

777 

778 

class HttpPayloadParser:
    """Parse a message body and feed it into a StreamReader.

    Supports Content-Length framing, chunked transfer coding (including
    trailers), and read-until-EOF, with optional transparent
    decompression via DeflateBuffer.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
        *,
        headers_parser: HeadersParser,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        max_trailers: int = 128,
    ) -> None:
        # Remaining bytes for Content-Length framing.
        self._length = 0
        self._type = ParseState.PARSE_UNTIL_EOF
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0
        # Partial line carried over between feed_data() calls.
        self._chunk_tail = b""
        self._auto_decompress = auto_decompress
        self._lax = lax
        # Trailers reuse the header parser (same grammar).
        self._headers_parser = headers_parser
        self._max_line_size = max_line_size
        self._max_field_size = max_field_size
        self._max_trailers = max_trailers
        self._trailer_lines: list[bytes] = []
        # True when the body is already complete (e.g. zero length).
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle connection close; raise if the body is incomplete."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return (body_complete, leftover_bytes)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # We should never have a tail if we're inside the payload body.
                assert self._chunk != ChunkState.PARSE_CHUNKED_CHUNK
                # We should check the length is sane.
                max_line_length = self._max_line_size
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    max_line_length = self._max_field_size
                if len(self._chunk_tail) > max_line_length:
                    raise LineTooLong(self._chunk_tail[:100] + b"...", max_line_length)

                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = TransferEncodingError(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    elif len(chunk) >= len(SEP) or chunk != SEP[: len(chunk)]:
                        exc = TransferEncodingError(
                            "Chunk size mismatch: expected CRLF after chunk data"
                        )
                        set_exception(self.payload, exc)
                        raise exc
                    else:
                        # Could still be a partial separator; wait for more.
                        self._chunk_tail = chunk
                        return False, b""

                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos < 0:  # No line found
                        self._chunk_tail = chunk
                        return False, b""

                    line = chunk[:pos]
                    chunk = chunk[pos + len(SEP) :]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")

                    if len(line) > self._max_field_size:
                        raise LineTooLong(line[:100] + b"...", self._max_field_size)

                    self._trailer_lines.append(line)

                    if len(self._trailer_lines) > self._max_trailers:
                        raise BadHttpMessage("Too many trailers received")

                    # \r\n\r\n found, end of stream
                    if self._trailer_lines[-1] == b"":
                        # Headers and trailers are defined the same way,
                        # so we reuse the HeadersParser here.
                        try:
                            trailers, raw_trailers = self._headers_parser.parse_headers(
                                self._trailer_lines
                            )
                        finally:
                            self._trailer_lines.clear()
                        self.payload.feed_eof()
                        return True, chunk

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

996 

997 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(
        self,
        out: StreamReader,
        encoding: Optional[str],
        max_decompress_size: int = DEFAULT_MAX_DECOMPRESS_SIZE,
    ) -> None:
        # Downstream reader that receives decompressed bytes.
        self.out = out
        # Total compressed bytes fed so far.
        self.size = 0
        out.total_compressed_bytes = self.size
        self.encoding = encoding
        self._started_decoding = False

        # Select a decompressor by Content-Encoding token.
        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor, ZSTDDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        elif encoding == "zstd":
            if not HAS_ZSTD:
                raise ContentEncodingError(
                    "Can not decode content-encoding: zstandard (zstd). "
                    "Please install `backports.zstd`"
                )
            self.decompressor = ZSTDDecompressor()
        else:
            # Covers "gzip" and "deflate" via zlib.
            self.decompressor = ZLibDecompressor(encoding=encoding)

        # Guard against decompression-bomb payloads.
        self._max_decompress_size = max_decompress_size

    def set_exception(
        self,
        exc: BaseException,
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        # Propagate errors to the downstream reader.
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and feed the result downstream."""
        if not size:
            return

        self.size += size
        self.out.total_compressed_bytes = self.size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            # Decompress with limit + 1 so we can detect if output exceeds limit
            chunk = self.decompressor.decompress_sync(
                chunk, max_length=self._max_decompress_size + 1
            )
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        # Check if decompression limit was exceeded
        if len(chunk) > self._max_decompress_size:
            raise DecompressSizeError(
                "Decompressed data exceeds the configured limit of %d bytes"
                % self._max_decompress_size
            )

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and close the downstream reader."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # An unterminated deflate stream means the body was truncated.
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        # Forward chunk boundaries to the downstream reader.
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

1100 

1101 

# Keep references to the pure-Python implementations before they are
# (possibly) shadowed by the C-accelerated versions below.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the Cython extension when available and not disabled.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Fall back silently to the pure-Python implementations.
    pass