Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 59%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

506 statements  

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor 

30from .helpers import ( 

31 _EXC_SENTINEL, 

32 DEBUG, 

33 EMPTY_BODY_METHODS, 

34 EMPTY_BODY_STATUS_CODES, 

35 NO_EXTENSIONS, 

36 BaseTimerContext, 

37 set_exception, 

38) 

39from .http_exceptions import ( 

40 BadHttpMessage, 

41 BadHttpMethod, 

42 BadStatusLine, 

43 ContentEncodingError, 

44 ContentLengthError, 

45 InvalidHeader, 

46 InvalidURLError, 

47 LineTooLong, 

48 TransferEncodingError, 

49) 

50from .http_writer import HttpVersion, HttpVersion10 

51from .streams import EMPTY_PAYLOAD, StreamReader 

52from .typedefs import RawHeaders 

53 

# Public API of this module; the C-accelerated variants imported at the
# bottom of the file rebind these same names when available.
__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

62 

# Line separators accepted by the parsers: strict CRLF, or bare LF in
# lax (response) parsing mode.
_SEP = Literal[b"\r\n", b"\n"]

# All printable ASCII characters.
ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
#     method = token
#     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
#     token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# HTTP-version production, e.g. "HTTP/1.1"; re.ASCII keeps \d ASCII-only.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Unsigned ASCII decimal integer (no sign, no whitespace) for Content-Length
# and status codes.
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hexadecimal chunk-size digits for chunked transfer encoding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")

79 

80 

class RawRequestMessage(NamedTuple):
    """Parsed HTTP request start line and headers (no body)."""

    method: str  # request method token, e.g. "GET"
    path: str  # request target exactly as received
    version: HttpVersion  # parsed HTTP-version, e.g. HttpVersion(1, 1)
    headers: CIMultiDictProxy[str]  # case-insensitive decoded headers
    raw_headers: RawHeaders  # (name, value) byte pairs as received
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding ends with "chunked"
    url: URL  # request target parsed into a yarl.URL

92 

93 

class RawResponseMessage(NamedTuple):
    """Parsed HTTP response status line and headers (no body)."""

    version: HttpVersion  # parsed HTTP-version from the status line
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # case-insensitive decoded headers
    raw_headers: RawHeaders  # (name, value) byte pairs as received
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding ends with "chunked"

104 

105 

106_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage) 

107 

108 

class ParseState(IntEnum):
    """How the payload length of the current message is determined."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # fixed Content-Length body
    PARSE_CHUNKED = 2  # chunked transfer encoding
    PARSE_UNTIL_EOF = 3  # body ends when the connection closes

114 

115 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-encoding parser."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]\r\n"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting CRLF after chunk data
    PARSE_MAYBE_TRAILERS = 3  # after 0-size chunk; blank line or trailers
    PARSE_TRAILERS = 4  # skipping a trailer line

122 

123 

class HeadersParser:
    """Parse a block of RFC 9112 header field lines into a CIMultiDict.

    Supports (deprecated) obsolete line folding only in lax mode and
    enforces per-field size limits.
    """

    def __init__(
        self, max_line_size: int = 8190, max_field_size: int = 8190, lax: bool = False
    ) -> None:
        self.max_line_size = max_line_size  # limit for a single header line
        self.max_field_size = max_field_size  # limit for a folded field value
        self._lax = lax  # allow obsolete line folding when True

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse header lines (``lines[1:]``; ``lines[0]`` is the start line).

        Returns an immutable case-insensitive mapping plus the raw
        ``(name, value)`` byte pairs.  Raises InvalidHeader or LineTooLong
        on malformed or oversized fields.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        # An empty line terminates the header block.
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # No whitespace may surround the field name.
            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            # Field name must be a valid RFC 9110 token.
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines (leading SP/HTAB), lax mode only
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # Reject bare CR/LF/NUL inside a field value (request smuggling).
            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

225 

226 

def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Return True when the Upgrade header names a protocol we can handle."""
    requested = headers.get(hdrs.UPGRADE, "")
    return requested.lower() in {"tcp", "websocket"}

230 

231 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP/1.x message parser.

    Accumulates raw bytes via :meth:`feed_data`, splits the head into lines,
    delegates start-line/header parsing to the subclass via
    :meth:`parse_message`, and yields ``(message, payload_stream)`` pairs
    plus any unconsumed tail bytes.
    """

    # Lax parsing (bare-LF separators, forgiving whitespace) is enabled only
    # by the response subclass; requests are always parsed strictly.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        """Create a parser bound to *protocol*/*loop*.

        :param limit: StreamReader buffer limit for payload streams.
        :param payload_exception: exception type used to wrap payload errors.
        :param read_until_eof: treat a missing length as read-until-close.
        """
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated head lines (no separators)
        self._tail = b""  # bytes held until a full separator arrives
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size, self.lax)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the start line plus header lines into a message object."""

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool:
        """Return True if Transfer-Encoding *te* designates a chunked body."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Fix: compare against bytes — the previous ``!= "\r\n"``
                # (str) comparison was always True because the stored lines
                # are bytes.  Separators are stripped before lines are
                # stored, so in practice this still always appends the
                # blank terminator parse_message expects.
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes into the parser.

        Returns ``(messages, upgraded, tail)`` where *messages* is a list of
        ``(message, payload_stream)`` pairs completed by this call,
        *upgraded* reports whether the connection switched protocols, and
        *tail* holds unconsumed bytes (e.g. after an upgrade).
        """
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        should_close = False
        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # A message after `Connection: close` is a protocol error.
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found — full head received
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            # Length- or chunk-delimited body.
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels read until EOF and imply upgrade.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            # No framing info: read body until connection close.
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    # No full separator yet; stash the remainder for later.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    # Treat a failed payload as terminated; discard the rest.
                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # Reject duplicated singleton fields (message-framing ambiguity).
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            # Both framing headers present is a smuggling vector; reject.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

552 

553 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line and headers into a RawRequestMessage.

        Raises BadHttpMethod, BadStatusLine, LineTooLong or InvalidURLError
        on malformed input.
        """
        # request line: method SP request-target SP HTTP-version
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must be an RFC 9110 token
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Accept only a Transfer-Encoding whose final coding is ``chunked``."""
        if te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked":
            return True
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

652 

653 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        """Feed bytes, defaulting the separator to bare LF in lax mode."""
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line and headers into a RawResponseMessage."""
        # status line: HTTP-version SP status-code [SP reason-phrase]
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # reason phrase is optional
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # No Connection header: infer from version and framing rules.
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            elif hdrs.CONTENT_LENGTH in headers or hdrs.TRANSFER_ENCODING in headers:
                close = False
            else:
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = True

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        """Return True when the final transfer coding is ``chunked``."""
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked"

741 

742 

class HttpPayloadParser:
    """Parse an HTTP message body and feed it into a StreamReader.

    The framing mode (fixed length, chunked, until-EOF, or none) is chosen
    from the constructor arguments; optional decompression is layered via
    DeflateBuffer.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes for PARSE_LENGTH mode
        self._type = ParseState.PARSE_UNTIL_EOF  # default framing mode
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE  # chunked sub-state
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial line carried to the next feed
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True when no further data is expected

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Signal end of stream; raise if the body is known to be truncated."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return ``(body_complete, leftover_bytes)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            self._length = max(required - len(chunk), 0)
            self.payload.feed_data(chunk[:required])
            if self._length == 0:
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Prepend bytes left over from the previous feed.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = BadHttpMessage(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        # Chunk size must be pure hex digits.
                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Size line incomplete; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    self._chunk_size = max(required - len(chunk), 0)
                    self.payload.feed_data(chunk[:required])

                    if self._chunk_size:
                        # Chunk body incomplete; wait for more data.
                        return False, b""
                    chunk = chunk[required:]
                    self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                    self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk)

        return False, b""

922 

923 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream reader receiving decompressed bytes
        self.size = 0  # total compressed bytes received
        self.encoding = encoding  # "gzip", "deflate" or "br"
        self._started_decoding = False  # first-chunk header probe done

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        else:
            self.decompressor = ZLibDecompressor(encoding=encoding)

    def set_exception(
        self,
        exc: Union[Type[BaseException], BaseException],
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        """Propagate *exc* to the downstream reader."""
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes) -> None:
        """Decompress *chunk* and forward the result downstream."""
        if not chunk:
            return

        self.size += len(chunk)

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk)

    def feed_eof(self) -> None:
        """Flush the decompressor and close the downstream reader."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk)
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        # Pass chunk boundaries through to the downstream reader.
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        # Pass chunk boundaries through to the downstream reader.
        self.out.end_http_chunk_receiving()

999 

1000 

# Keep references to the pure-Python implementations under *Py names so they
# stay importable even after the C accelerators below rebind the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

# Prefer the Cython extension when it is built and extensions are enabled.
with suppress(ImportError):
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage