# Coverage report: aiohttp/http_parser.py, 20% of 516 statements covered.

import abc
import asyncio
import re
import string
from contextlib import suppress
from enum import IntEnum
from typing import (
    Any,
    ClassVar,
    Final,
    Generic,
    List,
    Literal,
    NamedTuple,
    Optional,
    Pattern,
    Set,
    Tuple,
    Type,
    TypeVar,
    Union,
)

from multidict import CIMultiDict, CIMultiDictProxy, istr
from yarl import URL

from . import hdrs
from .base_protocol import BaseProtocol
from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor
from .helpers import (
    _EXC_SENTINEL,
    DEBUG,
    EMPTY_BODY_METHODS,
    EMPTY_BODY_STATUS_CODES,
    NO_EXTENSIONS,
    BaseTimerContext,
    set_exception,
)
from .http_exceptions import (
    BadHttpMessage,
    BadHttpMethod,
    BadStatusLine,
    ContentEncodingError,
    ContentLengthError,
    InvalidHeader,
    InvalidURLError,
    LineTooLong,
    TransferEncodingError,
)
from .http_writer import HttpVersion, HttpVersion10
from .streams import EMPTY_PAYLOAD, StreamReader
from .typedefs import RawHeaders

__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

_SEP = Literal[b"\r\n", b"\n"]

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
#     method = token
#     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
#     token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")
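# Illustrative examples of the patterns above: TOKENRE.fullmatch("GET") and
# TOKENRE.fullmatch("X-Custom-Header") both succeed, while names containing
# spaces or control characters do not; VERSRE.fullmatch("HTTP/1.1") captures
# the major and minor digits; DIGITS and HEXDIGITS are used below to validate
# Content-Length values, status codes and chunk sizes.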


class RawRequestMessage(NamedTuple):
    method: str
    path: str
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"
    raw_headers: RawHeaders
    should_close: bool
    compression: Optional[str]
    upgrade: bool
    chunked: bool
    url: URL


class RawResponseMessage(NamedTuple):
    version: HttpVersion
    code: int
    reason: str
    headers: CIMultiDictProxy[str]
    raw_headers: RawHeaders
    should_close: bool
    compression: Optional[str]
    upgrade: bool
    chunked: bool


_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)


class ParseState(IntEnum):

    PARSE_NONE = 0
    PARSE_LENGTH = 1
    PARSE_CHUNKED = 2
    PARSE_UNTIL_EOF = 3


class ChunkState(IntEnum):
    PARSE_CHUNKED_SIZE = 0
    PARSE_CHUNKED_CHUNK = 1
    PARSE_CHUNKED_CHUNK_EOF = 2
    PARSE_MAYBE_TRAILERS = 3
    PARSE_TRAILERS = 4
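# The chunked-body parser below cycles PARSE_CHUNKED_SIZE ->
# PARSE_CHUNKED_CHUNK -> PARSE_CHUNKED_CHUNK_EOF for every chunk; a
# zero-length chunk switches it to PARSE_MAYBE_TRAILERS / PARSE_TRAILERS
# until the final blank line ends the body.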


class HeadersParser:
    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        lax: bool = False,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self._lax = lax

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = self._lax and line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))
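    # Illustrative sketch (not part of the module): lines[0] is the start
    # line and the header block must end with an empty line, e.g.
    #
    #     HeadersParser().parse_headers(
    #         [b"GET / HTTP/1.1", b"Host: example.com", b""]
    #     )
    #
    # yields a case-insensitive CIMultiDictProxy plus the raw (name, value)
    # byte pairs.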


def _is_supported_upgrade(headers: CIMultiDictProxy[str]) -> bool:
    """Check if the upgrade header is supported."""
    return headers.get(hdrs.UPGRADE, "").lower() in {"tcp", "websocket"}
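# Only "tcp" and "websocket" Upgrade tokens switch the parser into upgraded
# mode; any other Upgrade header is ignored and the connection keeps being
# parsed as ordinary HTTP messages.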


class HttpParser(abc.ABC, Generic[_MsgT]):
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []
        self._tail = b""
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(
            max_line_size, max_headers, max_field_size, self.lax
        )

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT: ...

    @abc.abstractmethod
    def _is_chunked_te(self, te: str) -> bool: ...

    def feed_eof(self) -> Optional[_MsgT]:
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Ensure the header block is terminated by an empty line
                # before handing it to parse_message().
                if self._lines[-1] != b"":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
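        # Returns (messages, upgraded, tail): each parsed message is paired
        # with its payload StreamReader (or EMPTY_PAYLOAD when no body is
        # expected), `upgraded` reports a protocol switch, and `tail` holds
        # any bytes that were not consumed.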


        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        should_close = False
        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    if should_close:
                        raise BadHttpMessage("Data after `Connection: close`")

                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade and _is_supported_upgrade(
                            msg.headers
                        )

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = code in EMPTY_BODY_STATUS_CODES or bool(
                            method and method in EMPTY_BODY_METHODS
                        )
                        if not empty_body and (
                            ((length is not None and length > 0) or msg.chunked)
                            and not self._upgraded
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                        should_close = msg.should_close
                else:
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as underlying_exc:
                    reraised_exc = underlying_exc
                    if self.payload_exception is not None:
                        reraised_exc = self.payload_exception(str(underlying_exc))

                    set_exception(
                        self._payload_parser.payload,
                        reraised_exc,
                        underlying_exc,
                    )

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data
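    # A minimal driving sketch (illustrative only; `protocol` and `loop` are
    # assumed to come from the surrounding connection machinery):
    #
    #     parser = HttpRequestParser(protocol, loop, 2**16)
    #     messages, upgraded, tail = parser.feed_data(
    #         b"GET / HTTP/1.1\r\nHost: example.com\r\n\r\n"
    #     )
    #     msg, payload = messages[0]  # RawRequestMessage, EMPTY_PAYLOAD here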


    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns the parsed headers together
        with the raw headers and the derived close/encoding/upgrade/chunked
        flags; header names are matched case-insensitively.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if self._is_chunked_te(te):
                chunked = True

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val


class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read the request line.

    A .http_exceptions.BadStatusLine exception may be raised on any error
    in the request line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadHttpMethod(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method
        if not TOKENRE.fullmatch(method):
            raise BadHttpMethod(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does; otherwise the same HTTP Request-Line input
            # NOTE: could produce different `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP/1.0 closes by default
                close = True
            else:  # HTTP/1.1 keeps the connection open by default
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

    def _is_chunked_te(self, te: str) -> bool:
        if te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked":
            return True
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.3
        raise BadHttpMessage("Request has invalid `Transfer-Encoding`")


class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine may be raised on any error in the status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on the response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)
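    # With the lax separator b"\n", HttpParser.feed_data() strips a trailing
    # b"\r" from each line, so responses using bare-LF line endings are still
    # tolerated in non-DEBUG mode.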


    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            if version_o <= HttpVersion10:
                close = True
            # https://www.rfc-editor.org/rfc/rfc9112.html#name-message-body-length
            elif 100 <= status_i < 200 or status_i in {204, 304}:
                close = False
            elif hdrs.CONTENT_LENGTH in headers or hdrs.TRANSFER_ENCODING in headers:
                close = False
            else:
                # https://www.rfc-editor.org/rfc/rfc9112.html#section-6.3-2.8
                close = True

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

    def _is_chunked_te(self, te: str) -> bool:
        # https://www.rfc-editor.org/rfc/rfc9112#section-6.3-2.4.2
        return te.rsplit(",", maxsplit=1)[-1].strip(" \t").lower() == "chunked"
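    # Unlike the request parser, a Transfer-Encoding whose final coding is not
    # "chunked" is not rejected here; the response is simply treated as not
    # chunked and framed by Content-Length or by reading until EOF.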


class HttpPayloadParser:
    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0
        self._type = ParseState.PARSE_UNTIL_EOF
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0
        self._chunk_tail = b""
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True
        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload
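        # Summary of the mode selection above: PARSE_NONE when the body is
        # skipped, PARSE_CHUNKED for Transfer-Encoding: chunked, PARSE_LENGTH
        # when a Content-Length is known, and PARSE_UNTIL_EOF otherwise.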


    def feed_eof(self) -> None:
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data to satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data to satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
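        # Returns (eof, tail): eof is True once the payload is complete, and
        # tail carries any leftover bytes that belong to the next message.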

        # Read the specified number of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                            # Verify no LF in the chunk-extension
                            if b"\n" in (ext := chunk[i:pos]):
                                exc = BadHttpMessage(
                                    f"Unexpected LF in chunk-extension: {ext!r}"
                                )
                                set_exception(self.payload, exc)
                                raise exc
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            set_exception(self.payload, exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if self._lax and chunk.startswith(b"\r"):
                        chunk = chunk[1:]
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # If the stream does not contain trailers, after 0\r\n we
                # should get another \r\n; otherwise trailers need to be
                # skipped until \r\n\r\n.
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""
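    # Illustrative only: with chunked=True, feeding b"4\r\nWiki\r\n0\r\n\r\n"
    # delivers b"Wiki" to the StreamReader and returns (True, b"") once the
    # blank line after the final zero-size chunk is consumed.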


class DeflateBuffer:
    """Decompress the stream and feed data into the specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        else:
            self.decompressor = ZLibDecompressor(encoding=encoding)

    def set_exception(
        self,
        exc: BaseException,
        exc_cause: BaseException = _EXC_SENTINEL,
    ) -> None:
        set_exception(self.out, exc, exc_cause)

    def feed_data(self, chunk: bytes, size: int) -> None:
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = window size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()


HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    pass
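# The *Py and *C aliases above let callers and tests select a specific
# implementation explicitly, while the plain names resolve to the C-accelerated
# versions whenever the `_http_parser` extension can be imported.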