Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 83%

494 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-26 06:16 +0000

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor 

30from .helpers import ( 

31 DEBUG, 

32 NO_EXTENSIONS, 

33 BaseTimerContext, 

34 method_must_be_empty_body, 

35 status_code_must_be_empty_body, 

36) 

37from .http_exceptions import ( 

38 BadHttpMessage, 

39 BadStatusLine, 

40 ContentEncodingError, 

41 ContentLengthError, 

42 InvalidHeader, 

43 InvalidURLError, 

44 LineTooLong, 

45 TransferEncodingError, 

46) 

47from .http_writer import HttpVersion, HttpVersion10 

48from .log import internal_logger 

49from .streams import EMPTY_PAYLOAD, StreamReader 

50from .typedefs import RawHeaders 

51 

52__all__ = ( 

53 "HeadersParser", 

54 "HttpParser", 

55 "HttpRequestParser", 

56 "HttpResponseParser", 

57 "RawRequestMessage", 

58 "RawResponseMessage", 

59) 

60 

61_SEP = Literal[b"\r\n", b"\n"] 

62 

63ASCIISET: Final[Set[str]] = set(string.printable) 

64 

65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview 

66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens 

67# 

68# method = token 

69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 

70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 

71# token = 1*tchar 

72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") 

73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") 

74HDRRE: Final[Pattern[bytes]] = re.compile( 

75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]" 

76) 

77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+") 

78 

79 

class RawRequestMessage(NamedTuple):
    """Immutable result of parsing a request start line plus headers."""

    method: str  # request method token, e.g. "GET"
    path: str  # request-target exactly as received on the wire
    version: HttpVersion  # parsed HTTP-version, e.g. HttpVersion(1, 1)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool  # True if the connection must close after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # parsed form of `path`

91 

92 

class RawResponseMessage(NamedTuple):
    """Immutable result of parsing a response status line plus headers."""

    version: HttpVersion  # parsed HTTP-version from the status line
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # decoded, case-insensitive header view
    raw_headers: RawHeaders  # undecoded (name, value) byte pairs
    should_close: bool  # True if the connection must close after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked

103 

104 

# Message type produced by a concrete parser: raw request or raw response.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

106 

107 

class ParseState(IntEnum):
    """Body-framing strategy selected from the message headers."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # fixed-size body per Content-Length
    PARSE_CHUNKED = 2  # Transfer-Encoding: chunked
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes

113 

114 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-encoding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting a chunk-size line
    PARSE_CHUNKED_CHUNK = 1  # reading chunk-data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the CRLF terminating chunk-data
    PARSE_MAYBE_TRAILERS = 3  # after last-chunk: either final CRLF or trailers
    PARSE_TRAILERS = 4  # discarding a trailer line

121 

122 

class HeadersParser:
    """Parses a block of RFC 9112 header lines into a ``CIMultiDict``.

    ``lines`` passed to :meth:`parse_headers` is the whole message head:
    index 0 is the start line (ignored here) and the list is terminated
    by an empty line.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
    ) -> None:
        # Size limits; exceeding either raises LineTooLong.
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Return (case-insensitive decoded headers, raw byte pairs).

        Raises InvalidHeader on malformed names/values and LineTooLong
        when a name or (folded) value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers: List[Tuple[bytes, bytes]] = []

        lines_idx = 1
        line = lines[1]  # first header line; an empty line ends the loop
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # No whitespace may surround the field name.
            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines (obsolete line folding)
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            # surrogateescape keeps arbitrary bytes round-trippable in str form
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # Reject embedded CR/LF/NUL in values (header injection guard).
            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

221 

222 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base incremental HTTP message parser.

    Raw bytes are fed via :meth:`feed_data`; start-line + header lines are
    accumulated until the blank line ending the header section, then
    :meth:`parse_message` (implemented by subclasses) produces a raw
    message and an appropriate payload reader is attached for the body.
    """

    # Lax parsing (LF-only line endings etc.); enabled on the response parser.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # buffered head lines of the current message
        self._tail = b""  # bytes held over between feed_data() calls
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse the buffered start line + header lines into a raw message."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Handle end of stream; may return a partially received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # BUGFIX: this previously compared against the str "\r\n",
                # which can never equal a bytes element, so the terminator
                # was unconditionally appended; compare bytes as intended.
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes into the parser.

        Returns ``(messages, upgraded, tail)`` where *messages* is a list of
        ``(raw_message, payload_stream)`` pairs completed by this call,
        *upgraded* reports whether the connection switched protocols, and
        *tail* is any unconsumed data (non-empty only after an upgrade).
        """
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume leading \r\n before the start line
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found — header section complete
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels: everything after the headers is
                            # opaque data for the peer, treated as an upgrade.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # no separator yet — stash the partial line for next call
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # Headers that must appear at most once per message.
        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            # Conflicting framing information is a request-smuggling vector.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

538 

539 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # request line; surrogateescape keeps arbitrary bytes representable
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must match the token grammar (RFC 9110)
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

632 

633 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Pick the default separator lazily: strict CRLF when debugging,
        # bare LF otherwise (some servers terminate lines with LF only).
        sep: _SEP = SEP if SEP is not None else (b"\r\n" if DEBUG else b"\n")
        return super().feed_data(data, sep, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        status_line = lines[0].decode("utf-8", "surrogateescape")

        # "HTTP-version SP status-code [SP reason-phrase]", split on
        # whitespace runs so sloppy spacing is tolerated.
        try:
            version_text, remainder = status_line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(status_line) from None

        pieces = remainder.split(maxsplit=1)
        if len(pieces) == 2:
            code_text, reason_text = pieces
        else:
            # No reason phrase present.
            code_text = remainder.strip()
            reason_text = ""

        if len(reason_text) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason_text))
            )

        # HTTP version
        version_match = VERSRE.fullmatch(version_text)
        if version_match is None:
            raise BadStatusLine(status_line)
        parsed_version = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit number
        if len(code_text) != 3 or not code_text.isdecimal():
            raise BadStatusLine(status_line)
        status_code = int(code_text)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # No Connection header: HTTP/1.0 and earlier default to close.
            close = parsed_version <= HttpVersion10

        return RawResponseMessage(
            parsed_version,
            status_code,
            reason_text.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

708 

709 

class HttpPayloadParser:
    """Incremental parser for a message body.

    Framing mode (fixed length, chunked, read-until-EOF, or no body) is
    chosen once in ``__init__`` from the message metadata; bytes are then
    pushed through :meth:`feed_data` into ``self.payload`` (optionally
    wrapped in a DeflateBuffer for decompression).
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining bytes for PARSE_LENGTH mode
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk header/terminator held over
        self._auto_decompress = auto_decompress
        self._lax = lax
        # True once the whole body has been consumed (or none is expected).
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Signal end of stream; raises if the body is known to be truncated."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return (body complete, unconsumed remainder)."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # prepend bytes held over from the previous call
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker (last-chunk)
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # incomplete size line — wait for more data
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

907 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream reader receiving decompressed bytes
        self.size = 0  # total compressed bytes seen so far
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # zlib handles both "gzip" and "deflate"
            self.decompressor = ZLibDecompressor(encoding=encoding)
            return
        if not HAS_BROTLI:  # pragma: no cover
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )
        self.decompressor = BrotliDecompressor()

    def set_exception(self, exc: BaseException) -> None:
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result downstream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            self.encoding == "deflate"
            and not self._started_decoding
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            decoded = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if decoded:
            self.out.feed_data(decoded, len(decoded))

    def feed_eof(self) -> None:
        """Flush the decompressor and propagate EOF downstream."""
        tail = self.decompressor.flush()

        if self.size > 0 or tail:
            self.out.feed_data(tail, len(tail))
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

979 

980 

# Pure-Python implementations stay importable under the *Py aliases even when
# the C accelerator below rebinds the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the Cython-based parser when extensions are enabled.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        # Accelerated implementations under the *C aliases.
        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Extension not built; keep the pure-Python implementations.
    pass