Coverage for /pythoncovmergedfiles/medio/medio/src/aiohttp/aiohttp/http_parser.py: 84%

497 statements  

« prev     ^ index     » next       coverage.py v7.4.1, created at 2024-02-09 06:47 +0000

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor 

30from .helpers import ( 

31 DEBUG, 

32 NO_EXTENSIONS, 

33 BaseTimerContext, 

34 method_must_be_empty_body, 

35 status_code_must_be_empty_body, 

36) 

37from .http_exceptions import ( 

38 BadHttpMessage, 

39 BadStatusLine, 

40 ContentEncodingError, 

41 ContentLengthError, 

42 InvalidHeader, 

43 InvalidURLError, 

44 LineTooLong, 

45 TransferEncodingError, 

46) 

47from .http_writer import HttpVersion, HttpVersion10 

48from .log import internal_logger 

49from .streams import EMPTY_PAYLOAD, StreamReader 

50from .typedefs import RawHeaders 

51 

# Public API of this module.
__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

60 

# Line separator accepted by the parsers: strict CRLF, or bare LF in lax mode.
_SEP = Literal[b"\r\n", b"\n"]

ASCIISET: Final[Set[str]] = set(string.printable)

# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview
# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens
#
#     method = token
#     tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
#             "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
#     token = 1*tchar
_TCHAR_SPECIALS: Final[str] = re.escape("!#$%&'*+-.^_`|~")
# Matches an RFC 9110 token (used for methods and header field names).
TOKENRE: Final[Pattern[str]] = re.compile(f"[0-9A-Za-z{_TCHAR_SPECIALS}]+")
# HTTP-version with single-digit major/minor, ASCII-only digits.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d)\.(\d)", re.ASCII)
# Unsigned decimal integer (no sign, no whitespace) — for Content-Length
# and status codes.
DIGITS: Final[Pattern[str]] = re.compile(r"\d+", re.ASCII)
# Hexadecimal chunk-size field of the chunked transfer coding.
HEXDIGITS: Final[Pattern[bytes]] = re.compile(rb"[0-9a-fA-F]+")

77 

78 

class RawRequestMessage(NamedTuple):
    """Parsed HTTP request head (request line + headers), as produced by
    :meth:`HttpRequestParser.parse_message`."""

    method: str  # request method token, e.g. "GET"
    path: str  # request-target exactly as received (still percent-encoded)
    version: HttpVersion
    headers: CIMultiDictProxy[str]  # case-insensitive, decoded header view
    raw_headers: RawHeaders  # tuple of (name, value) byte pairs
    should_close: bool  # connection should be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # request-target parsed into a yarl.URL

90 

91 

class RawResponseMessage(NamedTuple):
    """Parsed HTTP response head (status line + headers), as produced by
    :meth:`HttpResponseParser.parse_message`."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # case-insensitive, decoded header view
    raw_headers: RawHeaders  # tuple of (name, value) byte pairs
    should_close: bool  # connection should be closed after this message
    compression: Optional[str]  # Content-Encoding if gzip/deflate/br, else None
    upgrade: bool  # Connection: upgrade with an Upgrade header present
    chunked: bool  # Transfer-Encoding: chunked

102 

103 

# Message type produced by a concrete parser: request or response head.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

105 

106 

class ParseState(IntEnum):
    """Body-framing mode selected by :class:`HttpPayloadParser`."""

    PARSE_NONE = 0  # no body expected
    PARSE_LENGTH = 1  # body delimited by Content-Length
    PARSE_CHUNKED = 2  # body uses the chunked transfer coding
    PARSE_UNTIL_EOF = 3  # body runs until the connection closes

112 

113 

class ChunkState(IntEnum):
    """Sub-state of the chunked transfer-coding state machine."""

    PARSE_CHUNKED_SIZE = 0  # expecting a hex chunk-size line
    PARSE_CHUNKED_CHUNK = 1  # reading chunk data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the CRLF after chunk data
    PARSE_MAYBE_TRAILERS = 3  # after the 0-size chunk: CRLF or trailers
    PARSE_TRAILERS = 4  # discarding a trailer line

120 

121 

class HeadersParser:
    """Parses a list of raw header lines into a CIMultiDict.

    Supports (deprecated) RFC 9112 obs-fold line continuations and
    enforces per-field size limits.
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse *lines* into headers.

        ``lines[0]`` is the start line and is skipped here; parsing stops
        at the first empty line.  Returns an immutable case-insensitive
        mapping plus the raw ``(name, value)`` byte pairs.

        Raises :exc:`InvalidHeader` or :exc:`LineTooLong` on malformed
        or oversized input.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        # note: "raw" does not mean inclusion of OWS before/after the field value
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            if len(bname) == 0:
                raise InvalidHeader(bname)

            # No leading/trailing whitespace allowed around the field name.
            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )
            name = bname.decode("utf-8", "surrogateescape")
            # Field name must be a valid RFC 9110 token.
            if not TOKENRE.fullmatch(name):
                raise InvalidHeader(bname)

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            value = bvalue.decode("utf-8", "surrogateescape")

            # Reject CR, LF and NUL in the (joined) field value.
            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

224 

225 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP/1.x message parser.

    ``feed_data`` accumulates the message head line-by-line until the
    blank line, calls :meth:`parse_message` (implemented by subclasses),
    and then wires up an :class:`HttpPayloadParser` when a body is
    expected.
    """

    # Lax mode tolerates bare-LF separators; enabled only on the
    # response parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: BaseProtocol,
        loop: asyncio.AbstractEventLoop,
        limit: int,
        max_line_size: int = 8190,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated head lines (no SEP)
        self._tail = b""  # unconsumed bytes carried to the next feed_data
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Parse an accumulated message head into a Raw*Message."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end of stream; may salvage a partial message head."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # BUGFIX: this used to be ``self._lines[-1] != "\r\n"``,
                # a bytes-vs-str comparison that is always True in
                # Python 3, making the guard dead code.  The stored
                # lines are SEP-stripped, so the head terminator is the
                # empty line b"".
                if self._lines[-1] != b"":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes into the parser.

        Returns ``(messages, upgraded, tail)`` where *messages* is a
        list of ``(message, payload_stream)`` pairs completed by this
        call, *upgraded* reports protocol-upgrade mode, and *tail* is
        any unconsumed data (non-empty only after an upgrade).
        """
        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:
            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            # msg.headers is already stripped of leading/trailing wsp
                            if not DIGITS.fullmatch(length_hdr):
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        # NOTE: by precedence this reads as
                        # (length > 0) or (chunked and not upgrade).
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

542 

543 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line and headers in *lines* into a
        RawRequestMessage.

        Raises BadStatusLine, LineTooLong or InvalidURLError on
        malformed input.
        """
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must be an RFC 9110 token
        if not TOKENRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

636 

637 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default separator depends on lax mode: strict CRLF under
        # DEBUG, bare LF otherwise (CRs are stripped per-line upstream).
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line and headers in *lines* into a
        RawResponseMessage."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # no reason phrase present
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit ASCII number, no padding
        if len(status) != 3 or not DIGITS.fullmatch(status):
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # absent Connection header: keep-alive iff HTTP/1.1+
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

712 

713 

class HttpPayloadParser:
    """Parses a message body and feeds it into a StreamReader.

    The framing mode (Content-Length, chunked, or read-until-EOF) is
    chosen once in ``__init__``; ``feed_data`` then consumes bytes
    incrementally.
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial line kept between feed_data calls
        self._auto_decompress = auto_decompress
        self._lax = lax
        self.done = False  # True when no (more) body is expected

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of stream; raises if the framing is incomplete."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Consume *chunk*; return ``(eof, leftover)`` where *leftover*
        is any bytes past the end of this message's body."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # prepend the partial line saved from the previous call
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:
                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGITS, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

910 

911 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0  # total compressed bytes seen so far
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )
            self.decompressor = BrotliDecompressor()
        else:
            # gzip/deflate are both handled by zlib
            self.decompressor = ZLibDecompressor(encoding=encoding)

    def set_exception(self, exc: BaseException) -> None:
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result to the output stream."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            chunk = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and signal EOF on the output stream.

        Raises ContentEncodingError if a deflate stream is truncated.
        """
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # decompressor is not brotli unless encoding is "br"
            if self.encoding == "deflate" and not self.decompressor.eof:  # type: ignore[union-attr]
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

983 

984 

# Keep references to the pure-Python implementations under *Py names
# before the optional C-extension versions shadow the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Replace the public names with the accelerated C implementations
        # when the extension module is available.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Fall back to the pure-Python implementations bound above.
    pass