Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/aiohttp/http_parser.py: 18%

482 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:56 +0000

1import abc 

2import asyncio 

3import collections 

4import re 

5import string 

6import zlib 

7from contextlib import suppress 

8from enum import IntEnum 

9from typing import ( 

10 Any, 

11 Generic, 

12 List, 

13 NamedTuple, 

14 Optional, 

15 Pattern, 

16 Set, 

17 Tuple, 

18 Type, 

19 TypeVar, 

20 Union, 

21 cast, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .helpers import NO_EXTENSIONS, BaseTimerContext 

30from .http_exceptions import ( 

31 BadHttpMessage, 

32 BadStatusLine, 

33 ContentEncodingError, 

34 ContentLengthError, 

35 InvalidHeader, 

36 LineTooLong, 

37 TransferEncodingError, 

38) 

39from .http_writer import HttpVersion, HttpVersion10 

40from .log import internal_logger 

41from .streams import EMPTY_PAYLOAD, StreamReader 

42from .typedefs import Final, RawHeaders 

43 

# Optional Brotli ("br") content-encoding support.  DeflateBuffer consults
# HAS_BROTLI before constructing a brotli decompressor.
try:
    import brotli

    HAS_BROTLI = True
except ImportError:  # pragma: no cover
    HAS_BROTLI = False

50 

51 

# Public API of this module.
__all__ = (
    "HeadersParser",
    "HttpParser",
    "HttpRequestParser",
    "HttpResponseParser",
    "RawRequestMessage",
    "RawResponseMessage",
)

# All printable ASCII characters; used to validate header text.
ASCIISET: Final[Set[str]] = set(string.printable)

62 

# See https://tools.ietf.org/html/rfc7230#section-3.1.1
# and https://tools.ietf.org/html/rfc7230#appendix-B
#
# method = token
# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." /
# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA
# token = 1*tchar
# Matches an RFC 7230 token (HTTP method name).
METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+")
# Matches "HTTP/<major>.<minor>".  The dot is escaped: previously the
# pattern used a bare ".", which matched ANY character, so malformed
# versions such as "HTTP/1x1" were accepted as valid.
VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d+)\.(\d+)")
# Characters forbidden in a header field name (RFC 7230 separators/CTLs).
HDRRE: Final[Pattern[bytes]] = re.compile(rb"[\x00-\x1F\x7F()<>@,;:\[\]={} \t\\\\\"]")

73 

74 

class RawRequestMessage(NamedTuple):
    """Immutable result of parsing an HTTP request line plus headers."""

    method: str  # request method token, e.g. "GET"
    path: str  # request target exactly as received
    version: HttpVersion  # parsed (major, minor) version
    headers: "CIMultiDictProxy[str]"  # case-insensitive decoded headers
    raw_headers: RawHeaders  # (name, value) pairs as raw bytes
    should_close: bool  # close the connection after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" or None
    upgrade: bool  # "Connection: upgrade" was present
    chunked: bool  # "Transfer-Encoding: chunked" was present
    url: URL  # request target parsed into a yarl.URL

86 

87 

class RawResponseMessage(NamedTuple):
    """Immutable result of parsing an HTTP response status line plus headers.

    Converted from ``collections.namedtuple`` to ``typing.NamedTuple`` for
    consistency with :class:`RawRequestMessage` and to document field types.
    Field names, order, construction and tuple behavior are unchanged.
    """

    version: "HttpVersion"  # parsed (major, minor) version
    code: int  # status code, e.g. 200
    reason: str  # reason phrase (stripped; may be empty)
    headers: "CIMultiDictProxy[str]"  # case-insensitive decoded headers
    raw_headers: "RawHeaders"  # (name, value) pairs as raw bytes
    should_close: bool  # close the connection after this message
    compression: Optional[str]  # "gzip"/"deflate"/"br" or None
    upgrade: bool  # "Connection: upgrade" was present
    chunked: bool  # "Transfer-Encoding: chunked" was present

102 

103 

# Type variable constrained to the two raw-message types, so each HttpParser
# subclass gets a precise parse_message/feed_data message type.
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

105 

106 

class ParseState(IntEnum):
    # Strategy used to locate the end of the message body
    # (cf. RFC 7230 section 3.3.3).

    PARSE_NONE = 0  # no payload expected
    PARSE_LENGTH = 1  # read exactly Content-Length bytes
    PARSE_CHUNKED = 2  # chunked Transfer-Encoding framing
    PARSE_UNTIL_EOF = 3  # payload ends when the connection closes

113 

114 

class ChunkState(IntEnum):
    # Sub-states of the chunked Transfer-Encoding state machine.
    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]\r\n"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk payload bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the CRLF that ends a chunk
    PARSE_MAYBE_TRAILERS = 3  # after 0-size chunk: final CRLF or trailers
    PARSE_TRAILERS = 4  # discarding trailer lines up to CRLF CRLF

121 

122 

class HeadersParser:
    """Parses a pre-split list of header lines into a CIMultiDict.

    Enforces per-field size limits and supports obsolete line folding
    (continuation lines starting with SP or HTAB).
    """

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse ``lines`` (start-line at index 0, headers from index 1,
        terminated by an empty line) into decoded and raw header pairs.

        Raises InvalidHeader for a malformed line or name, LineTooLong when
        a name or folded value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers = []

        # Index 0 is the request/status line; headers start at index 1.
        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        # The terminating empty line makes the loop condition false.
        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            bname = bname.strip(b" \t")
            bvalue = bvalue.lstrip()
            # Reject separator/control characters in the field name.
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "xmlcharrefreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines (obsolete RFC 7230 line folding)
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    # Size limit applies to the whole folded value.
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "xmlcharrefreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        # Ran off the end of the supplied lines.
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "xmlcharrefreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip()
            # surrogateescape keeps arbitrary bytes round-trippable.
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

215 

216 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base incremental parser for HTTP messages.

    Subclasses implement :meth:`parse_message` to turn the buffered
    start-line + header lines into a raw message.  Bytes are pushed in via
    :meth:`feed_data`; incomplete data is kept in ``self._tail`` between
    calls, and message bodies are delegated to an ``HttpPayloadParser``.
    """

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # buffered start-line/header lines
        self._tail = b""  # leftover bytes carried between feed_data calls
        self._upgraded = False  # connection switched protocols (e.g. websocket)
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Build a raw message object from buffered start-line + headers."""
        pass

    def feed_eof(self) -> Optional[_MsgT]:
        """Signal end-of-stream; may return a final partial message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # NOTE(review): bytes-vs-str comparison — ``self._lines``
                # holds bytes, so this condition is always True and b"" is
                # always appended; presumably b"" / b"\r\n" was intended.
                # Confirm before changing, as it affects partial parsing.
                if self._lines[-1] != "\r\n":
                    self._lines.append(b"")
                # Best-effort: a partial message may be unparseable.
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: bytes = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes into the parser.

        Returns ``(messages, upgraded, tail)`` where ``messages`` is a list
        of ``(raw_message, payload_stream)`` pairs fully parsed from ``data``,
        ``upgraded`` reports whether the connection switched protocols, and
        ``tail`` is any unconsumed bytes (only non-empty after an upgrade).
        The keyword defaults exist to bind the constants as locals.
        """

        messages = []

        # Prepend leftover bytes from the previous call.
        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + 2
                    continue

                if pos >= start_pos:
                    # line found
                    self._lines.append(data[start_pos:pos])
                    start_pos = pos + 2

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            # Always reset the line buffer, even on error.
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            try:
                                length = int(length_hdr)
                            except ValueError:
                                raise InvalidHeader(CONTENT_LENGTH)

                            if length < 0:
                                raise InvalidHeader(CONTENT_LENGTH)

                            return length

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        # Responses have no .method; fall back to the
                        # request method given at construction time.
                        method = getattr(msg, "method", self.method)

                        assert self.protocol is not None
                        # calculate payload
                        if (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            # CONNECT tunnels: treat the rest as opaque data.
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                            )
                        else:
                            if (
                                getattr(msg, "code", 100) >= 199
                                and length is None
                                and self.read_until_eof
                            ):
                                # No framing info: body runs until EOF.
                                payload = StreamReader(
                                    self.protocol,
                                    timer=self.timer,
                                    loop=loop,
                                    limit=self._limit,
                                )
                                payload_parser = HttpPayloadParser(
                                    payload,
                                    length=length,
                                    chunked=msg.chunked,
                                    method=method,
                                    compression=msg.compression,
                                    code=self.code,
                                    readall=True,
                                    response_with_body=self.response_with_body,
                                    auto_decompress=self._auto_decompress,
                                )
                                if not payload_parser.done:
                                    self._payload_parser = payload_parser
                            else:
                                payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # No complete line yet: stash the remainder.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:])
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    # Body finished; restart message parsing on the rest.
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
                else:
                    break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            elif v == "upgrade":
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            # Ambiguous framing (request smuggling vector) is rejected.
            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Content-Length can't be present with Transfer-Encoding",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

512 

513 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Parser for the HTTP request line and headers.

    Raises .http_exceptions.BadStatusLine on any malformed request
    line and produces a RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        # The request line is always the first buffered line.
        first_line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = first_line.split(None, 2)
        except ValueError:
            raise BadStatusLine(first_line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # The method must be an RFC 7230 token.
        if not METHRE.match(method):
            raise BadStatusLine(method)

        # Parse "HTTP/<major>.<minor>"; any failure maps to BadStatusLine.
        try:
            if not version.startswith("HTTP/"):
                raise BadStatusLine(version)
            major, minor = version[5:].split(".", 1)
            version_o = HttpVersion(int(major), int(minor))
        except Exception:
            raise BadStatusLine(version)

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            # Split off fragment and query string before handing to yarl.
            without_fragment, _, fragment = path.partition("#")
            plain_path, _, query = without_fragment.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # parser does, so identical HTTP Request-Line inputs produce
            # identical `yarl.URL()` objects.
            url = URL.build(
                path=plain_path,
                query_string=query,
                fragment=fragment,
                encoded=True,
            )
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)

        # Delegate header parsing to the shared implementation.
        headers, raw_headers, close, compression, upgrade, chunked = (
            self.parse_headers(lines)
        )

        if close is None:
            # No explicit Connection header: HTTP/1.0 closes by default,
            # HTTP/1.1 keeps the connection alive.
            close = version_o <= HttpVersion10

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

602 

603 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Parser for the HTTP response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        status_line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = status_line.split(None, 1)
        except ValueError:
            raise BadStatusLine(status_line) from None

        # The reason phrase is optional; without one, keep the code as-is.
        try:
            status, reason = status.split(None, 1)
        except ValueError:
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # The version must look like "HTTP/<major>.<minor>".
        version_match = VERSRE.match(version)
        if version_match is None:
            raise BadStatusLine(status_line)
        version_o = HttpVersion(
            int(version_match.group(1)), int(version_match.group(2))
        )

        # The status code is a three-digit number.
        try:
            status_i = int(status)
        except ValueError:
            raise BadStatusLine(status_line) from None

        if status_i > 999:
            raise BadStatusLine(status_line)

        # Delegate header parsing to the shared implementation.
        headers, raw_headers, close, compression, upgrade, chunked = (
            self.parse_headers(lines)
        )

        if close is None:
            # No explicit Connection header: HTTP/1.0 closes by default.
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

667 

668 

class HttpPayloadParser:
    """Incremental parser for an HTTP message body.

    Framing is chosen from the message metadata: fixed Content-Length,
    chunked Transfer-Encoding, or read-until-EOF.  Decoded body bytes are
    fed into ``payload`` (optionally wrapped in a DeflateBuffer for
    transparent decompression).
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk metadata awaiting more data
        self._auto_decompress = auto_decompress
        self.done = False  # True once the whole body has been consumed

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                # Zero-length body: nothing further to read.
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                # No framing info: consume until the connection closes.
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end-of-stream according to the framing in use.

        Raises ContentLengthError / TransferEncodingError when the body
        is truncated relative to its declared framing.
        """
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: bytes = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return ``(eof, unconsumed_remainder)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                # Body complete; surplus bytes belong to the next message.
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                # Rejoin partial metadata saved from the previous call.
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        try:
                            size = int(bytes(size_b), 16)
                        except ValueError:
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc from None

                        chunk = chunk[pos + 2 :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Size line incomplete; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[:2] == SEP:
                        chunk = chunk[2:]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[:2]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[2:]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + 2 :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

859 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out  # downstream stream receiving decompressed bytes
        self.size = 0  # total compressed bytes seen so far
        self.encoding = encoding
        self._started_decoding = False  # True once decompress() has succeeded

        if encoding == "br":
            if not HAS_BROTLI:  # pragma: no cover
                raise ContentEncodingError(
                    "Can not decode content-encoding: brotli (br). "
                    "Please install `Brotli`"
                )

            class BrotliDecoder:
                # Supports both 'brotlipy' and 'Brotli' packages
                # since they share an import name. The top branches
                # are for 'brotlipy' and bottom branches for 'Brotli'
                def __init__(self) -> None:
                    self._obj = brotli.Decompressor()

                def decompress(self, data: bytes) -> bytes:
                    if hasattr(self._obj, "decompress"):
                        return cast(bytes, self._obj.decompress(data))
                    return cast(bytes, self._obj.process(data))

                def flush(self) -> bytes:
                    if hasattr(self._obj, "flush"):
                        return cast(bytes, self._obj.flush())
                    return b""

            self.decompressor = BrotliDecoder()
        else:
            # +16 enables gzip header/trailer handling; bare MAX_WBITS
            # selects the zlib-wrapped deflate format.
            zlib_mode = 16 + zlib.MAX_WBITS if encoding == "gzip" else zlib.MAX_WBITS
            self.decompressor = zlib.decompressobj(wbits=zlib_mode)

    def set_exception(self, exc: BaseException) -> None:
        """Propagate *exc* to the wrapped output stream."""
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* (of compressed length *size*) and forward it."""
        if not size:
            return

        self.size += size

        # RFC1950
        # bits 0..3 = CM = 0b1000 = 8 = "deflate"
        # bits 4..7 = CINFO = 1..7 = windows size.
        if (
            not self._started_decoding
            and self.encoding == "deflate"
            and chunk[0] & 0xF != 8
        ):
            # Change the decoder to decompress incorrectly compressed data
            # Actually we should issue a warning about non-RFC-compliant data.
            self.decompressor = zlib.decompressobj(wbits=-zlib.MAX_WBITS)

        try:
            chunk = self.decompressor.decompress(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        # Mark started only after a successful decompress so the raw-deflate
        # fallback above remains available until the first good chunk.
        self._started_decoding = True

        if chunk:
            self.out.feed_data(chunk, len(chunk))

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        chunk = self.decompressor.flush()

        if chunk or self.size > 0:
            self.out.feed_data(chunk, len(chunk))
            # A deflate stream that never reached its end marker is truncated.
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        # Pass chunk boundaries through to the wrapped stream.
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

948 

949 

# Pure-Python implementations are always reachable under the *Py aliases,
# even when the C extension below replaces the public names.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the C-accelerated parser when extensions are enabled.
        from ._http_parser import (  # type: ignore[import,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Fall back silently to the pure-Python implementations.
    pass