Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/aiohttp/http_parser.py: 20%

497 statements  

« prev     ^ index     » next       coverage.py v7.3.2, created at 2023-12-08 06:40 +0000

1import abc 

2import asyncio 

3import re 

4import string 

5from contextlib import suppress 

6from enum import IntEnum 

7from typing import ( 

8 Any, 

9 ClassVar, 

10 Final, 

11 Generic, 

12 List, 

13 Literal, 

14 NamedTuple, 

15 Optional, 

16 Pattern, 

17 Set, 

18 Tuple, 

19 Type, 

20 TypeVar, 

21 Union, 

22) 

23 

24from multidict import CIMultiDict, CIMultiDictProxy, istr 

25from yarl import URL 

26 

27from . import hdrs 

28from .base_protocol import BaseProtocol 

29from .compression_utils import HAS_BROTLI, BrotliDecompressor, ZLibDecompressor 

30from .helpers import ( 

31 DEBUG, 

32 NO_EXTENSIONS, 

33 BaseTimerContext, 

34 method_must_be_empty_body, 

35 status_code_must_be_empty_body, 

36) 

37from .http_exceptions import ( 

38 BadHttpMessage, 

39 BadStatusLine, 

40 ContentEncodingError, 

41 ContentLengthError, 

42 InvalidHeader, 

43 InvalidURLError, 

44 LineTooLong, 

45 TransferEncodingError, 

46) 

47from .http_writer import HttpVersion, HttpVersion10 

48from .log import internal_logger 

49from .streams import EMPTY_PAYLOAD, StreamReader 

50from .typedefs import RawHeaders 

51 

52__all__ = ( 

53 "HeadersParser", 

54 "HttpParser", 

55 "HttpRequestParser", 

56 "HttpResponseParser", 

57 "RawRequestMessage", 

58 "RawResponseMessage", 

59) 

60 

61_SEP = Literal[b"\r\n", b"\n"] 

62 

63ASCIISET: Final[Set[str]] = set(string.printable) 

64 

65# See https://www.rfc-editor.org/rfc/rfc9110.html#name-overview 

66# and https://www.rfc-editor.org/rfc/rfc9110.html#name-tokens 

67# 

68# method = token 

69# tchar = "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+" / "-" / "." / 

70# "^" / "_" / "`" / "|" / "~" / DIGIT / ALPHA 

71# token = 1*tchar 

72METHRE: Final[Pattern[str]] = re.compile(r"[!#$%&'*+\-.^_`|~0-9A-Za-z]+") 

73VERSRE: Final[Pattern[str]] = re.compile(r"HTTP/(\d).(\d)") 

74HDRRE: Final[Pattern[bytes]] = re.compile( 

75 rb"[\x00-\x1F\x7F-\xFF()<>@,;:\[\]={} \t\"\\]" 

76) 

77HEXDIGIT = re.compile(rb"[0-9a-fA-F]+") 

78 

79 

class RawRequestMessage(NamedTuple):
    """Result of parsing an HTTP request line plus its headers (no body)."""

    method: str  # request method token, e.g. "GET"
    path: str  # request target exactly as received
    version: HttpVersion
    headers: "CIMultiDictProxy[str]"  # case-insensitive header view
    raw_headers: RawHeaders  # original (name, value) byte pairs
    should_close: bool  # connection must be closed after this message
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool  # Connection: upgrade was requested
    chunked: bool  # Transfer-Encoding: chunked
    url: URL  # `path` parsed into a URL object

91 

92 

class RawResponseMessage(NamedTuple):
    """Result of parsing an HTTP status line plus its headers (no body)."""

    version: HttpVersion
    code: int  # three-digit status code
    reason: str  # reason phrase (may be empty)
    headers: CIMultiDictProxy[str]  # case-insensitive header view
    raw_headers: RawHeaders  # original (name, value) byte pairs
    should_close: bool
    compression: Optional[str]  # content-encoding: "gzip"/"deflate"/"br" or None
    upgrade: bool
    chunked: bool


# Message type produced by a concrete parser (request or response).
_MsgT = TypeVar("_MsgT", RawRequestMessage, RawResponseMessage)

106 

107 

class ParseState(IntEnum):
    """How the body length of the current message is framed."""

    PARSE_NONE = 0  # no payload expected
    PARSE_LENGTH = 1  # read exactly Content-Length bytes
    PARSE_CHUNKED = 2  # decode chunked transfer encoding
    PARSE_UNTIL_EOF = 3  # read until the connection closes

114 

115 

class ChunkState(IntEnum):
    """Sub-states of the chunked transfer-encoding decoder."""

    PARSE_CHUNKED_SIZE = 0  # expecting "<hex-size>[;ext]<SEP>"
    PARSE_CHUNKED_CHUNK = 1  # reading chunk data bytes
    PARSE_CHUNKED_CHUNK_EOF = 2  # expecting the SEP after chunk data
    PARSE_MAYBE_TRAILERS = 3  # after 0-size chunk: SEP or trailers
    PARSE_TRAILERS = 4  # discarding a trailer line

122 

123 

class HeadersParser:
    """Parses a block of RFC 9112 header lines into a CIMultiDict."""

    def __init__(
        self,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
    ) -> None:
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple["CIMultiDictProxy[str]", RawHeaders]:
        """Parse header lines into (headers, raw_headers).

        ``lines[0]`` is the start line and is skipped; parsing stops at
        the first empty line.  Raises InvalidHeader on malformed fields
        and LineTooLong when a name or value exceeds ``max_field_size``.
        """
        headers: CIMultiDict[str] = CIMultiDict()
        raw_headers = []

        lines_idx = 1
        line = lines[1]
        line_count = len(lines)

        while line:
            # Parse initial header name : value pair.
            try:
                bname, bvalue = line.split(b":", 1)
            except ValueError:
                raise InvalidHeader(line) from None

            # An empty header name (e.g. a line starting with ":") is
            # invalid; without this guard the whitespace check below
            # would raise IndexError instead of InvalidHeader.
            if not bname:
                raise InvalidHeader(line)

            # https://www.rfc-editor.org/rfc/rfc9112.html#section-5.1-2
            if {bname[0], bname[-1]} & {32, 9}:  # {" ", "\t"}
                raise InvalidHeader(line)

            bvalue = bvalue.lstrip(b" \t")
            if HDRRE.search(bname):
                raise InvalidHeader(bname)
            if len(bname) > self.max_field_size:
                raise LineTooLong(
                    "request header name {}".format(
                        bname.decode("utf8", "backslashreplace")
                    ),
                    str(self.max_field_size),
                    str(len(bname)),
                )

            header_length = len(bvalue)

            # next line
            lines_idx += 1
            line = lines[lines_idx]

            # consume continuation lines
            continuation = line and line[0] in (32, 9)  # (' ', '\t')

            # Deprecated: https://www.rfc-editor.org/rfc/rfc9112.html#name-obsolete-line-folding
            if continuation:
                bvalue_lst = [bvalue]
                while continuation:
                    header_length += len(line)
                    if header_length > self.max_field_size:
                        raise LineTooLong(
                            "request header field {}".format(
                                bname.decode("utf8", "backslashreplace")
                            ),
                            str(self.max_field_size),
                            str(header_length),
                        )
                    bvalue_lst.append(line)

                    # next line
                    lines_idx += 1
                    if lines_idx < line_count:
                        line = lines[lines_idx]
                        if line:
                            continuation = line[0] in (32, 9)  # (' ', '\t')
                    else:
                        line = b""
                        break
                bvalue = b"".join(bvalue_lst)
            else:
                if header_length > self.max_field_size:
                    raise LineTooLong(
                        "request header field {}".format(
                            bname.decode("utf8", "backslashreplace")
                        ),
                        str(self.max_field_size),
                        str(header_length),
                    )

            bvalue = bvalue.strip(b" \t")
            name = bname.decode("utf-8", "surrogateescape")
            value = bvalue.decode("utf-8", "surrogateescape")

            # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-5
            if "\n" in value or "\r" in value or "\x00" in value:
                raise InvalidHeader(bvalue)

            headers.add(name, value)
            raw_headers.append((bname, bvalue))

        return (CIMultiDictProxy(headers), tuple(raw_headers))

224 

225 

class HttpParser(abc.ABC, Generic[_MsgT]):
    """Base HTTP message parser.

    Splits the incoming byte stream into start-line + header lines,
    delegates interpretation to :meth:`parse_message`, and attaches an
    ``HttpPayloadParser`` when a message body is expected.
    """

    # Lax parsing (bare-LF separators) is only enabled on the response
    # parser subclass.
    lax: ClassVar[bool] = False

    def __init__(
        self,
        protocol: Optional[BaseProtocol] = None,
        loop: Optional[asyncio.AbstractEventLoop] = None,
        limit: int = 2**16,
        max_line_size: int = 8190,
        max_headers: int = 32768,
        max_field_size: int = 8190,
        timer: Optional[BaseTimerContext] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        payload_exception: Optional[Type[BaseException]] = None,
        response_with_body: bool = True,
        read_until_eof: bool = False,
        auto_decompress: bool = True,
    ) -> None:
        self.protocol = protocol
        self.loop = loop
        self.max_line_size = max_line_size
        self.max_headers = max_headers
        self.max_field_size = max_field_size
        self.timer = timer
        self.code = code
        self.method = method
        self.readall = readall
        self.payload_exception = payload_exception
        self.response_with_body = response_with_body
        self.read_until_eof = read_until_eof

        self._lines: List[bytes] = []  # accumulated start/header lines
        self._tail = b""  # partial line carried to the next feed_data()
        self._upgraded = False
        self._payload = None
        self._payload_parser: Optional[HttpPayloadParser] = None
        self._auto_decompress = auto_decompress
        self._limit = limit
        self._headers_parser = HeadersParser(max_line_size, max_headers, max_field_size)

    @abc.abstractmethod
    def parse_message(self, lines: List[bytes]) -> _MsgT:
        """Build a raw message from the collected start line + headers."""

    def feed_eof(self) -> Optional[_MsgT]:
        """Handle end of input; may return a partially-received message."""
        if self._payload_parser is not None:
            self._payload_parser.feed_eof()
            self._payload_parser = None
        else:
            # try to extract partial message
            if self._tail:
                self._lines.append(self._tail)

            if self._lines:
                # Terminate the partial message with an empty line so
                # parse_message() sees an end-of-headers marker.
                # NOTE: this previously compared against the *str*
                # "\r\n", which a bytes line can never equal, making
                # the append unconditional.
                if self._lines[-1] != b"\r\n":
                    self._lines.append(b"")
                with suppress(Exception):
                    return self.parse_message(self._lines)
        return None

    def feed_data(
        self,
        data: bytes,
        SEP: _SEP = b"\r\n",
        EMPTY: bytes = b"",
        CONTENT_LENGTH: istr = hdrs.CONTENT_LENGTH,
        METH_CONNECT: str = hdrs.METH_CONNECT,
        SEC_WEBSOCKET_KEY1: istr = hdrs.SEC_WEBSOCKET_KEY1,
    ) -> Tuple[List[Tuple[_MsgT, StreamReader]], bool, bytes]:
        """Feed raw bytes; return ``(messages, upgraded, leftover)``.

        Each returned message is paired with the StreamReader that will
        receive its payload (``EMPTY_PAYLOAD`` when no body is expected).
        """

        messages = []

        if self._tail:
            data, self._tail = self._tail + data, b""

        data_len = len(data)
        start_pos = 0
        loop = self.loop

        while start_pos < data_len:

            # read HTTP message (request/response line + headers), \r\n\r\n
            # and split by lines
            if self._payload_parser is None and not self._upgraded:
                pos = data.find(SEP, start_pos)
                # consume \r\n
                if pos == start_pos and not self._lines:
                    start_pos = pos + len(SEP)
                    continue

                if pos >= start_pos:
                    # line found
                    line = data[start_pos:pos]
                    if SEP == b"\n":  # For lax response parsing
                        line = line.rstrip(b"\r")
                    self._lines.append(line)
                    start_pos = pos + len(SEP)

                    # \r\n\r\n found
                    if self._lines[-1] == EMPTY:
                        try:
                            msg: _MsgT = self.parse_message(self._lines)
                        finally:
                            self._lines.clear()

                        def get_content_length() -> Optional[int]:
                            # payload length
                            length_hdr = msg.headers.get(CONTENT_LENGTH)
                            if length_hdr is None:
                                return None

                            # Shouldn't allow +/- or other number formats.
                            # https://www.rfc-editor.org/rfc/rfc9110#section-8.6-2
                            if not length_hdr.strip(" \t").isdecimal():
                                raise InvalidHeader(CONTENT_LENGTH)

                            return int(length_hdr)

                        length = get_content_length()
                        # do not support old websocket spec
                        if SEC_WEBSOCKET_KEY1 in msg.headers:
                            raise InvalidHeader(SEC_WEBSOCKET_KEY1)

                        self._upgraded = msg.upgrade

                        method = getattr(msg, "method", self.method)
                        # code is only present on responses
                        code = getattr(msg, "code", 0)

                        assert self.protocol is not None
                        # calculate payload
                        empty_body = status_code_must_be_empty_body(code) or bool(
                            method and method_must_be_empty_body(method)
                        )
                        # `and` binds tighter than `or`: a body reader is
                        # needed for a positive Content-Length, or for a
                        # chunked message that is not an upgrade.
                        if not empty_body and (
                            (length is not None and length > 0)
                            or msg.chunked
                            and not msg.upgrade
                        ):
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=self.readall,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        elif method == METH_CONNECT:
                            assert isinstance(msg, RawRequestMessage)
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            # A CONNECT request tunnels raw data afterwards.
                            self._upgraded = True
                            self._payload_parser = HttpPayloadParser(
                                payload,
                                method=msg.method,
                                compression=msg.compression,
                                readall=True,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                        elif not empty_body and length is None and self.read_until_eof:
                            payload = StreamReader(
                                self.protocol,
                                timer=self.timer,
                                loop=loop,
                                limit=self._limit,
                            )
                            payload_parser = HttpPayloadParser(
                                payload,
                                length=length,
                                chunked=msg.chunked,
                                method=method,
                                compression=msg.compression,
                                code=self.code,
                                readall=True,
                                response_with_body=self.response_with_body,
                                auto_decompress=self._auto_decompress,
                                lax=self.lax,
                            )
                            if not payload_parser.done:
                                self._payload_parser = payload_parser
                        else:
                            payload = EMPTY_PAYLOAD

                        messages.append((msg, payload))
                else:
                    # No separator found: stash the partial line.
                    self._tail = data[start_pos:]
                    data = EMPTY
                    break

            # no parser, just store
            elif self._payload_parser is None and self._upgraded:
                assert not self._lines
                break

            # feed payload
            elif data and start_pos < data_len:
                assert not self._lines
                assert self._payload_parser is not None
                try:
                    eof, data = self._payload_parser.feed_data(data[start_pos:], SEP)
                except BaseException as exc:
                    if self.payload_exception is not None:
                        self._payload_parser.payload.set_exception(
                            self.payload_exception(str(exc))
                        )
                    else:
                        self._payload_parser.payload.set_exception(exc)

                    eof = True
                    data = b""

                if eof:
                    start_pos = 0
                    data_len = len(data)
                    self._payload_parser = None
                    continue
            else:
                break

        if data and start_pos < data_len:
            data = data[start_pos:]
        else:
            data = EMPTY

        return messages, self._upgraded, data

    def parse_headers(
        self, lines: List[bytes]
    ) -> Tuple[
        "CIMultiDictProxy[str]", RawHeaders, Optional[bool], Optional[str], bool, bool
    ]:
        """Parses RFC 5322 headers from a stream.

        Line continuations are supported. Returns list of header name
        and value pairs. Header name is in upper case.
        """
        headers, raw_headers = self._headers_parser.parse_headers(lines)
        close_conn = None
        encoding = None
        upgrade = False
        chunked = False

        # https://www.rfc-editor.org/rfc/rfc9110.html#section-5.5-6
        # https://www.rfc-editor.org/rfc/rfc9110.html#name-collected-abnf
        singletons = (
            hdrs.CONTENT_LENGTH,
            hdrs.CONTENT_LOCATION,
            hdrs.CONTENT_RANGE,
            hdrs.CONTENT_TYPE,
            hdrs.ETAG,
            hdrs.HOST,
            hdrs.MAX_FORWARDS,
            hdrs.SERVER,
            hdrs.TRANSFER_ENCODING,
            hdrs.USER_AGENT,
        )
        bad_hdr = next((h for h in singletons if len(headers.getall(h, ())) > 1), None)
        if bad_hdr is not None:
            raise BadHttpMessage(f"Duplicate '{bad_hdr}' header found.")

        # keep-alive
        conn = headers.get(hdrs.CONNECTION)
        if conn:
            v = conn.lower()
            if v == "close":
                close_conn = True
            elif v == "keep-alive":
                close_conn = False
            # https://www.rfc-editor.org/rfc/rfc9110.html#name-101-switching-protocols
            elif v == "upgrade" and headers.get(hdrs.UPGRADE):
                upgrade = True

        # encoding
        enc = headers.get(hdrs.CONTENT_ENCODING)
        if enc:
            enc = enc.lower()
            if enc in ("gzip", "deflate", "br"):
                encoding = enc

        # chunking
        te = headers.get(hdrs.TRANSFER_ENCODING)
        if te is not None:
            if "chunked" == te.lower():
                chunked = True
            else:
                raise BadHttpMessage("Request has invalid `Transfer-Encoding`")

            if hdrs.CONTENT_LENGTH in headers:
                raise BadHttpMessage(
                    "Transfer-Encoding can't be present with Content-Length",
                )

        return (headers, raw_headers, close_conn, encoding, upgrade, chunked)

    def set_upgraded(self, val: bool) -> None:
        """Set connection upgraded (to websocket) mode.

        :param bool val: new state.
        """
        self._upgraded = val

545 

546 

class HttpRequestParser(HttpParser[RawRequestMessage]):
    """Read request status line.

    Exception .http_exceptions.BadStatusLine
    could be raised in case of any errors in status line.
    Returns RawRequestMessage.
    """

    def parse_message(self, lines: List[bytes]) -> RawRequestMessage:
        """Parse the request line in ``lines[0]`` and the headers after it.

        Raises BadStatusLine, LineTooLong, or InvalidURLError on
        malformed input.
        """
        # request line
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            method, path, version = line.split(" ", maxsplit=2)
        except ValueError:
            raise BadStatusLine(line) from None

        if len(path) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(path))
            )

        # method must be an RFC 9110 token
        if not METHRE.fullmatch(method):
            raise BadStatusLine(method)

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        if method == "CONNECT":
            # authority-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.3
            url = URL.build(authority=path, encoded=True)
        elif path.startswith("/"):
            # origin-form,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.1
            path_part, _hash_separator, url_fragment = path.partition("#")
            path_part, _question_mark_separator, qs_part = path_part.partition("?")

            # NOTE: `yarl.URL.build()` is used to mimic what the Cython-based
            # NOTE: parser does, otherwise it results into the same
            # NOTE: HTTP Request-Line input producing different
            # NOTE: `yarl.URL()` objects
            url = URL.build(
                path=path_part,
                query_string=qs_part,
                fragment=url_fragment,
                encoded=True,
            )
        elif path == "*" and method == "OPTIONS":
            # asterisk-form,
            url = URL(path, encoded=True)
        else:
            # absolute-form for proxy maybe,
            # https://datatracker.ietf.org/doc/html/rfc7230#section-5.3.2
            url = URL(path, encoded=True)
            if url.scheme == "":
                # not absolute-form
                raise InvalidURLError(
                    path.encode(errors="surrogateescape").decode("latin1")
                )

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:  # then the headers weren't set in the request
            if version_o <= HttpVersion10:  # HTTP 1.0 must asks to not close
                close = True
            else:  # HTTP 1.1 must ask to close.
                close = False

        return RawRequestMessage(
            method,
            path,
            version_o,
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
            url,
        )

639 

640 

class HttpResponseParser(HttpParser[RawResponseMessage]):
    """Read response status line and headers.

    BadStatusLine could be raised in case of any errors in status line.
    Returns RawResponseMessage.
    """

    # Lax mode should only be enabled on response parser.
    lax = not DEBUG

    def feed_data(
        self,
        data: bytes,
        SEP: Optional[_SEP] = None,
        *args: Any,
        **kwargs: Any,
    ) -> Tuple[List[Tuple[RawResponseMessage, StreamReader]], bool, bytes]:
        # Default to lax bare-LF separators outside of DEBUG mode.
        if SEP is None:
            SEP = b"\r\n" if DEBUG else b"\n"
        return super().feed_data(data, SEP, *args, **kwargs)

    def parse_message(self, lines: List[bytes]) -> RawResponseMessage:
        """Parse the status line in ``lines[0]`` and the headers after it."""
        line = lines[0].decode("utf-8", "surrogateescape")
        try:
            version, status = line.split(maxsplit=1)
        except ValueError:
            raise BadStatusLine(line) from None

        try:
            status, reason = status.split(maxsplit=1)
        except ValueError:
            # No reason phrase present.
            status = status.strip()
            reason = ""

        if len(reason) > self.max_line_size:
            raise LineTooLong(
                "Status line is too long", str(self.max_line_size), str(len(reason))
            )

        # version
        match = VERSRE.fullmatch(version)
        if match is None:
            raise BadStatusLine(line)
        version_o = HttpVersion(int(match.group(1)), int(match.group(2)))

        # The status code is a three-digit number
        if len(status) != 3 or not status.isdecimal():
            raise BadStatusLine(line)
        status_i = int(status)

        # read headers
        (
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        ) = self.parse_headers(lines)

        if close is None:
            # HTTP/1.0 defaults to closing the connection.
            close = version_o <= HttpVersion10

        return RawResponseMessage(
            version_o,
            status_i,
            reason.strip(),
            headers,
            raw_headers,
            close,
            compression,
            upgrade,
            chunked,
        )

715 

716 

class HttpPayloadParser:
    """Incremental parser for a single HTTP message body.

    Depending on the message framing it reads exactly Content-Length
    bytes, decodes a chunked transfer encoding, or consumes everything
    until EOF, feeding decoded bytes into ``payload`` (possibly wrapped
    in a DeflateBuffer for decompression).
    """

    def __init__(
        self,
        payload: StreamReader,
        length: Optional[int] = None,
        chunked: bool = False,
        compression: Optional[str] = None,
        code: Optional[int] = None,
        method: Optional[str] = None,
        readall: bool = False,
        response_with_body: bool = True,
        auto_decompress: bool = True,
        lax: bool = False,
    ) -> None:
        self._length = 0  # remaining Content-Length bytes
        self._type = ParseState.PARSE_NONE
        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
        self._chunk_size = 0  # remaining bytes of the current chunk
        self._chunk_tail = b""  # partial chunk header carried over
        self._auto_decompress = auto_decompress
        self._lax = lax
        # True once the payload is fully received (or not expected at all).
        self.done = False

        # payload decompression wrapper
        if response_with_body and compression and self._auto_decompress:
            real_payload: Union[StreamReader, DeflateBuffer] = DeflateBuffer(
                payload, compression
            )
        else:
            real_payload = payload

        # payload parser
        if not response_with_body:
            # don't parse payload if it's not expected to be received
            self._type = ParseState.PARSE_NONE
            real_payload.feed_eof()
            self.done = True

        elif chunked:
            self._type = ParseState.PARSE_CHUNKED
        elif length is not None:
            self._type = ParseState.PARSE_LENGTH
            self._length = length
            if self._length == 0:
                real_payload.feed_eof()
                self.done = True
        else:
            if readall and code != 204:
                self._type = ParseState.PARSE_UNTIL_EOF
            elif method in ("PUT", "POST"):
                internal_logger.warning(  # pragma: no cover
                    "Content-Length or Transfer-Encoding header is required"
                )
                self._type = ParseState.PARSE_NONE
                real_payload.feed_eof()
                self.done = True

        self.payload = real_payload

    def feed_eof(self) -> None:
        """Handle end of input; raise if the framing promised more data."""
        if self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_eof()
        elif self._type == ParseState.PARSE_LENGTH:
            raise ContentLengthError(
                "Not enough data for satisfy content length header."
            )
        elif self._type == ParseState.PARSE_CHUNKED:
            raise TransferEncodingError(
                "Not enough data for satisfy transfer length header."
            )

    def feed_data(
        self, chunk: bytes, SEP: _SEP = b"\r\n", CHUNK_EXT: bytes = b";"
    ) -> Tuple[bool, bytes]:
        """Feed body bytes; return ``(payload_complete, leftover_bytes)``."""
        # Read specified amount of bytes
        if self._type == ParseState.PARSE_LENGTH:
            required = self._length
            chunk_len = len(chunk)

            if required >= chunk_len:
                self._length = required - chunk_len
                self.payload.feed_data(chunk, chunk_len)
                if self._length == 0:
                    self.payload.feed_eof()
                    return True, b""
            else:
                # More data than needed: only consume `required` bytes.
                self._length = 0
                self.payload.feed_data(chunk[:required], required)
                self.payload.feed_eof()
                return True, chunk[required:]

        # Chunked transfer encoding parser
        elif self._type == ParseState.PARSE_CHUNKED:
            if self._chunk_tail:
                chunk = self._chunk_tail + chunk
                self._chunk_tail = b""

            while chunk:

                # read next chunk size
                if self._chunk == ChunkState.PARSE_CHUNKED_SIZE:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        i = chunk.find(CHUNK_EXT, 0, pos)
                        if i >= 0:
                            size_b = chunk[:i]  # strip chunk-extensions
                        else:
                            size_b = chunk[:pos]

                        if self._lax:  # Allow whitespace in lax mode.
                            size_b = size_b.strip()

                        if not re.fullmatch(HEXDIGIT, size_b):
                            exc = TransferEncodingError(
                                chunk[:pos].decode("ascii", "surrogateescape")
                            )
                            self.payload.set_exception(exc)
                            raise exc
                        size = int(bytes(size_b), 16)

                        chunk = chunk[pos + len(SEP) :]
                        if size == 0:  # eof marker
                            self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                            if self._lax and chunk.startswith(b"\r"):
                                chunk = chunk[1:]
                        else:
                            self._chunk = ChunkState.PARSE_CHUNKED_CHUNK
                            self._chunk_size = size
                            self.payload.begin_http_chunk_receiving()
                    else:
                        # Incomplete size line; wait for more data.
                        self._chunk_tail = chunk
                        return False, b""

                # read chunk and feed buffer
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK:
                    required = self._chunk_size
                    chunk_len = len(chunk)

                    if required > chunk_len:
                        self._chunk_size = required - chunk_len
                        self.payload.feed_data(chunk, chunk_len)
                        return False, b""
                    else:
                        self._chunk_size = 0
                        self.payload.feed_data(chunk[:required], required)
                        chunk = chunk[required:]
                        if self._lax and chunk.startswith(b"\r"):
                            chunk = chunk[1:]
                        self._chunk = ChunkState.PARSE_CHUNKED_CHUNK_EOF
                        self.payload.end_http_chunk_receiving()

                # toss the CRLF at the end of the chunk
                if self._chunk == ChunkState.PARSE_CHUNKED_CHUNK_EOF:
                    if chunk[: len(SEP)] == SEP:
                        chunk = chunk[len(SEP) :]
                        self._chunk = ChunkState.PARSE_CHUNKED_SIZE
                    else:
                        self._chunk_tail = chunk
                        return False, b""

                # if stream does not contain trailer, after 0\r\n
                # we should get another \r\n otherwise
                # trailers needs to be skipped until \r\n\r\n
                if self._chunk == ChunkState.PARSE_MAYBE_TRAILERS:
                    head = chunk[: len(SEP)]
                    if head == SEP:
                        # end of stream
                        self.payload.feed_eof()
                        return True, chunk[len(SEP) :]
                    # Both CR and LF, or only LF may not be received yet. It is
                    # expected that CRLF or LF will be shown at the very first
                    # byte next time, otherwise trailers should come. The last
                    # CRLF which marks the end of response might not be
                    # contained in the same TCP segment which delivered the
                    # size indicator.
                    if not head:
                        return False, b""
                    if head == SEP[:1]:
                        self._chunk_tail = head
                        return False, b""
                    self._chunk = ChunkState.PARSE_TRAILERS

                # read and discard trailer up to the CRLF terminator
                if self._chunk == ChunkState.PARSE_TRAILERS:
                    pos = chunk.find(SEP)
                    if pos >= 0:
                        chunk = chunk[pos + len(SEP) :]
                        self._chunk = ChunkState.PARSE_MAYBE_TRAILERS
                    else:
                        self._chunk_tail = chunk
                        return False, b""

        # Read all bytes until eof
        elif self._type == ParseState.PARSE_UNTIL_EOF:
            self.payload.feed_data(chunk, len(chunk))

        return False, b""

915 

class DeflateBuffer:
    """DeflateStream decompress stream and feed data into specified stream."""

    decompressor: Any

    def __init__(self, out: StreamReader, encoding: Optional[str]) -> None:
        self.out = out
        self.size = 0
        self.encoding = encoding
        self._started_decoding = False

        self.decompressor: Union[BrotliDecompressor, ZLibDecompressor]
        if encoding != "br":
            # gzip/deflate (and anything else) go through zlib.
            self.decompressor = ZLibDecompressor(encoding=encoding)
            return
        if not HAS_BROTLI:  # pragma: no cover
            raise ContentEncodingError(
                "Can not decode content-encoding: brotli (br). "
                "Please install `Brotli`"
            )
        self.decompressor = BrotliDecompressor()

    def set_exception(self, exc: BaseException) -> None:
        # Propagate failures straight to the wrapped stream.
        self.out.set_exception(exc)

    def feed_data(self, chunk: bytes, size: int) -> None:
        """Decompress *chunk* and forward the result to the output stream."""
        if not size:
            return

        self.size += size

        # RFC 1950: a zlib stream starts with CM = 8 ("deflate") in the
        # low nibble of the first byte.  Anything else means the peer sent
        # non-RFC-compliant data, so switch to a decompressor that skips
        # the zlib header.
        header_ok = (
            self._started_decoding
            or self.encoding != "deflate"
            or (chunk[0] & 0xF) == 8
        )
        if not header_ok:
            self.decompressor = ZLibDecompressor(
                encoding=self.encoding, suppress_deflate_header=True
            )

        try:
            decoded = self.decompressor.decompress_sync(chunk)
        except Exception:
            raise ContentEncodingError(
                "Can not decode content-encoding: %s" % self.encoding
            )

        self._started_decoding = True

        if decoded:
            self.out.feed_data(decoded, len(decoded))

    def feed_eof(self) -> None:
        """Flush the decompressor and close the output stream."""
        tail = self.decompressor.flush()

        if tail or self.size > 0:
            self.out.feed_data(tail, len(tail))
            if self.encoding == "deflate" and not self.decompressor.eof:
                raise ContentEncodingError("deflate")

        self.out.feed_eof()

    def begin_http_chunk_receiving(self) -> None:
        self.out.begin_http_chunk_receiving()

    def end_http_chunk_receiving(self) -> None:
        self.out.end_http_chunk_receiving()

988 

989 

# Keep explicit aliases for the pure-Python implementations so callers can
# select one regardless of whether the C extension is loaded below.
HttpRequestParserPy = HttpRequestParser
HttpResponseParserPy = HttpResponseParser
RawRequestMessagePy = RawRequestMessage
RawResponseMessagePy = RawResponseMessage

try:
    if not NO_EXTENSIONS:
        # Prefer the Cython-accelerated parser when extensions are enabled;
        # this rebinds the public names to the C implementations.
        from ._http_parser import (  # type: ignore[import-not-found,no-redef]
            HttpRequestParser,
            HttpResponseParser,
            RawRequestMessage,
            RawResponseMessage,
        )

        HttpRequestParserC = HttpRequestParser
        HttpResponseParserC = HttpResponseParser
        RawRequestMessageC = RawRequestMessage
        RawResponseMessageC = RawResponseMessage
except ImportError:  # pragma: no cover
    # Extension not built; silently fall back to the pure-Python parsers.
    pass