Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 97%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

713 statements  

1from __future__ import annotations 

2 

3import datetime 

4import re 

5import string 

6 

7from typing import Any 

8from typing import Callable 

9 

10from tomlkit._compat import decode 

11from tomlkit._utils import RFC_3339_LOOSE 

12from tomlkit._utils import _escaped 

13from tomlkit._utils import parse_rfc3339 

14from tomlkit.container import Container 

15from tomlkit.exceptions import EmptyKeyError 

16from tomlkit.exceptions import EmptyTableNameError 

17from tomlkit.exceptions import InternalParserError 

18from tomlkit.exceptions import InvalidCharInStringError 

19from tomlkit.exceptions import InvalidControlChar 

20from tomlkit.exceptions import InvalidDateError 

21from tomlkit.exceptions import InvalidDateTimeError 

22from tomlkit.exceptions import InvalidNumberError 

23from tomlkit.exceptions import InvalidTimeError 

24from tomlkit.exceptions import InvalidUnicodeValueError 

25from tomlkit.exceptions import ParseError 

26from tomlkit.exceptions import UnexpectedCharError 

27from tomlkit.exceptions import UnexpectedEofError 

28from tomlkit.items import AoT 

29from tomlkit.items import Array 

30from tomlkit.items import Bool 

31from tomlkit.items import BoolType 

32from tomlkit.items import Comment 

33from tomlkit.items import Date 

34from tomlkit.items import DateTime 

35from tomlkit.items import Float 

36from tomlkit.items import InlineTable 

37from tomlkit.items import Integer 

38from tomlkit.items import Item 

39from tomlkit.items import Key 

40from tomlkit.items import KeyType 

41from tomlkit.items import Null 

42from tomlkit.items import SingleKey 

43from tomlkit.items import String 

44from tomlkit.items import StringType 

45from tomlkit.items import Table 

46from tomlkit.items import Time 

47from tomlkit.items import Trivia 

48from tomlkit.items import Whitespace 

49from tomlkit.source import Source 

50from tomlkit.source import _StateHandler 

51from tomlkit.toml_document import TOMLDocument 

52 

53 

# Code points of the characters that get special treatment while scanning.
CTRL_I = 0x09  # Tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
CTRL_CHAR_LIMIT = 0x1F  # codes up to and including this one are control chars
CHR_DEL = 0x7F

# TOML character classes (formerly the `TOMLChar` constants). They are kept as
# frozensets so that membership tests are O(1); the same sets double as the
# stop-sets handed to Source.advance_while / advance_until, whose bulk scans
# replace the per-character `while self._current in <set> and self.inc()`
# loops with a single scan.
_SPACES = frozenset({" ", "\t"})
_NL = frozenset({"\n", "\r"})
_WS = _SPACES.union(_NL)
_KV = frozenset({"=", " ", "\t"})
_BARE_KEY_OR_SPACE = frozenset(
    "".join((string.ascii_letters, string.digits, "-_ \t"))
)
_NUM_STOP = frozenset({" ", "\t", "\n", "\r", "#", ",", "]", "}"})
_DATE_TAIL_STOP = frozenset({"\t", "\n", "\r", "#", ",", "]", "}"})

# Control characters that are invalid inside a single-line string: DEL plus
# everything <= CTRL_CHAR_LIMIT except tab. This is exactly the set for which
# the per-character string loop raises InvalidControlChar. The single-line
# string-body fast path stops its bulk scan at the first delimiter, backslash
# or control character and leaves that character to the main loop's existing
# branch (which raises InvalidControlChar where needed).
_CTRL_SINGLE = frozenset(
    {chr(code) for code in range(CTRL_CHAR_LIMIT + 1) if code != CTRL_I}
    | {chr(CHR_DEL)}
)
_SINGLE_LITERAL_STOP = _CTRL_SINGLE | {"'"}  # literal: only the closing quote
_SINGLE_BASIC_STOP = _CTRL_SINGLE | {'"', "\\"}  # basic: quote or escape

79 

80 

81class Parser: 

82 """ 

83 Parser for TOML documents. 

84 """ 

85 

86 # Deeply nested documents would overflow the interpreter stack: arrays and 

87 # inline tables are parsed recursively, and every fragment of a dotted key 

88 # adds a level of nested containers. Refuse documents beyond this depth. 

89 MAX_NESTING_DEPTH = 100 

90 

def __init__(self, string: str | bytes) -> None:
    """
    Wraps the decoded input in a Source and resets the parser bookkeeping.
    """
    # Input to parse, decoded before it is handed to the Source
    text = decode(string)
    self._src = Source(text)

    # Bookkeeping used while parsing tables and nested values
    self._aot_stack: list[Key] = []
    self._nesting_depth = 0

97 

98 @property 

99 def _state(self) -> _StateHandler: 

100 return self._src.state 

101 

102 @property 

103 def _idx(self) -> int: 

104 return self._src.idx 

105 

106 @property 

107 def _current(self) -> str: 

108 return self._src.current 

109 

110 @property 

111 def _marker(self) -> int: 

112 return self._src.marker 

113 

def extract(self) -> str:
    """
    Returns the text between the marker and the current index,
    as reported by the underlying source.
    """
    source = self._src
    return source.extract()

119 

def inc(self, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by one character unless the end of the input
    has been reached, and reports whether it was able to advance.

    The optional exception type is forwarded to the underlying source.
    """
    source = self._src
    advanced = source.inc(exception=exception)
    return advanced

126 

def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by n characters
    unless the end of the input has been reached.

    The optional exception type is forwarded to the underlying source.
    """
    source = self._src
    advanced = source.inc_n(n=n, exception=exception)
    return advanced

133 

def consume(self, chars: str, min: int = 0, max: int = -1) -> None:
    """
    Consumes characters from ``chars`` subject to the min/max bounds.

    The call is delegated unchanged to the underlying source.
    """
    source = self._src
    return source.consume(chars=chars, min=min, max=max)

139 

def end(self) -> bool:
    """
    Tells whether the parser has reached the end of the input.
    """
    source = self._src
    return source.end()

145 

def mark(self) -> None:
    """
    Moves the marker to the current index position.
    """
    source = self._src
    source.mark()

151 

def parse_error(
    self,
    exception: type[ParseError] = ParseError,
    *args: Any,
    **kwargs: Any,
) -> ParseError:
    """
    Builds (without raising) a parse error for the current position.

    The exception type and any extra arguments are forwarded to the
    underlying source, which creates the error instance.
    """
    source = self._src
    error = source.parse_error(exception, *args, **kwargs)
    return error

162 

def parse(self) -> TOMLDocument:
    """
    Parses the whole input and returns the resulting TOMLDocument.

    Items that precede the first table header are added to the document
    root; every table (or array of tables) that follows is then parsed and
    appended. Any exception raised while appending is re-raised as a
    ParseError created at the current position.
    """
    document = TOMLDocument(True)

    def add(key, value):
        # Surface failures of the container as parse errors
        try:
            document.append(key, value)
        except Exception as exc:
            raise self.parse_error(ParseError, str(exc)) from exc

    # Root-level items: stop as soon as a table header is found
    while not self.end() and self._current != "[":
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed
        # A dotted key is appended as-is; anything else is appended
        # only if it could not be merged into trailing whitespace.
        dotted = key is not None and key.is_multi()
        if dotted or not self._merge_ws(value, document):
            add(key, value)

        self.mark()

    # Tables and arrays of tables
    while not self.end():
        key, value = self._parse_table()
        if isinstance(value, Table) and value.is_aot_element():
            # Only the first table of an AoT was read so far;
            # parse the remaining elements of the array with it.
            value = self._parse_aot(value, key)

        add(key, value)

    document.parsing(False)

    return document

202 

203 def _merge_ws(self, item: Item, container: Container) -> bool: 

204 """ 

205 Merges the given Item with the last one currently in the given Container if 

206 both are whitespace items. 

207 

208 Returns True if the items were merged. 

209 """ 

210 last = container.last_item() 

211 if not last: 

212 return False 

213 

214 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace): 

215 return False 

216 

217 start = self._idx - (len(last.s) + len(item.s)) 

218 container.body[-1] = ( 

219 container.body[-1][0], 

220 Whitespace(self._src[start : self._idx]), 

221 ) 

222 

223 return True 

224 

225 def _is_child(self, parent: Key, child: Key) -> bool: 

226 """ 

227 Returns whether a key is strictly a child of another key. 

228 AoT siblings are not considered children of one another. 

229 """ 

230 parent_parts = tuple(parent) 

231 child_parts = tuple(child) 

232 

233 if parent_parts == child_parts: 

234 return False 

235 

236 return parent_parts == child_parts[: len(parent_parts)] 

237 

def _parse_item(self) -> tuple[Key | None, Item] | None:
    """
    Attempts to parse the next item and returns it, along with its key
    if the item is value-like.

    Returns a ``(None, Whitespace)`` or ``(None, Comment)`` pair for trivia,
    the result of ``_parse_key_value`` for a key/value pair, and None when
    a table header ("[") is found so that the caller can handle it.
    """
    self.mark()
    with self._state as state:
        while True:
            c = self._current
            if c == "\n":
                # Found a newline; Return all whitespace found up to this point.
                self.inc()

                return None, Whitespace(self.extract())
            elif c in " \t\r":
                if c == "\r":
                    # Look ahead (the position is restored afterwards):
                    # a carriage return is only accepted as part of "\r\n".
                    with self._state(restore=True):
                        if not self.inc() or self._current != "\n":
                            raise self.parse_error(
                                InvalidControlChar, CTRL_M, "documents"
                            )
                # Skip whitespace.
                if not self.inc():
                    # Input ended inside the whitespace run
                    return None, Whitespace(self.extract())
            elif c == "#":
                # Found a comment, parse it
                indent = self.extract()
                cws, comment, trail = self._parse_comment_trail()

                return None, Comment(Trivia(indent, cws, comment, trail))
            elif c == "[":
                # Found a table, delegate to the calling function.
                return None
            else:
                # Beginning of a KV pair.
                # Return to beginning of whitespace so it gets included
                # as indentation for the KV about to be parsed.
                state.restore = True
                break

    return self._parse_key_value(True)

279 

def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
    """
    Returns (comment_ws, comment, trail)
    If there is no comment, comment_ws and comment will
    simply be empty.

    comment_ws is the whitespace found before the "#", comment is the
    comment text starting at the "#", and trail is the whitespace/newline
    that follows. When parse_trail is False the trail is not consumed and
    is returned empty.
    """
    if self.end():
        return "", "", ""

    comment = ""
    comment_ws = ""
    self.mark()

    while True:
        c = self._current

        if c == "\n":
            break
        elif c == "#":
            # Everything since the marker is the whitespace before the comment
            comment_ws = self.extract()

            self.mark()
            self.inc()  # Skip #

            # The comment itself
            while not self.end() and self._current not in _NL:
                code = ord(self._current)
                # DEL and control characters other than tab are rejected
                if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                    raise self.parse_error(InvalidControlChar, code, "comments")

                if not self.inc():
                    break

            comment = self.extract()
            self.mark()

            break
        elif c in " \t\r":
            if c == "\r":
                # Look ahead (the position is restored afterwards):
                # a carriage return is only accepted as part of "\r\n".
                with self._state(restore=True):
                    if not self.inc() or self._current != "\n":
                        raise self.parse_error(
                            InvalidControlChar, CTRL_M, "comments"
                        )
            self.inc()
        else:
            raise self.parse_error(UnexpectedCharError, c)

        if self.end():
            break

    trail = ""
    if parse_trail:
        # Bulk-skip spaces and tabs after the comment
        self._src.advance_while(_SPACES)

        if self._current == "\r":
            # Same "\r\n" look-ahead as above, then step over the "\r"
            with self._state(restore=True):
                if not self.inc() or self._current != "\n":
                    raise self.parse_error(InvalidControlChar, CTRL_M, "documents")
            self.inc()

        if self._current == "\n":
            self.inc()

        # Only extract when something was consumed since the last mark,
        # or when the current character is itself whitespace
        if self._idx != self._marker or self._current in _WS:
            trail = self.extract()

    return comment_ws, comment, trail

348 

def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
    """
    Parses a ``key = value`` pair at the current position.

    The leading indentation is stored on the value's trivia and the
    separator text on the key. When parse_comment is True the trailing
    comment and line ending are parsed into the value's trivia as well;
    otherwise the value's trail is cleared.
    """
    # Leading indent
    self.mark()
    self._src.advance_while(_SPACES)
    indent = self.extract()

    # Key
    key = self._parse_key()

    # Separator: whitespace around exactly one "="
    self.mark()
    found_equals = self._current == "="
    while self._current in _KV and self.inc():
        if self._current != "=":
            continue
        if found_equals:
            raise self.parse_error(UnexpectedCharError, "=")
        found_equals = True

    if not found_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    # Keep any separator text already recorded on the key
    key.sep = self.extract() if not key.sep else key.sep + self.extract()

    # Value
    val = self._parse_value()

    # Comment
    if not parse_comment:
        val.trivia.trail = ""
    else:
        cws, comment, trail = self._parse_comment_trail()
        meta = val.trivia
        if not meta.comment_ws:
            meta.comment_ws = cws

        meta.comment = comment
        meta.trail = trail

    val.trivia.indent = indent

    return key, val

394 

def _parse_key(self) -> Key:
    """
    Parses a (possibly dotted) Key at the current position.

    Whitespace in front of the key must already have been consumed by the
    caller. A key made of more than MAX_NESTING_DEPTH fragments is rejected
    with a ParseError.
    """
    key = self._parse_simple_key()
    fragment_count = 1

    # Every "." introduces one more fragment
    while self._current == ".":
        fragment_count += 1
        if fragment_count > self.MAX_NESTING_DEPTH:
            message = f"TOML key nested more than {self.MAX_NESTING_DEPTH} levels deep"
            raise self.parse_error(ParseError, message)

        self.inc()
        fragment = self._parse_simple_key()
        key = key.concat(fragment)

    return key

413 

def _parse_simple_key(self) -> Key:
    """
    Parses one key fragment (a key without dots), quoted or bare.
    """
    self.mark()
    # Leading whitespace is skipped in one bulk scan
    self._src.advance_while(_SPACES)

    starts_with_quote = self._current in "\"'"
    if starts_with_quote:
        return self._parse_quoted_key()

    return self._parse_bare_key()

425 

def _parse_quoted_key(self) -> Key:
    """
    Parses a key written between single or double quotes.

    The original text, including surrounding whitespace, is preserved on
    the returned key.
    """
    # Whitespace in front of the opening quote
    pieces = [self.extract()]

    # Look up the key type whose value is the quote character in use
    quote = self._current
    key_type = None
    for candidate in KeyType:
        if candidate.value == quote:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    string_type = StringType.SLB if key_type == KeyType.Basic else StringType.SLL
    key_str = self._parse_string(string_type)
    # Multi-line strings are not accepted as keys
    if key_str._t.is_multiline():
        raise self.parse_error(UnexpectedCharError, key_str._t.value)

    pieces.append(key_str.as_string())

    # Whitespace after the closing quote
    self.mark()
    self._src.advance_while(_SPACES)
    pieces.append(self.extract())

    return SingleKey(str(key_str), t=key_type, sep="", original="".join(pieces))

449 

def _parse_bare_key(self) -> Key:
    """
    Parses an unquoted key.

    Raises EmptyKeyError when no key text is found and a ParseError when
    the key contains whitespace.
    """
    # One bulk scan over key characters and surrounding spaces/tabs
    self._src.advance_while(_BARE_KEY_OR_SPACE)

    scanned = self.extract()
    name = scanned.strip()

    if not name:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    if any(blank in name for blank in (" ", "\t")):
        # Bare key with whitespace in it
        raise self.parse_error(ParseError, f'Invalid key "{name}"')

    return SingleKey(name, KeyType.Bare, "", scanned)

467 

def _parse_value(self) -> Item:
    """
    Attempts to parse a value at the current position.

    The first character selects the kind of value: a quote starts a string,
    the first letter of a boolean literal starts a boolean, "[" an array,
    "{" an inline table, a sign or inf/nan a number, and a digit an integer,
    float, date, time or datetime. Any other character raises
    UnexpectedCharError.
    """
    self.mark()
    c = self._current
    trivia = Trivia()

    if c == StringType.SLB.value:
        return self._parse_basic_string()
    elif c == StringType.SLL.value:
        return self._parse_literal_string()
    elif c == BoolType.TRUE.value[0]:
        return self._parse_true()
    elif c == BoolType.FALSE.value[0]:
        return self._parse_false()
    elif c == "[":
        # Arrays and inline tables go through the nesting depth guard
        return self._parse_nested(self._parse_array)
    elif c == "{":
        return self._parse_nested(self._parse_inline_table)
    elif c in "+-" or self._peek(4) in {
        "+inf",
        "-inf",
        "inf",
        "+nan",
        "-nan",
        "nan",
    }:
        # Number
        # Bulk-scan up to the next character that ends a number
        self._src.advance_until(_NUM_STOP)

        raw = self.extract()

        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    elif c in string.digits:
        # Integer, Float, Date, Time or DateTime
        self._src.advance_until(_NUM_STOP)

        raw = self.extract()

        m = RFC_3339_LOOSE.match(raw)
        if m:
            if m.group("date") and m.group("time"):
                # datetime
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateTimeError) from None

            if m.group("date"):
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.date)
                    date = Date(dt.year, dt.month, dt.day, trivia, raw)
                    # The number scan stops at a space, so a time part may
                    # still follow the date; scan on, this time across spaces.
                    self.mark()
                    self._src.advance_until(_DATE_TAIL_STOP)

                    time_raw = self.extract()
                    time_part = time_raw.rstrip()
                    # Whatever rstrip() removed is kept as the whitespace
                    # that precedes a possible comment
                    trivia.comment_ws = time_raw[len(time_part) :]
                    if not time_part:
                        return date

                    dt = parse_rfc3339(raw + time_part)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw + time_part,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateError) from None

            if m.group("time"):
                try:
                    t = parse_rfc3339(raw)
                    assert isinstance(t, datetime.time)
                    return Time(
                        t.hour,
                        t.minute,
                        t.second,
                        t.microsecond,
                        t.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidTimeError) from None

        # Not a date or time: fall back to a plain number
        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    else:
        raise self.parse_error(UnexpectedCharError, c)

588 

def _parse_true(self) -> Bool:
    """
    Parses the boolean literal described by BoolType.TRUE.
    """
    style = BoolType.TRUE
    return self._parse_bool(style)

591 

def _parse_false(self) -> Bool:
    """
    Parses the boolean literal described by BoolType.FALSE.
    """
    style = BoolType.FALSE
    return self._parse_bool(style)

594 

def _parse_bool(self, style: BoolType) -> Bool:
    """
    Parses a boolean literal of the given style at the current position.
    """
    with self._state:
        expected = BoolType(style)

        # Walk through the style character by character, asking consume()
        # for exactly one occurrence of each of them in turn.
        for char in expected:
            self.consume(char, min=1, max=1)

        return Bool(expected, Trivia())

605 

606 def _parse_nested(self, parse: Callable[[], Item]) -> Item: 

607 """ 

608 Parses an array or inline table, enforcing the nesting depth limit. 

609 """ 

610 self._nesting_depth += 1 

611 if self._nesting_depth > self.MAX_NESTING_DEPTH: 

612 raise self.parse_error( 

613 ParseError, 

614 f"TOML value nested more than {self.MAX_NESTING_DEPTH} levels deep", 

615 ) 

616 try: 

617 return parse() 

618 finally: 

619 self._nesting_depth -= 1 

620 

def _parse_array(self) -> Array:
    """
    Parses an array, starting at its opening bracket.

    Values, commas, whitespace and comments are collected in source order
    and handed to Array. Raises UnexpectedCharError on a character that
    does not fit, and a ParseError when Array rejects the collected
    elements with a ValueError.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: list[Item] = []
    # Truthy once a value has been read and no comma has followed it yet
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(" \t\n\r")
        indent = self._src[mark : self._idx]
        newline = _NL & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value; fall through to the comma / bracket checks
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            # If the previous item is Whitespace, add to it
            if isinstance(elems[-1], Whitespace):
                elems[-1]._s = elems[-1].s + ","
            else:
                elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    try:
        res = Array(elems, Trivia())
    except ValueError:
        # Reported below as a parse error at the current position
        pass
    else:
        return res

    raise self.parse_error(ParseError, "Failed to parse array")

684 

def _parse_inline_table(self) -> InlineTable:
    """
    Parses an inline table, starting at its opening brace.

    Key/value pairs, commas, whitespace and comments are added to a
    Container in source order. Raises UnexpectedCharError when a comma or
    a key/value pair shows up where the other one is expected.
    """
    # consume opening brace, EOF here is an issue (middle of inline table)
    self.inc(exception=UnexpectedEofError)

    body = Container(True)
    awaiting_key = True
    while True:
        # Collect any run of whitespace, newlines and comments
        while True:
            start = self._idx
            self.consume(" \t\n\r")
            blank = self._src[start : self._idx]
            if blank:
                body.add(Whitespace(blank))

            if self._current != "#":
                break

            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            body.add(Comment(Trivia("", cws, comment, trail)))

        current = self._current

        if current == "}":
            # consume closing brace, EOF here doesn't matter
            self.inc()
            return InlineTable(body, Trivia())

        if current == ",":
            if awaiting_key:
                raise self.parse_error(UnexpectedCharError, current)

            body.add(Whitespace(","))
            # consume comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)
            awaiting_key = True
            continue

        if not awaiting_key:
            # A pair must be followed by a comma or the closing brace
            raise self.parse_error(UnexpectedCharError, current)

        key, val = self._parse_key_value(False)
        body.add(key, val)
        awaiting_key = False

728 

729 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None: 

730 # Leading zeros are not allowed 

731 sign = "" 

732 if raw.startswith(("+", "-")): 

733 sign = raw[0] 

734 raw = raw[1:] 

735 

736 if len(raw) > 1 and ( 

737 (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))) 

738 or (sign and raw.startswith(".")) 

739 ): 

740 return None 

741 

742 if raw.startswith(("0o", "0x", "0b")) and sign: 

743 return None 

744 

745 digits = "[0-9]" 

746 base = 10 

747 if raw.startswith("0b"): 

748 digits = "[01]" 

749 base = 2 

750 elif raw.startswith("0o"): 

751 digits = "[0-7]" 

752 base = 8 

753 elif raw.startswith("0x"): 

754 digits = "[0-9a-f]" 

755 base = 16 

756 

757 # Underscores should be surrounded by digits 

758 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower() 

759 

760 if "_" in clean: 

761 return None 

762 

763 if clean.endswith(".") or ( 

764 not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".") 

765 ): 

766 return None 

767 

768 try: 

769 return Integer(int(sign + clean, base), trivia, sign + raw) 

770 except ValueError: 

771 try: 

772 return Float(float(sign + clean), trivia, sign + raw) 

773 except ValueError: 

774 return None 

775 

def _parse_literal_string(self) -> String:
    """
    Parses a string that starts with the single-line literal delimiter.
    """
    with self._state:
        literal = self._parse_string(StringType.SLL)
        return literal

779 

def _parse_basic_string(self) -> String:
    """
    Parses a string that starts with the single-line basic delimiter.
    """
    with self._state:
        basic = self._parse_string(StringType.SLB)
        return basic

783 

def _parse_escaped_char(self, multiline: bool) -> str:
    """
    Parses the character(s) following a backslash in a basic string and
    returns the text they stand for.

    Raises InvalidCharInStringError for an unknown escape and
    InvalidUnicodeValueError for a malformed "u", "U" or "x" escape.
    """
    if multiline and self._current in _WS:
        # A backslash that is the last non-whitespace character on a line
        # is trimmed together with all the whitespace (newlines included)
        # up to the next non-whitespace character or closing delimiter:
        # """\
        #     hello \
        #     world"""
        saw_newline = False
        while self._current in _WS:
            if self._current == "\n":
                saw_newline = True
            # consume the whitespace, EOF here is an issue
            # (middle of string)
            self.inc(exception=UnexpectedEofError)

        # the escape followed by whitespace must have a newline
        # before any other chars
        if not saw_newline:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    if self._current in _escaped:
        replacement = _escaped[self._current]

        # consume this char, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        return replacement

    if self._current in {"u", "U"}:
        # this needs to be a unicode
        char, char_digits = self._peek_unicode(self._current == "U")
        if char is None:
            raise self.parse_error(InvalidUnicodeValueError)

        assert char_digits is not None
        # consume the U char and the unicode value
        self.inc_n(len(char_digits) + 1)

        return char

    if self._current == "x":
        char, char_digits = self._peek_hex()
        if char is None:
            raise self.parse_error(InvalidUnicodeValueError)

        assert char_digits is not None
        # consume the x char and the hex value
        self.inc_n(len(char_digits) + 1)

        return char

    raise self.parse_error(InvalidCharInStringError, self._current)

839 

def _parse_string(self, delim: StringType) -> String:
    """
    Parses a string whose opening delimiter is at the current position.

    ``delim`` is the string type expected there; it is switched to its
    toggled (multi-line) counterpart when three delimiters in a row are
    found. Returns a String holding both the decoded value and the original
    text. Raises InvalidControlChar for control characters that are not
    allowed, and UnexpectedEofError when the input ends inside the string.
    """
    # only keep parsing for string if the current character matches the delim
    if self._current != delim.unit:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # consume the opening/first delim, EOF here is an issue
    # (middle of string or middle of delim)
    self.inc(exception=UnexpectedEofError)

    if self._current == delim.unit:
        # consume the closing/second delim, we do not care if EOF occurs as
        # that would simply imply an empty single line string
        if not self.inc() or self._current != delim.unit:
            # Empty string
            return String(delim, "", "", Trivia())

        # consume the third delim, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()  # convert delim to multi delim

    self.mark()  # to extract the original string with whitespace and all
    value = ""

    # A newline immediately following the opening delimiter will be trimmed.
    if delim.is_multiline():
        if self._current == "\n":
            # consume the newline, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # Peek at the next two characters (the position is restored
            # afterwards) to detect a "\r\n" line ending
            cur: str = self._current
            with self._state(restore=True):
                if self.inc():
                    cur += self._current
            if cur == "\r\n":
                self.inc_n(2, exception=UnexpectedEofError)

    # PERF: stop-set for the single-line string-body bulk fast-path (None for
    # multiline, which keeps the per-char loop because of \r\n handling).
    src = self._src
    single_stop = None
    if delim.is_singleline():
        single_stop = (
            _SINGLE_BASIC_STOP if delim.is_basic() else _SINGLE_LITERAL_STOP
        )

    escaped = False  # whether the previous key was ESCAPE
    while True:
        code = ord(self._current)
        # Unescaped control characters are rejected: everything but tab in
        # single-line strings, everything but tab / LF / CR in multi-line ones
        if (
            delim.is_singleline()
            and not escaped
            and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
        ) or (
            delim.is_multiline()
            and not escaped
            and (
                code == CHR_DEL
                or (
                    code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M]
                )
            )
        ):
            raise self.parse_error(InvalidControlChar, code, "strings")
        elif delim.is_multiline() and not escaped and self._current == "\r":
            # A carriage return is only accepted as part of "\r\n"
            # (look-ahead; the position is restored afterwards)
            with self._state(restore=True):
                if not self.inc() or self._current != "\n":
                    raise self.parse_error(InvalidControlChar, CTRL_M, "strings")
            value += self._current
            self.inc(exception=UnexpectedEofError)
        elif not escaped and self._current == delim.unit:
            # try to process current as a closing delim
            original = self.extract()

            close = ""
            if delim.is_multiline():
                # Consume the delimiters to see if we are at the end of the string
                close = ""
                while self._current == delim.unit:
                    close += self._current
                    self.inc()

                if len(close) < 3:
                    # Not a triple quote, leave in result as-is.
                    # Adding back the characters we already consumed
                    value += close
                    continue

                if len(close) == 3:
                    # We are at the end of the string
                    return String(delim, value, original, Trivia())

                if len(close) >= 6:
                    raise self.parse_error(InvalidCharInStringError, self._current)

                # Four or five delimiters: the last three close the string,
                # the ones before them belong to its content
                value += close[:-3]
                original += close[:-3]

                return String(delim, value, original, Trivia())
            else:
                # consume the closing delim, we do not care if EOF occurs as
                # that would simply imply the end of self._src
                self.inc()

            return String(delim, value, original, Trivia())
        elif delim.is_basic() and escaped:
            # attempt to parse the current char as an escaped value, an exception
            # is raised if this fails
            value += self._parse_escaped_char(delim.is_multiline())

            # no longer escaped
            escaped = False
        elif delim.is_basic() and self._current == "\\":
            # the next char is being escaped
            escaped = True

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # this is either a literal string where we keep everything as is,
            # or this is not a special escaped char in a basic string
            if single_stop is not None:
                # PERF fast-path: bulk-append the run of ordinary characters
                # up to the next delimiter / backslash / control char, instead
                # of one `value += cur; inc()` iteration per character. The
                # stop char is then handled by the branches above on the next
                # iteration (single-line only; multiline keeps the per-char
                # loop for CRLF handling).
                run_start = src._idx
                src.advance_until(single_stop)
                if src.end():
                    # mid-string EOF — same error as the per-char inc()
                    raise self.parse_error(UnexpectedEofError)
                value += src[run_start : src._idx]
            else:
                value += self._current

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)

982 

def _parse_table(
    self, parent_name: Key | None = None, parent: Table | None = None
) -> tuple[Key, Table | AoT]:
    """
    Parses a table element.

    The parser must be positioned on the opening "[" of a ``[table]`` or
    ``[[array-of-tables]]`` header. The header, its trailing comment and
    the body that follows are consumed, together with any directly
    following child tables.

    :param parent_name: key of the enclosing table; that many leading
        parts are dropped from this table's name before it is built.
    :param parent: the enclosing table. It is not read by this method.

    :return: a ``(key, item)`` pair to append to the enclosing container.

    :raises InternalParserError: if not positioned on "[".
    :raises UnexpectedEofError: if the input ends inside the header.
    :raises EmptyTableNameError: if the header has no name.
    :raises UnexpectedCharError: if the header is not closed by "]"
        (or by "]]" for an array of tables).
    :raises ParseError: if a bare name part contains a space.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        # A second opening bracket marks an array-of-tables header
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part beyond the parent's name means at least one
    # intermediate table was never declared explicitly
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    # Keep only the parts relative to the parent
    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header must be closed by "]]": the second
        # bracket has to follow the first one immediately. Without this
        # check any character (or EOF) in that position would be
        # silently swallowed as if it were the bracket.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)

        self.inc()

    cws, comment, trail = self._parse_comment_trail()

    # Null() acts as a "not decided yet" marker for the returned item
    result: Table | AoT = Null()  # type: ignore[assignment]
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia("", cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

        result = table
        key = name_parts[0]

        # Walk down the remaining name parts, creating (or reusing) one
        # nested table per part; the last part is the real table
        for i, _name in enumerate(name_parts[1:]):
            child = table.get(
                _name,
                Table(
                    Container(True),
                    Trivia(indent, cws, comment, trail),
                    is_aot and i == len(name_parts) - 2,
                    is_super_table=i < len(name_parts) - 2,
                    name=_name.key,
                    display_name=(
                        full_key.as_string() if i == len(name_parts) - 2 else None
                    ),
                ),
            )

            if is_aot and i == len(name_parts) - 2:
                table.raw_append(_name, AoT([child], name=table.name, parsed=True))
            else:
                table.raw_append(_name, child)

            table = child
            values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    # Parse the body: key/value items until the next header or EOF
    while not self.end():
        parsed = self._parse_item()
        if parsed:
            _key, _val = parsed
            if not self._merge_ws(_val, values):
                table.raw_append(_key, _val)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                # The next header is not a child: this table is complete
                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )
    table.value._validate_out_of_order_table()
    if isinstance(result, Null):
        result = table

        # First element of an array of tables: gather its siblings,
        # unless an enclosing _parse_aot() call is already doing so
        if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
            result = self._parse_aot(result, full_key)

    return key, result

1137 

1138 def _peek_table(self) -> tuple[bool, Key]: 

1139 """ 

1140 Peeks ahead non-intrusively by cloning then restoring the 

1141 initial state of the parser. 

1142 

1143 Returns the name of the table about to be parsed, 

1144 as well as whether it is part of an AoT. 

1145 """ 

1146 # we always want to restore after exiting this scope 

1147 with self._state(save_marker=True, restore=True): 

1148 if self._current != "[": 

1149 raise self.parse_error( 

1150 InternalParserError, 

1151 "_peek_table() entered on non-bracket character", 

1152 ) 

1153 

1154 # AoT 

1155 self.inc() 

1156 is_aot = False 

1157 if self._current == "[": 

1158 self.inc() 

1159 is_aot = True 

1160 try: 

1161 return is_aot, self._parse_key() 

1162 except EmptyKeyError: 

1163 raise self.parse_error(EmptyTableNameError) from None 

1164 

def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Builds an AoT out of the provided table and every array-of-tables
    element with the same name that directly follows it in the input.
    """
    # While the name is on the stack, _parse_table() hands back plain
    # tables for it instead of starting another AoT
    self._aot_stack.append(name_first)
    siblings: list[Table] = [first]

    while not self.end():
        next_is_aot, next_name = self._peek_table()
        if not (next_is_aot and next_name == name_first):
            # Anything else ends the run of siblings
            break

        _, sibling = self._parse_table(name_first)
        assert isinstance(sibling, Table)
        siblings.append(sibling)

    self._aot_stack.pop()

    return AoT(siblings, parsed=True)

1184 

1185 def _peek(self, n: int) -> str: 

1186 """ 

1187 Peeks ahead n characters. 

1188 

1189 n is the max number of characters that will be peeked. 

1190 """ 

1191 # we always want to restore after exiting this scope 

1192 with self._state(restore=True): 

1193 buf = "" 

1194 for _ in range(n): 

1195 if self._current not in " \t\n\r#,]}" + self._src.EOF: 

1196 buf += self._current 

1197 self.inc() 

1198 continue 

1199 

1200 break 

1201 return buf 

1202 

1203 def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]: 

1204 """ 

1205 Peeks ahead non-intrusively by cloning then restoring the 

1206 initial state of the parser. 

1207 

1208 Returns the unicode value is it's a valid one else None. 

1209 """ 

1210 # we always want to restore after exiting this scope 

1211 with self._state(save_marker=True, restore=True): 

1212 if self._current not in {"u", "U"}: 

1213 raise self.parse_error( 

1214 InternalParserError, "_peek_unicode() entered on non-unicode value" 

1215 ) 

1216 

1217 self.inc() # Dropping prefix 

1218 self.mark() 

1219 

1220 if is_long: 

1221 chars = 8 

1222 else: 

1223 chars = 4 

1224 

1225 if not self.inc_n(chars): 

1226 value, extracted = None, None 

1227 else: 

1228 extracted = self.extract() 

1229 

1230 if extracted.strip("0123456789abcdefABCDEF"): 

1231 return None, extracted 

1232 

1233 codepoint = int(extracted, 16) 

1234 

1235 # Unicode scalar values exclude the surrogate range 

1236 # (U+D800 to U+DFFF). The 8-digit \U form reaches this range 

1237 # with leading zeros, so it must be checked on the value itself. 

1238 if 0xD800 <= codepoint <= 0xDFFF: 

1239 return None, extracted 

1240 

1241 try: 

1242 value = chr(codepoint) 

1243 except (ValueError, OverflowError): 

1244 value = None 

1245 

1246 return value, extracted 

1247 

1248 def _peek_hex(self) -> tuple[str | None, str | None]: 

1249 with self._state(save_marker=True, restore=True): 

1250 if self._current != "x": 

1251 raise self.parse_error( 

1252 InternalParserError, "_peek_hex() entered on non-hex value" 

1253 ) 

1254 

1255 self.inc() # Dropping prefix 

1256 self.mark() 

1257 

1258 if not self.inc_n(2): 

1259 return None, None 

1260 

1261 extracted = self.extract() 

1262 if extracted.strip("0123456789abcdefABCDEF"): 

1263 return None, None 

1264 

1265 try: 

1266 value = chr(int(extracted, 16)) 

1267 except (ValueError, OverflowError): 

1268 value = None 

1269 

1270 return value, extracted