Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 97%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

679 statements  

1from __future__ import annotations 

2 

3import datetime 

4import re 

5import string 

6 

7from tomlkit._compat import decode 

8from tomlkit._utils import RFC_3339_LOOSE 

9from tomlkit._utils import _escaped 

10from tomlkit._utils import parse_rfc3339 

11from tomlkit.container import Container 

12from tomlkit.exceptions import EmptyKeyError 

13from tomlkit.exceptions import EmptyTableNameError 

14from tomlkit.exceptions import InternalParserError 

15from tomlkit.exceptions import InvalidCharInStringError 

16from tomlkit.exceptions import InvalidControlChar 

17from tomlkit.exceptions import InvalidDateError 

18from tomlkit.exceptions import InvalidDateTimeError 

19from tomlkit.exceptions import InvalidNumberError 

20from tomlkit.exceptions import InvalidTimeError 

21from tomlkit.exceptions import InvalidUnicodeValueError 

22from tomlkit.exceptions import ParseError 

23from tomlkit.exceptions import UnexpectedCharError 

24from tomlkit.exceptions import UnexpectedEofError 

25from tomlkit.items import AoT 

26from tomlkit.items import Array 

27from tomlkit.items import Bool 

28from tomlkit.items import BoolType 

29from tomlkit.items import Comment 

30from tomlkit.items import Date 

31from tomlkit.items import DateTime 

32from tomlkit.items import Float 

33from tomlkit.items import InlineTable 

34from tomlkit.items import Integer 

35from tomlkit.items import Item 

36from tomlkit.items import Key 

37from tomlkit.items import KeyType 

38from tomlkit.items import Null 

39from tomlkit.items import SingleKey 

40from tomlkit.items import String 

41from tomlkit.items import StringType 

42from tomlkit.items import Table 

43from tomlkit.items import Time 

44from tomlkit.items import Trivia 

45from tomlkit.items import Whitespace 

46from tomlkit.source import Source 

47from tomlkit.toml_char import TOMLChar 

48from tomlkit.toml_document import TOMLDocument 

49 

50 

# Code points of the control characters the parser singles out.
CTRL_I = ord("\t")  # Tab
CTRL_J = ord("\n")  # Line feed
CTRL_M = ord("\r")  # Carriage return
# Code points at or below this value are treated as control characters.
CTRL_CHAR_LIMIT = 31  # 0x1F
# The DEL character is rejected alongside the control characters above.
CHR_DEL = 127  # 0x7F

56 

57 

58class Parser: 

59 """ 

60 Parser for TOML documents. 

61 """ 

62 

63 def __init__(self, string: str | bytes) -> None: 

64 # Input to parse 

65 self._src = Source(decode(string)) 

66 

67 self._aot_stack: list[Key] = [] 

68 

69 @property 

70 def _state(self): 

71 return self._src.state 

72 

73 @property 

74 def _idx(self): 

75 return self._src.idx 

76 

77 @property 

78 def _current(self): 

79 return self._src.current 

80 

81 @property 

82 def _marker(self): 

83 return self._src.marker 

84 

85 def extract(self) -> str: 

86 """ 

87 Extracts the value between marker and index 

88 """ 

89 return self._src.extract() 

90 

91 def inc(self, exception: type[ParseError] | None = None) -> bool: 

92 """ 

93 Increments the parser if the end of the input has not been reached. 

94 Returns whether or not it was able to advance. 

95 """ 

96 return self._src.inc(exception=exception) 

97 

98 def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool: 

99 """ 

100 Increments the parser by n characters 

101 if the end of the input has not been reached. 

102 """ 

103 return self._src.inc_n(n=n, exception=exception) 

104 

105 def consume(self, chars, min=0, max=-1): 

106 """ 

107 Consume chars until min/max is satisfied is valid. 

108 """ 

109 return self._src.consume(chars=chars, min=min, max=max) 

110 

111 def end(self) -> bool: 

112 """ 

113 Returns True if the parser has reached the end of the input. 

114 """ 

115 return self._src.end() 

116 

117 def mark(self) -> None: 

118 """ 

119 Sets the marker to the index's current position 

120 """ 

121 self._src.mark() 

122 

123 def parse_error(self, exception=ParseError, *args, **kwargs): 

124 """ 

125 Creates a generic "parse error" at the current position. 

126 """ 

127 return self._src.parse_error(exception, *args, **kwargs) 

128 

129 def parse(self) -> TOMLDocument: 

130 body = TOMLDocument(True) 

131 

132 # Take all keyvals outside of tables/AoT's. 

133 while not self.end(): 

134 # Break out if a table is found 

135 if self._current == "[": 

136 break 

137 

138 # Otherwise, take and append one KV 

139 item = self._parse_item() 

140 if not item: 

141 break 

142 

143 key, value = item 

144 if (key is not None and key.is_multi()) or not self._merge_ws(value, body): 

145 # We actually have a table 

146 try: 

147 body.append(key, value) 

148 except Exception as e: 

149 raise self.parse_error(ParseError, str(e)) from e 

150 

151 self.mark() 

152 

153 while not self.end(): 

154 key, value = self._parse_table() 

155 if isinstance(value, Table) and value.is_aot_element(): 

156 # This is just the first table in an AoT. Parse the rest of the array 

157 # along with it. 

158 value = self._parse_aot(value, key) 

159 

160 try: 

161 body.append(key, value) 

162 except Exception as e: 

163 raise self.parse_error(ParseError, str(e)) from e 

164 

165 body.parsing(False) 

166 

167 return body 

168 

169 def _merge_ws(self, item: Item, container: Container) -> bool: 

170 """ 

171 Merges the given Item with the last one currently in the given Container if 

172 both are whitespace items. 

173 

174 Returns True if the items were merged. 

175 """ 

176 last = container.last_item() 

177 if not last: 

178 return False 

179 

180 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace): 

181 return False 

182 

183 start = self._idx - (len(last.s) + len(item.s)) 

184 container.body[-1] = ( 

185 container.body[-1][0], 

186 Whitespace(self._src[start : self._idx]), 

187 ) 

188 

189 return True 

190 

191 def _is_child(self, parent: Key, child: Key) -> bool: 

192 """ 

193 Returns whether a key is strictly a child of another key. 

194 AoT siblings are not considered children of one another. 

195 """ 

196 parent_parts = tuple(parent) 

197 child_parts = tuple(child) 

198 

199 if parent_parts == child_parts: 

200 return False 

201 

202 return parent_parts == child_parts[: len(parent_parts)] 

203 

    def _parse_item(self) -> tuple[Key | None, Item] | None:
        """
        Attempts to parse the next item and returns it, along with its key
        if the item is value-like.

        Whitespace and comments are returned with a None key. None is
        returned (without consuming anything) when a table header is found.
        """
        self.mark()
        with self._state as state:
            while True:
                c = self._current
                if c == "\n":
                    # Found a newline; Return all whitespace found up to this point.
                    self.inc()

                    return None, Whitespace(self.extract())
                elif c in " \t\r":
                    if c == "\r":
                        # Look ahead in a restoring state: a carriage return is
                        # only accepted when a line feed follows it.
                        with self._state(restore=True):
                            if not self.inc() or self._current != "\n":
                                raise self.parse_error(
                                    InvalidControlChar, CTRL_M, "documents"
                                )
                    # Skip whitespace.
                    if not self.inc():
                        # End of input: what was read so far is plain whitespace.
                        return None, Whitespace(self.extract())
                elif c == "#":
                    # Found a comment, parse it
                    indent = self.extract()
                    cws, comment, trail = self._parse_comment_trail()

                    return None, Comment(Trivia(indent, cws, comment, trail))
                elif c == "[":
                    # Found a table, delegate to the calling function.
                    return
                else:
                    # Beginning of a KV pair.
                    # Return to beginning of whitespace so it gets included
                    # as indentation for the KV about to be parsed.
                    state.restore = True
                    break

        return self._parse_key_value(True)

245 

    def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
        """
        Returns (comment_ws, comment, trail)
        If there is no comment, comment_ws and comment will
        simply be empty.

        When parse_trail is False the trail is always returned empty.
        Raises InvalidControlChar for a control character inside the comment
        or a carriage return not followed by a line feed, and
        UnexpectedCharError for any other character found before the comment
        or the end of the line.
        """
        if self.end():
            return "", "", ""

        comment = ""
        comment_ws = ""
        self.mark()

        while True:
            c = self._current

            if c == "\n":
                break
            elif c == "#":
                # Everything since the marker is the whitespace before the comment
                comment_ws = self.extract()

                self.mark()
                self.inc()  # Skip #

                # The comment itself
                while not self.end() and not self._current.is_nl():
                    code = ord(self._current)
                    # Tab is the only control character accepted inside a comment
                    if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                        raise self.parse_error(InvalidControlChar, code, "comments")

                    if not self.inc():
                        break

                comment = self.extract()
                # Restart the marker so the trail begins right after the comment
                self.mark()

                break
            elif c in " \t\r":
                if c == "\r":
                    # Look ahead in a restoring state: a carriage return is
                    # only accepted when a line feed follows it.
                    with self._state(restore=True):
                        if not self.inc() or self._current != "\n":
                            raise self.parse_error(
                                InvalidControlChar, CTRL_M, "comments"
                            )
                self.inc()
            else:
                raise self.parse_error(UnexpectedCharError, c)

            if self.end():
                break

        trail = ""
        if parse_trail:
            while self._current.is_spaces() and self.inc():
                pass

            if self._current == "\r":
                # Same CR/LF look-ahead as above, then step over the CR
                with self._state(restore=True):
                    if not self.inc() or self._current != "\n":
                        raise self.parse_error(InvalidControlChar, CTRL_M, "documents")
                self.inc()

            if self._current == "\n":
                self.inc()

            # Only extract when something was consumed since the marker, or
            # the current character is still whitespace
            if self._idx != self._marker or self._current.is_ws():
                trail = self.extract()

        return comment_ws, comment, trail

315 

316 def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]: 

317 # Leading indent 

318 self.mark() 

319 

320 while self._current.is_spaces() and self.inc(): 

321 pass 

322 

323 indent = self.extract() 

324 

325 # Key 

326 key = self._parse_key() 

327 

328 self.mark() 

329 

330 found_equals = self._current == "=" 

331 while self._current.is_kv_sep() and self.inc(): 

332 if self._current == "=": 

333 if found_equals: 

334 raise self.parse_error(UnexpectedCharError, "=") 

335 else: 

336 found_equals = True 

337 if not found_equals: 

338 raise self.parse_error(UnexpectedCharError, self._current) 

339 

340 if not key.sep: 

341 key.sep = self.extract() 

342 else: 

343 key.sep += self.extract() 

344 

345 # Value 

346 val = self._parse_value() 

347 # Comment 

348 if parse_comment: 

349 cws, comment, trail = self._parse_comment_trail() 

350 meta = val.trivia 

351 if not meta.comment_ws: 

352 meta.comment_ws = cws 

353 

354 meta.comment = comment 

355 meta.trail = trail 

356 else: 

357 val.trivia.trail = "" 

358 

359 val.trivia.indent = indent 

360 

361 return key, val 

362 

363 def _parse_key(self) -> Key: 

364 """ 

365 Parses a Key at the current position; 

366 WS before the key must be exhausted first at the callsite. 

367 """ 

368 self.mark() 

369 while self._current.is_spaces() and self.inc(): 

370 # Skip any leading whitespace 

371 pass 

372 if self._current in "\"'": 

373 return self._parse_quoted_key() 

374 else: 

375 return self._parse_bare_key() 

376 

377 def _parse_quoted_key(self) -> Key: 

378 """ 

379 Parses a key enclosed in either single or double quotes. 

380 """ 

381 # Extract the leading whitespace 

382 original = self.extract() 

383 quote_style = self._current 

384 key_type = next((t for t in KeyType if t.value == quote_style), None) 

385 

386 if key_type is None: 

387 raise RuntimeError("Should not have entered _parse_quoted_key()") 

388 

389 key_str = self._parse_string( 

390 StringType.SLB if key_type == KeyType.Basic else StringType.SLL 

391 ) 

392 if key_str._t.is_multiline(): 

393 raise self.parse_error(UnexpectedCharError, key_str._t.value) 

394 original += key_str.as_string() 

395 self.mark() 

396 while self._current.is_spaces() and self.inc(): 

397 pass 

398 original += self.extract() 

399 key = SingleKey(str(key_str), t=key_type, sep="", original=original) 

400 if self._current == ".": 

401 self.inc() 

402 key = key.concat(self._parse_key()) 

403 

404 return key 

405 

406 def _parse_bare_key(self) -> Key: 

407 """ 

408 Parses a bare key. 

409 """ 

410 while ( 

411 self._current.is_bare_key_char() or self._current.is_spaces() 

412 ) and self.inc(): 

413 pass 

414 

415 original = self.extract() 

416 key = original.strip() 

417 if not key: 

418 # Empty key 

419 raise self.parse_error(EmptyKeyError) 

420 

421 if " " in key: 

422 # Bare key with spaces in it 

423 raise self.parse_error(ParseError, f'Invalid key "{key}"') 

424 

425 key = SingleKey(key, KeyType.Bare, "", original) 

426 

427 if self._current == ".": 

428 self.inc() 

429 key = key.concat(self._parse_key()) 

430 

431 return key 

432 

    def _parse_value(self) -> Item:
        """
        Attempts to parse a value at the current position.

        The first character selects the kind of value: strings, booleans,
        arrays and inline tables are delegated to their own parsers; numbers
        and date/time values are read up to the next delimiter and converted
        here.

        Raises InvalidNumberError, InvalidDateTimeError, InvalidDateError or
        InvalidTimeError when the text cannot be converted, and
        UnexpectedCharError when no value can start with the current character.
        """
        self.mark()
        c = self._current
        trivia = Trivia()

        if c == StringType.SLB.value:
            return self._parse_basic_string()
        elif c == StringType.SLL.value:
            return self._parse_literal_string()
        elif c == BoolType.TRUE.value[0]:
            return self._parse_true()
        elif c == BoolType.FALSE.value[0]:
            return self._parse_false()
        elif c == "[":
            return self._parse_array()
        elif c == "{":
            return self._parse_inline_table()
        elif c in "+-" or self._peek(4) in {
            "+inf",
            "-inf",
            "inf",
            "+nan",
            "-nan",
            "nan",
        }:
            # Number
            # Read up to the next whitespace, comment or closing delimiter
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        elif c in string.digits:
            # Integer, Float, Date, Time or DateTime
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            m = RFC_3339_LOOSE.match(raw)
            if m:
                if m.group("date") and m.group("time"):
                    # datetime
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateTimeError) from None

                if m.group("date"):
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.date)
                        date = Date(dt.year, dt.month, dt.day, trivia, raw)
                        self.mark()
                        # Unlike the loops above, the stop set has no space, so
                        # text following the date after a space (e.g. a time
                        # part) is read as well.
                        while self._current not in "\t\n\r#,]}" and self.inc():
                            pass

                        time_raw = self.extract()
                        time_part = time_raw.rstrip()
                        # Whitespace after the time part is kept as comment whitespace
                        trivia.comment_ws = time_raw[len(time_part) :]
                        if not time_part:
                            return date

                        dt = parse_rfc3339(raw + time_part)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw + time_part,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateError) from None

                if m.group("time"):
                    try:
                        t = parse_rfc3339(raw)
                        assert isinstance(t, datetime.time)
                        return Time(
                            t.hour,
                            t.minute,
                            t.second,
                            t.microsecond,
                            t.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidTimeError) from None

            # Not a date/time value: it has to be a number
            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        else:
            raise self.parse_error(UnexpectedCharError, c)

556 

557 def _parse_true(self): 

558 return self._parse_bool(BoolType.TRUE) 

559 

560 def _parse_false(self): 

561 return self._parse_bool(BoolType.FALSE) 

562 

563 def _parse_bool(self, style: BoolType) -> Bool: 

564 with self._state: 

565 style = BoolType(style) 

566 

567 # only keep parsing for bool if the characters match the style 

568 # try consuming rest of chars in style 

569 for c in style: 

570 self.consume(c, min=1, max=1) 

571 

572 return Bool(style, Trivia()) 

573 

574 def _parse_array(self) -> Array: 

575 # Consume opening bracket, EOF here is an issue (middle of array) 

576 self.inc(exception=UnexpectedEofError) 

577 

578 elems: list[Item] = [] 

579 prev_value = None 

580 while True: 

581 # consume whitespace 

582 mark = self._idx 

583 self.consume(TOMLChar.SPACES + TOMLChar.NL) 

584 indent = self._src[mark : self._idx] 

585 newline = set(TOMLChar.NL) & set(indent) 

586 if newline: 

587 elems.append(Whitespace(indent)) 

588 continue 

589 

590 # consume comment 

591 if self._current == "#": 

592 cws, comment, trail = self._parse_comment_trail(parse_trail=False) 

593 elems.append(Comment(Trivia(indent, cws, comment, trail))) 

594 continue 

595 

596 # consume indent 

597 if indent: 

598 elems.append(Whitespace(indent)) 

599 continue 

600 

601 # consume value 

602 if not prev_value: 

603 try: 

604 elems.append(self._parse_value()) 

605 prev_value = True 

606 continue 

607 except UnexpectedCharError: 

608 pass 

609 

610 # consume comma 

611 if prev_value and self._current == ",": 

612 self.inc(exception=UnexpectedEofError) 

613 # If the previous item is Whitespace, add to it 

614 if isinstance(elems[-1], Whitespace): 

615 elems[-1]._s = elems[-1].s + "," 

616 else: 

617 elems.append(Whitespace(",")) 

618 prev_value = False 

619 continue 

620 

621 # consume closing bracket 

622 if self._current == "]": 

623 # consume closing bracket, EOF here doesn't matter 

624 self.inc() 

625 break 

626 

627 raise self.parse_error(UnexpectedCharError, self._current) 

628 

629 try: 

630 res = Array(elems, Trivia()) 

631 except ValueError: 

632 pass 

633 else: 

634 return res 

635 

    def _parse_inline_table(self) -> InlineTable:
        """
        Parses an inline table, starting on its opening brace.

        Key/value pairs, commas, whitespace (including newlines) and comments
        are added to a Container that is wrapped in the returned InlineTable.

        Raises UnexpectedEofError when the input ends right after the opening
        brace or a comma, and UnexpectedCharError on a misplaced or missing
        comma.
        """
        # consume opening bracket, EOF here is an issue (middle of inline table)
        self.inc(exception=UnexpectedEofError)

        elems = Container(True)
        # True while the next non-trivia token has to be a key (or the closing brace)
        expect_key = True
        while True:
            while True:
                # consume whitespace and newlines
                mark = self._idx
                self.consume(TOMLChar.SPACES + TOMLChar.NL)
                raw = self._src[mark : self._idx]
                if raw:
                    elems.add(Whitespace(raw))

                if self._current != "#":
                    break

                cws, comment, trail = self._parse_comment_trail(parse_trail=False)
                elems.add(Comment(Trivia("", cws, comment, trail)))

            if self._current == "}":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            if expect_key:
                # A comma where a key is expected is an error
                if self._current == ",":
                    raise self.parse_error(UnexpectedCharError, self._current)
                key, val = self._parse_key_value(False)
                elems.add(key, val)
                expect_key = False
                continue

            # After a key/value pair only a comma (or the closing brace above) may follow
            if self._current != ",":
                raise self.parse_error(UnexpectedCharError, self._current)

            elems.add(Whitespace(","))
            # consume comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)
            expect_key = True

        return InlineTable(elems, Trivia())

679 

680 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None: 

681 # Leading zeros are not allowed 

682 sign = "" 

683 if raw.startswith(("+", "-")): 

684 sign = raw[0] 

685 raw = raw[1:] 

686 

687 if len(raw) > 1 and ( 

688 (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))) 

689 or (sign and raw.startswith(".")) 

690 ): 

691 return None 

692 

693 if raw.startswith(("0o", "0x", "0b")) and sign: 

694 return None 

695 

696 digits = "[0-9]" 

697 base = 10 

698 if raw.startswith("0b"): 

699 digits = "[01]" 

700 base = 2 

701 elif raw.startswith("0o"): 

702 digits = "[0-7]" 

703 base = 8 

704 elif raw.startswith("0x"): 

705 digits = "[0-9a-f]" 

706 base = 16 

707 

708 # Underscores should be surrounded by digits 

709 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower() 

710 

711 if "_" in clean: 

712 return None 

713 

714 if clean.endswith(".") or ( 

715 not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".") 

716 ): 

717 return None 

718 

719 try: 

720 return Integer(int(sign + clean, base), trivia, sign + raw) 

721 except ValueError: 

722 try: 

723 return Float(float(sign + clean), trivia, sign + raw) 

724 except ValueError: 

725 return None 

726 

727 def _parse_literal_string(self) -> String: 

728 with self._state: 

729 return self._parse_string(StringType.SLL) 

730 

731 def _parse_basic_string(self) -> String: 

732 with self._state: 

733 return self._parse_string(StringType.SLB) 

734 

735 def _parse_escaped_char(self, multiline): 

736 if multiline and self._current.is_ws(): 

737 # When the last non-whitespace character on a line is 

738 # a \, it will be trimmed along with all whitespace 

739 # (including newlines) up to the next non-whitespace 

740 # character or closing delimiter. 

741 # """\ 

742 # hello \ 

743 # world""" 

744 tmp = "" 

745 while self._current.is_ws(): 

746 tmp += self._current 

747 # consume the whitespace, EOF here is an issue 

748 # (middle of string) 

749 self.inc(exception=UnexpectedEofError) 

750 continue 

751 

752 # the escape followed by whitespace must have a newline 

753 # before any other chars 

754 if "\n" not in tmp: 

755 raise self.parse_error(InvalidCharInStringError, self._current) 

756 

757 return "" 

758 

759 if self._current in _escaped: 

760 c = _escaped[self._current] 

761 

762 # consume this char, EOF here is an issue (middle of string) 

763 self.inc(exception=UnexpectedEofError) 

764 

765 return c 

766 

767 if self._current in {"u", "U"}: 

768 # this needs to be a unicode 

769 u, ue = self._peek_unicode(self._current == "U") 

770 if u is not None: 

771 # consume the U char and the unicode value 

772 self.inc_n(len(ue) + 1) 

773 

774 return u 

775 

776 raise self.parse_error(InvalidUnicodeValueError) 

777 

778 if self._current == "x": 

779 h, he = self._peek_hex() 

780 if h is not None: 

781 # consume the x char and the hex value 

782 self.inc_n(len(he) + 1) 

783 return h 

784 

785 raise self.parse_error(InvalidUnicodeValueError) 

786 

787 raise self.parse_error(InvalidCharInStringError, self._current) 

788 

789 def _parse_string(self, delim: StringType) -> String: 

790 # only keep parsing for string if the current character matches the delim 

791 if self._current != delim.unit: 

792 raise self.parse_error( 

793 InternalParserError, 

794 f"Invalid character for string type {delim}", 

795 ) 

796 

797 # consume the opening/first delim, EOF here is an issue 

798 # (middle of string or middle of delim) 

799 self.inc(exception=UnexpectedEofError) 

800 

801 if self._current == delim.unit: 

802 # consume the closing/second delim, we do not care if EOF occurs as 

803 # that would simply imply an empty single line string 

804 if not self.inc() or self._current != delim.unit: 

805 # Empty string 

806 return String(delim, "", "", Trivia()) 

807 

808 # consume the third delim, EOF here is an issue (middle of string) 

809 self.inc(exception=UnexpectedEofError) 

810 

811 delim = delim.toggle() # convert delim to multi delim 

812 

813 self.mark() # to extract the original string with whitespace and all 

814 value = "" 

815 

816 # A newline immediately following the opening delimiter will be trimmed. 

817 if delim.is_multiline(): 

818 if self._current == "\n": 

819 # consume the newline, EOF here is an issue (middle of string) 

820 self.inc(exception=UnexpectedEofError) 

821 else: 

822 cur = self._current 

823 with self._state(restore=True): 

824 if self.inc(): 

825 cur += self._current 

826 if cur == "\r\n": 

827 self.inc_n(2, exception=UnexpectedEofError) 

828 

829 escaped = False # whether the previous key was ESCAPE 

830 while True: 

831 code = ord(self._current) 

832 if ( 

833 delim.is_singleline() 

834 and not escaped 

835 and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I)) 

836 ) or ( 

837 delim.is_multiline() 

838 and not escaped 

839 and ( 

840 code == CHR_DEL 

841 or ( 

842 code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M] 

843 ) 

844 ) 

845 ): 

846 raise self.parse_error(InvalidControlChar, code, "strings") 

847 elif delim.is_multiline() and not escaped and self._current == "\r": 

848 with self._state(restore=True): 

849 if not self.inc() or self._current != "\n": 

850 raise self.parse_error(InvalidControlChar, CTRL_M, "strings") 

851 elif not escaped and self._current == delim.unit: 

852 # try to process current as a closing delim 

853 original = self.extract() 

854 

855 close = "" 

856 if delim.is_multiline(): 

857 # Consume the delimiters to see if we are at the end of the string 

858 close = "" 

859 while self._current == delim.unit: 

860 close += self._current 

861 self.inc() 

862 

863 if len(close) < 3: 

864 # Not a triple quote, leave in result as-is. 

865 # Adding back the characters we already consumed 

866 value += close 

867 continue 

868 

869 if len(close) == 3: 

870 # We are at the end of the string 

871 return String(delim, value, original, Trivia()) 

872 

873 if len(close) >= 6: 

874 raise self.parse_error(InvalidCharInStringError, self._current) 

875 

876 value += close[:-3] 

877 original += close[:-3] 

878 

879 return String(delim, value, original, Trivia()) 

880 else: 

881 # consume the closing delim, we do not care if EOF occurs as 

882 # that would simply imply the end of self._src 

883 self.inc() 

884 

885 return String(delim, value, original, Trivia()) 

886 elif delim.is_basic() and escaped: 

887 # attempt to parse the current char as an escaped value, an exception 

888 # is raised if this fails 

889 value += self._parse_escaped_char(delim.is_multiline()) 

890 

891 # no longer escaped 

892 escaped = False 

893 elif delim.is_basic() and self._current == "\\": 

894 # the next char is being escaped 

895 escaped = True 

896 

897 # consume this char, EOF here is an issue (middle of string) 

898 self.inc(exception=UnexpectedEofError) 

899 else: 

900 # this is either a literal string where we keep everything as is, 

901 # or this is not a special escaped char in a basic string 

902 value += self._current 

903 

904 # consume this char, EOF here is an issue (middle of string) 

905 self.inc(exception=UnexpectedEofError) 

906 

907 def _parse_table( 

908 self, parent_name: Key | None = None, parent: Table | None = None 

909 ) -> tuple[Key, Table | AoT]: 

910 """ 

911 Parses a table element. 

912 """ 

913 if self._current != "[": 

914 raise self.parse_error( 

915 InternalParserError, "_parse_table() called on non-bracket character." 

916 ) 

917 

918 indent = self.extract() 

919 self.inc() # Skip opening bracket 

920 

921 if self.end(): 

922 raise self.parse_error(UnexpectedEofError) 

923 

924 is_aot = False 

925 if self._current == "[": 

926 if not self.inc(): 

927 raise self.parse_error(UnexpectedEofError) 

928 

929 is_aot = True 

930 try: 

931 key = self._parse_key() 

932 except EmptyKeyError: 

933 raise self.parse_error(EmptyTableNameError) from None 

934 if self.end(): 

935 raise self.parse_error(UnexpectedEofError) 

936 elif self._current != "]": 

937 raise self.parse_error(UnexpectedCharError, self._current) 

938 

939 key.sep = "" 

940 full_key = key 

941 name_parts = tuple(key) 

942 if any(" " in part.key.strip() and part.is_bare() for part in name_parts): 

943 raise self.parse_error( 

944 ParseError, f'Invalid table name "{full_key.as_string()}"' 

945 ) 

946 

947 missing_table = False 

948 if parent_name: 

949 parent_name_parts = tuple(parent_name) 

950 else: 

951 parent_name_parts = () 

952 

953 if len(name_parts) > len(parent_name_parts) + 1: 

954 missing_table = True 

955 

956 name_parts = name_parts[len(parent_name_parts) :] 

957 

958 values = Container(True) 

959 

960 self.inc() # Skip closing bracket 

961 if is_aot: 

962 # TODO: Verify close bracket 

963 self.inc() 

964 

965 cws, comment, trail = self._parse_comment_trail() 

966 

967 result = Null() 

968 table = Table( 

969 values, 

970 Trivia(indent, cws, comment, trail), 

971 is_aot, 

972 name=name_parts[0].key if name_parts else key.key, 

973 display_name=full_key.as_string(), 

974 is_super_table=False, 

975 ) 

976 

977 if len(name_parts) > 1: 

978 if missing_table: 

979 # Missing super table 

980 # i.e. a table initialized like this: [foo.bar] 

981 # without initializing [foo] 

982 # 

983 # So we have to create the parent tables 

984 table = Table( 

985 Container(True), 

986 Trivia("", cws, comment, trail), 

987 is_aot and name_parts[0] in self._aot_stack, 

988 is_super_table=True, 

989 name=name_parts[0].key, 

990 ) 

991 

992 result = table 

993 key = name_parts[0] 

994 

995 for i, _name in enumerate(name_parts[1:]): 

996 child = table.get( 

997 _name, 

998 Table( 

999 Container(True), 

1000 Trivia(indent, cws, comment, trail), 

1001 is_aot and i == len(name_parts) - 2, 

1002 is_super_table=i < len(name_parts) - 2, 

1003 name=_name.key, 

1004 display_name=( 

1005 full_key.as_string() if i == len(name_parts) - 2 else None 

1006 ), 

1007 ), 

1008 ) 

1009 

1010 if is_aot and i == len(name_parts) - 2: 

1011 table.raw_append(_name, AoT([child], name=table.name, parsed=True)) 

1012 else: 

1013 table.raw_append(_name, child) 

1014 

1015 table = child 

1016 values = table.value 

1017 else: 

1018 if name_parts: 

1019 key = name_parts[0] 

1020 

1021 while not self.end(): 

1022 item = self._parse_item() 

1023 if item: 

1024 _key, item = item 

1025 if not self._merge_ws(item, values): 

1026 table.raw_append(_key, item) 

1027 else: 

1028 if self._current == "[": 

1029 _, key_next = self._peek_table() 

1030 

1031 if self._is_child(full_key, key_next): 

1032 key_next, table_next = self._parse_table(full_key, table) 

1033 

1034 table.raw_append(key_next, table_next) 

1035 

1036 # Picking up any sibling 

1037 while not self.end(): 

1038 _, key_next = self._peek_table() 

1039 

1040 if not self._is_child(full_key, key_next): 

1041 break 

1042 

1043 key_next, table_next = self._parse_table(full_key, table) 

1044 

1045 table.raw_append(key_next, table_next) 

1046 

1047 break 

1048 else: 

1049 raise self.parse_error( 

1050 InternalParserError, 

1051 "_parse_item() returned None on a non-bracket character.", 

1052 ) 

1053 table.value._validate_out_of_order_table() 

1054 if isinstance(result, Null): 

1055 result = table 

1056 

1057 if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]): 

1058 result = self._parse_aot(result, full_key) 

1059 

1060 return key, result 

1061 

1062 def _peek_table(self) -> tuple[bool, Key]: 

1063 """ 

1064 Peeks ahead non-intrusively by cloning then restoring the 

1065 initial state of the parser. 

1066 

1067 Returns the name of the table about to be parsed, 

1068 as well as whether it is part of an AoT. 

1069 """ 

1070 # we always want to restore after exiting this scope 

1071 with self._state(save_marker=True, restore=True): 

1072 if self._current != "[": 

1073 raise self.parse_error( 

1074 InternalParserError, 

1075 "_peek_table() entered on non-bracket character", 

1076 ) 

1077 

1078 # AoT 

1079 self.inc() 

1080 is_aot = False 

1081 if self._current == "[": 

1082 self.inc() 

1083 is_aot = True 

1084 try: 

1085 return is_aot, self._parse_key() 

1086 except EmptyKeyError: 

1087 raise self.parse_error(EmptyTableNameError) from None 

1088 

1089 def _parse_aot(self, first: Table, name_first: Key) -> AoT: 

1090 """ 

1091 Parses all siblings of the provided table first and bundles them into 

1092 an AoT. 

1093 """ 

1094 payload = [first] 

1095 self._aot_stack.append(name_first) 

1096 while not self.end(): 

1097 is_aot_next, name_next = self._peek_table() 

1098 if is_aot_next and name_next == name_first: 

1099 _, table = self._parse_table(name_first) 

1100 payload.append(table) 

1101 else: 

1102 break 

1103 

1104 self._aot_stack.pop() 

1105 

1106 return AoT(payload, parsed=True) 

1107 

1108 def _peek(self, n: int) -> str: 

1109 """ 

1110 Peeks ahead n characters. 

1111 

1112 n is the max number of characters that will be peeked. 

1113 """ 

1114 # we always want to restore after exiting this scope 

1115 with self._state(restore=True): 

1116 buf = "" 

1117 for _ in range(n): 

1118 if self._current not in " \t\n\r#,]}" + self._src.EOF: 

1119 buf += self._current 

1120 self.inc() 

1121 continue 

1122 

1123 break 

1124 return buf 

1125 

1126 def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]: 

1127 """ 

1128 Peeks ahead non-intrusively by cloning then restoring the 

1129 initial state of the parser. 

1130 

1131 Returns the unicode value is it's a valid one else None. 

1132 """ 

1133 # we always want to restore after exiting this scope 

1134 with self._state(save_marker=True, restore=True): 

1135 if self._current not in {"u", "U"}: 

1136 raise self.parse_error( 

1137 InternalParserError, "_peek_unicode() entered on non-unicode value" 

1138 ) 

1139 

1140 self.inc() # Dropping prefix 

1141 self.mark() 

1142 

1143 if is_long: 

1144 chars = 8 

1145 else: 

1146 chars = 4 

1147 

1148 if not self.inc_n(chars): 

1149 value, extracted = None, None 

1150 else: 

1151 extracted = self.extract() 

1152 

1153 if extracted[0].lower() == "d" and extracted[1].strip("01234567"): 

1154 return None, None 

1155 

1156 try: 

1157 value = chr(int(extracted, 16)) 

1158 except (ValueError, OverflowError): 

1159 value = None 

1160 

1161 return value, extracted 

1162 

1163 def _peek_hex(self) -> tuple[str | None, str | None]: 

1164 with self._state(save_marker=True, restore=True): 

1165 if self._current != "x": 

1166 raise self.parse_error( 

1167 InternalParserError, "_peek_hex() entered on non-hex value" 

1168 ) 

1169 

1170 self.inc() # Dropping prefix 

1171 self.mark() 

1172 

1173 if not self.inc_n(2): 

1174 return None, None 

1175 

1176 extracted = self.extract() 

1177 if extracted.strip("0123456789abcdefABCDEF"): 

1178 return None, None 

1179 

1180 try: 

1181 value = chr(int(extracted, 16)) 

1182 except (ValueError, OverflowError): 

1183 value = None 

1184 

1185 return value, extracted