Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

640 statements  

1from __future__ import annotations 

2 

3import datetime 

4import re 

5import string 

6 

7from tomlkit._compat import decode 

8from tomlkit._utils import RFC_3339_LOOSE 

9from tomlkit._utils import _escaped 

10from tomlkit._utils import parse_rfc3339 

11from tomlkit.container import Container 

12from tomlkit.exceptions import EmptyKeyError 

13from tomlkit.exceptions import EmptyTableNameError 

14from tomlkit.exceptions import InternalParserError 

15from tomlkit.exceptions import InvalidCharInStringError 

16from tomlkit.exceptions import InvalidControlChar 

17from tomlkit.exceptions import InvalidDateError 

18from tomlkit.exceptions import InvalidDateTimeError 

19from tomlkit.exceptions import InvalidNumberError 

20from tomlkit.exceptions import InvalidTimeError 

21from tomlkit.exceptions import InvalidUnicodeValueError 

22from tomlkit.exceptions import ParseError 

23from tomlkit.exceptions import UnexpectedCharError 

24from tomlkit.exceptions import UnexpectedEofError 

25from tomlkit.items import AoT 

26from tomlkit.items import Array 

27from tomlkit.items import Bool 

28from tomlkit.items import BoolType 

29from tomlkit.items import Comment 

30from tomlkit.items import Date 

31from tomlkit.items import DateTime 

32from tomlkit.items import Float 

33from tomlkit.items import InlineTable 

34from tomlkit.items import Integer 

35from tomlkit.items import Item 

36from tomlkit.items import Key 

37from tomlkit.items import KeyType 

38from tomlkit.items import Null 

39from tomlkit.items import SingleKey 

40from tomlkit.items import String 

41from tomlkit.items import StringType 

42from tomlkit.items import Table 

43from tomlkit.items import Time 

44from tomlkit.items import Trivia 

45from tomlkit.items import Whitespace 

46from tomlkit.source import Source 

47from tomlkit.toml_char import TOMLChar 

48from tomlkit.toml_document import TOMLDocument 

49 

50 

# Code points consulted when rejecting control characters in comments and
# strings (see the InvalidControlChar checks in the parser below).
CTRL_I = 0x09  # Tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
# Code points at or below this value are treated as control characters unless
# explicitly exempted (tab everywhere; LF/CR inside multiline strings).
CTRL_CHAR_LIMIT = 0x1F
# Rejected alongside the control range above.
CHR_DEL = 0x7F

56 

57 

58class Parser: 

59 """ 

60 Parser for TOML documents. 

61 """ 

62 

63 def __init__(self, string: str | bytes) -> None: 

64 # Input to parse 

65 self._src = Source(decode(string)) 

66 

67 self._aot_stack: list[Key] = [] 

68 

69 @property 

70 def _state(self): 

71 return self._src.state 

72 

73 @property 

74 def _idx(self): 

75 return self._src.idx 

76 

77 @property 

78 def _current(self): 

79 return self._src.current 

80 

81 @property 

82 def _marker(self): 

83 return self._src.marker 

84 

85 def extract(self) -> str: 

86 """ 

87 Extracts the value between marker and index 

88 """ 

89 return self._src.extract() 

90 

91 def inc(self, exception: type[ParseError] | None = None) -> bool: 

92 """ 

93 Increments the parser if the end of the input has not been reached. 

94 Returns whether or not it was able to advance. 

95 """ 

96 return self._src.inc(exception=exception) 

97 

98 def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool: 

99 """ 

100 Increments the parser by n characters 

101 if the end of the input has not been reached. 

102 """ 

103 return self._src.inc_n(n=n, exception=exception) 

104 

105 def consume(self, chars, min=0, max=-1): 

106 """ 

107 Consume chars until min/max is satisfied is valid. 

108 """ 

109 return self._src.consume(chars=chars, min=min, max=max) 

110 

111 def end(self) -> bool: 

112 """ 

113 Returns True if the parser has reached the end of the input. 

114 """ 

115 return self._src.end() 

116 

117 def mark(self) -> None: 

118 """ 

119 Sets the marker to the index's current position 

120 """ 

121 self._src.mark() 

122 

123 def parse_error(self, exception=ParseError, *args, **kwargs): 

124 """ 

125 Creates a generic "parse error" at the current position. 

126 """ 

127 return self._src.parse_error(exception, *args, **kwargs) 

128 

129 def parse(self) -> TOMLDocument: 

130 body = TOMLDocument(True) 

131 

132 # Take all keyvals outside of tables/AoT's. 

133 while not self.end(): 

134 # Break out if a table is found 

135 if self._current == "[": 

136 break 

137 

138 # Otherwise, take and append one KV 

139 item = self._parse_item() 

140 if not item: 

141 break 

142 

143 key, value = item 

144 if (key is not None and key.is_multi()) or not self._merge_ws(value, body): 

145 # We actually have a table 

146 try: 

147 body.append(key, value) 

148 except Exception as e: 

149 raise self.parse_error(ParseError, str(e)) from e 

150 

151 self.mark() 

152 

153 while not self.end(): 

154 key, value = self._parse_table() 

155 if isinstance(value, Table) and value.is_aot_element(): 

156 # This is just the first table in an AoT. Parse the rest of the array 

157 # along with it. 

158 value = self._parse_aot(value, key) 

159 

160 try: 

161 body.append(key, value) 

162 except Exception as e: 

163 raise self.parse_error(ParseError, str(e)) from e 

164 

165 body.parsing(False) 

166 

167 return body 

168 

169 def _merge_ws(self, item: Item, container: Container) -> bool: 

170 """ 

171 Merges the given Item with the last one currently in the given Container if 

172 both are whitespace items. 

173 

174 Returns True if the items were merged. 

175 """ 

176 last = container.last_item() 

177 if not last: 

178 return False 

179 

180 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace): 

181 return False 

182 

183 start = self._idx - (len(last.s) + len(item.s)) 

184 container.body[-1] = ( 

185 container.body[-1][0], 

186 Whitespace(self._src[start : self._idx]), 

187 ) 

188 

189 return True 

190 

191 def _is_child(self, parent: Key, child: Key) -> bool: 

192 """ 

193 Returns whether a key is strictly a child of another key. 

194 AoT siblings are not considered children of one another. 

195 """ 

196 parent_parts = tuple(parent) 

197 child_parts = tuple(child) 

198 

199 if parent_parts == child_parts: 

200 return False 

201 

202 return parent_parts == child_parts[: len(parent_parts)] 

203 

    def _parse_item(self) -> tuple[Key | None, Item] | None:
        """
        Attempts to parse the next item and returns it, along with its key
        if the item is value-like.

        Returns ``(None, Whitespace)`` or ``(None, Comment)`` for trivia,
        ``(key, value)`` for a key/value pair, and ``None`` when a table
        header ("[") is found so the caller can handle it.
        """
        self.mark()
        # `state.restore` is only switched on in the key/value branch below.
        with self._state as state:
            while True:
                c = self._current
                if c == "\n":
                    # Found a newline; Return all whitespace found up to this point.
                    self.inc()

                    return None, Whitespace(self.extract())
                elif c in " \t\r":
                    # Skip whitespace.
                    if not self.inc():
                        # Could not advance any further: what was collected is
                        # returned as whitespace.
                        return None, Whitespace(self.extract())
                elif c == "#":
                    # Found a comment, parse it
                    indent = self.extract()
                    cws, comment, trail = self._parse_comment_trail()

                    return None, Comment(Trivia(indent, cws, comment, trail))
                elif c == "[":
                    # Found a table, delegate to the calling function.
                    return
                else:
                    # Beginning of a KV pair.
                    # Return to beginning of whitespace so it gets included
                    # as indentation for the KV about to be parsed.
                    state.restore = True
                    break

        # Reached only through the `break` above.
        return self._parse_key_value(True)

239 

    def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
        """
        Returns (comment_ws, comment, trail)
        If there is no comment, comment_ws and comment will
        simply be empty.

        When ``parse_trail`` is False, ``trail`` is always the empty string.

        Raises InvalidControlChar for a control character inside a comment and
        UnexpectedCharError for anything else that is neither whitespace,
        "#" nor a newline.
        """
        if self.end():
            return "", "", ""

        comment = ""
        comment_ws = ""
        self.mark()

        while True:
            c = self._current

            if c == "\n":
                break
            elif c == "#":
                # Everything collected since the mark precedes the comment.
                comment_ws = self.extract()

                self.mark()
                self.inc()  # Skip #

                # The comment itself
                while not self.end() and not self._current.is_nl():
                    code = ord(self._current)
                    # Tab is the only character in the control range that is
                    # allowed inside a comment.
                    if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                        raise self.parse_error(InvalidControlChar, code, "comments")

                    if not self.inc():
                        break

                comment = self.extract()
                # Re-mark so the trail extraction below starts after the comment.
                self.mark()

                break
            elif c in " \t\r":
                self.inc()
            else:
                raise self.parse_error(UnexpectedCharError, c)

            if self.end():
                break

        trail = ""
        if parse_trail:
            while self._current.is_spaces() and self.inc():
                pass

            # Accept at most one "\r" and one "\n" as the line ending.
            if self._current == "\r":
                self.inc()

            if self._current == "\n":
                self.inc()

            if self._idx != self._marker or self._current.is_ws():
                trail = self.extract()

        return comment_ws, comment, trail

300 

    def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
        """
        Parses ``<indent><key><separator><value>`` at the current position.

        When ``parse_comment`` is True, a trailing comment and line ending are
        parsed as well and stored on the value's trivia; otherwise the value's
        trail is set to the empty string.

        Raises UnexpectedCharError when "=" is missing or appears twice.
        """
        # Leading indent
        self.mark()

        while self._current.is_spaces() and self.inc():
            pass

        indent = self.extract()

        # Key
        key = self._parse_key()

        self.mark()

        # Walk over the separator characters, making sure exactly one "="
        # is among them.
        found_equals = self._current == "="
        while self._current.is_kv_sep() and self.inc():
            if self._current == "=":
                if found_equals:
                    raise self.parse_error(UnexpectedCharError, "=")
                else:
                    found_equals = True
        if not found_equals:
            raise self.parse_error(UnexpectedCharError, self._current)

        # Keep any separator text the key already carries and append to it.
        if not key.sep:
            key.sep = self.extract()
        else:
            key.sep += self.extract()

        # Value
        val = self._parse_value()
        # Comment
        if parse_comment:
            cws, comment, trail = self._parse_comment_trail()
            meta = val.trivia
            # Do not overwrite comment whitespace already recorded while the
            # value was being parsed.
            if not meta.comment_ws:
                meta.comment_ws = cws

            meta.comment = comment
            meta.trail = trail
        else:
            val.trivia.trail = ""

        val.trivia.indent = indent

        return key, val

347 

348 def _parse_key(self) -> Key: 

349 """ 

350 Parses a Key at the current position; 

351 WS before the key must be exhausted first at the callsite. 

352 """ 

353 self.mark() 

354 while self._current.is_spaces() and self.inc(): 

355 # Skip any leading whitespace 

356 pass 

357 if self._current in "\"'": 

358 return self._parse_quoted_key() 

359 else: 

360 return self._parse_bare_key() 

361 

362 def _parse_quoted_key(self) -> Key: 

363 """ 

364 Parses a key enclosed in either single or double quotes. 

365 """ 

366 # Extract the leading whitespace 

367 original = self.extract() 

368 quote_style = self._current 

369 key_type = next((t for t in KeyType if t.value == quote_style), None) 

370 

371 if key_type is None: 

372 raise RuntimeError("Should not have entered _parse_quoted_key()") 

373 

374 key_str = self._parse_string( 

375 StringType.SLB if key_type == KeyType.Basic else StringType.SLL 

376 ) 

377 if key_str._t.is_multiline(): 

378 raise self.parse_error(UnexpectedCharError, key_str._t.value) 

379 original += key_str.as_string() 

380 self.mark() 

381 while self._current.is_spaces() and self.inc(): 

382 pass 

383 original += self.extract() 

384 key = SingleKey(str(key_str), t=key_type, sep="", original=original) 

385 if self._current == ".": 

386 self.inc() 

387 key = key.concat(self._parse_key()) 

388 

389 return key 

390 

391 def _parse_bare_key(self) -> Key: 

392 """ 

393 Parses a bare key. 

394 """ 

395 while ( 

396 self._current.is_bare_key_char() or self._current.is_spaces() 

397 ) and self.inc(): 

398 pass 

399 

400 original = self.extract() 

401 key = original.strip() 

402 if not key: 

403 # Empty key 

404 raise self.parse_error(EmptyKeyError) 

405 

406 if " " in key: 

407 # Bare key with spaces in it 

408 raise self.parse_error(ParseError, f'Invalid key "{key}"') 

409 

410 key = SingleKey(key, KeyType.Bare, "", original) 

411 

412 if self._current == ".": 

413 self.inc() 

414 key = key.concat(self._parse_key()) 

415 

416 return key 

417 

    def _parse_value(self) -> Item:
        """
        Attempts to parse a value at the current position.

        Dispatches on the first character: strings, booleans, arrays and
        inline tables go to their own parsers; numbers and date/time values
        are handled here.

        Raises InvalidNumberError, InvalidDateError, InvalidTimeError or
        InvalidDateTimeError for malformed literals, and UnexpectedCharError
        when the first character starts no known value.
        """
        self.mark()
        c = self._current
        trivia = Trivia()

        if c == StringType.SLB.value:
            return self._parse_basic_string()
        elif c == StringType.SLL.value:
            return self._parse_literal_string()
        elif c == BoolType.TRUE.value[0]:
            return self._parse_true()
        elif c == BoolType.FALSE.value[0]:
            return self._parse_false()
        elif c == "[":
            return self._parse_array()
        elif c == "{":
            return self._parse_inline_table()
        elif c in "+-" or self._peek(4) in {
            "+inf",
            "-inf",
            "inf",
            "+nan",
            "-nan",
            "nan",
        }:
            # Number
            # Read up to the next character that can end a value.
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        elif c in string.digits:
            # Integer, Float, Date, Time or DateTime
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            # NOTE(review): groups 1 and 5 of RFC_3339_LOOSE are presumably the
            # date and time portions, judging by the branches below - confirm
            # against tomlkit._utils.
            m = RFC_3339_LOOSE.match(raw)
            if m:
                if m.group(1) and m.group(5):
                    # datetime
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateTimeError) from None

                if m.group(1):
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.date)
                        date = Date(dt.year, dt.month, dt.day, trivia, raw)
                        self.mark()
                        # A space is deliberately not a terminator here, so a
                        # time written after a space is picked up as well.
                        while self._current not in "\t\n\r#,]}" and self.inc():
                            pass

                        time_raw = self.extract()
                        time_part = time_raw.rstrip()
                        # Whitespace stripped from the right is kept as the
                        # whitespace preceding a possible comment.
                        trivia.comment_ws = time_raw[len(time_part) :]
                        if not time_part:
                            return date

                        dt = parse_rfc3339(raw + time_part)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw + time_part,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateError) from None

                if m.group(5):
                    try:
                        t = parse_rfc3339(raw)
                        assert isinstance(t, datetime.time)
                        return Time(
                            t.hour,
                            t.minute,
                            t.second,
                            t.microsecond,
                            t.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidTimeError) from None

            # Not a date/time literal: fall back to a plain number.
            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        else:
            raise self.parse_error(UnexpectedCharError, c)

541 

542 def _parse_true(self): 

543 return self._parse_bool(BoolType.TRUE) 

544 

545 def _parse_false(self): 

546 return self._parse_bool(BoolType.FALSE) 

547 

548 def _parse_bool(self, style: BoolType) -> Bool: 

549 with self._state: 

550 style = BoolType(style) 

551 

552 # only keep parsing for bool if the characters match the style 

553 # try consuming rest of chars in style 

554 for c in style: 

555 self.consume(c, min=1, max=1) 

556 

557 return Bool(style, Trivia()) 

558 

559 def _parse_array(self) -> Array: 

560 # Consume opening bracket, EOF here is an issue (middle of array) 

561 self.inc(exception=UnexpectedEofError) 

562 

563 elems: list[Item] = [] 

564 prev_value = None 

565 while True: 

566 # consume whitespace 

567 mark = self._idx 

568 self.consume(TOMLChar.SPACES + TOMLChar.NL) 

569 indent = self._src[mark : self._idx] 

570 newline = set(TOMLChar.NL) & set(indent) 

571 if newline: 

572 elems.append(Whitespace(indent)) 

573 continue 

574 

575 # consume comment 

576 if self._current == "#": 

577 cws, comment, trail = self._parse_comment_trail(parse_trail=False) 

578 elems.append(Comment(Trivia(indent, cws, comment, trail))) 

579 continue 

580 

581 # consume indent 

582 if indent: 

583 elems.append(Whitespace(indent)) 

584 continue 

585 

586 # consume value 

587 if not prev_value: 

588 try: 

589 elems.append(self._parse_value()) 

590 prev_value = True 

591 continue 

592 except UnexpectedCharError: 

593 pass 

594 

595 # consume comma 

596 if prev_value and self._current == ",": 

597 self.inc(exception=UnexpectedEofError) 

598 # If the previous item is Whitespace, add to it 

599 if isinstance(elems[-1], Whitespace): 

600 elems[-1]._s = elems[-1].s + "," 

601 else: 

602 elems.append(Whitespace(",")) 

603 prev_value = False 

604 continue 

605 

606 # consume closing bracket 

607 if self._current == "]": 

608 # consume closing bracket, EOF here doesn't matter 

609 self.inc() 

610 break 

611 

612 raise self.parse_error(UnexpectedCharError, self._current) 

613 

614 try: 

615 res = Array(elems, Trivia()) 

616 except ValueError: 

617 pass 

618 else: 

619 return res 

620 

    def _parse_inline_table(self) -> InlineTable:
        """
        Parses an inline table starting at its opening brace.

        Raises UnexpectedEofError when the input ends right after the opening
        brace or after a comma, and UnexpectedCharError for a leading comma,
        a missing comma between pairs, or a comma followed by "}" or ",".
        """
        # consume opening bracket, EOF here is an issue (middle of array)
        self.inc(exception=UnexpectedEofError)

        elems = Container(True)
        # Tri-state: None before the first pair, then True/False depending on
        # whether the previous pair was followed by a comma.
        trailing_comma = None
        while True:
            # consume leading whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            if not trailing_comma:
                # None: empty inline table
                # False: previous key-value pair was not followed by a comma
                if self._current == "}":
                    # consume closing bracket, EOF here doesn't matter
                    self.inc()
                    break

                if trailing_comma is False or (
                    trailing_comma is None and self._current == ","
                ):
                    # Either the previous key-value pair was not followed by a comma
                    # or the table has an unexpected leading comma.
                    raise self.parse_error(UnexpectedCharError, self._current)
            else:
                # True: previous key-value pair was followed by a comma
                if self._current == "}" or self._current == ",":
                    raise self.parse_error(UnexpectedCharError, self._current)

            key, val = self._parse_key_value(False)
            elems.add(key, val)

            # consume trailing whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            # consume trailing comma
            trailing_comma = self._current == ","
            if trailing_comma:
                # consume the comma, EOF here is an issue (middle of inline table)
                self.inc(exception=UnexpectedEofError)

        return InlineTable(elems, Trivia())

671 

672 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None: 

673 # Leading zeros are not allowed 

674 sign = "" 

675 if raw.startswith(("+", "-")): 

676 sign = raw[0] 

677 raw = raw[1:] 

678 

679 if len(raw) > 1 and ( 

680 (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))) 

681 or (sign and raw.startswith(".")) 

682 ): 

683 return None 

684 

685 if raw.startswith(("0o", "0x", "0b")) and sign: 

686 return None 

687 

688 digits = "[0-9]" 

689 base = 10 

690 if raw.startswith("0b"): 

691 digits = "[01]" 

692 base = 2 

693 elif raw.startswith("0o"): 

694 digits = "[0-7]" 

695 base = 8 

696 elif raw.startswith("0x"): 

697 digits = "[0-9a-f]" 

698 base = 16 

699 

700 # Underscores should be surrounded by digits 

701 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower() 

702 

703 if "_" in clean: 

704 return None 

705 

706 if clean.endswith(".") or ( 

707 not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".") 

708 ): 

709 return None 

710 

711 try: 

712 return Integer(int(sign + clean, base), trivia, sign + raw) 

713 except ValueError: 

714 try: 

715 return Float(float(sign + clean), trivia, sign + raw) 

716 except ValueError: 

717 return None 

718 

719 def _parse_literal_string(self) -> String: 

720 with self._state: 

721 return self._parse_string(StringType.SLL) 

722 

723 def _parse_basic_string(self) -> String: 

724 with self._state: 

725 return self._parse_string(StringType.SLB) 

726 

    def _parse_escaped_char(self, multiline):
        """
        Parses the character(s) that follow a backslash in a basic string and
        returns the text they stand for.

        ``multiline`` enables the line-ending backslash form, which yields
        the empty string.

        Raises InvalidCharInStringError for an unknown escape,
        InvalidUnicodeValueError for a bad unicode escape and
        UnexpectedEofError when the input ends in the middle of the escape.
        """
        if multiline and self._current.is_ws():
            # When the last non-whitespace character on a line is
            # a \, it will be trimmed along with all whitespace
            # (including newlines) up to the next non-whitespace
            # character or closing delimiter.
            # """\
            #     hello \
            #     world"""
            tmp = ""
            while self._current.is_ws():
                tmp += self._current
                # consume the whitespace, EOF here is an issue
                # (middle of string)
                self.inc(exception=UnexpectedEofError)
                continue

            # the escape followed by whitespace must have a newline
            # before any other chars
            if "\n" not in tmp:
                raise self.parse_error(InvalidCharInStringError, self._current)

            return ""

        if self._current in _escaped:
            c = _escaped[self._current]

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            return c

        if self._current in {"u", "U"}:
            # this needs to be a unicode
            u, ue = self._peek_unicode(self._current == "U")
            if u is not None:
                # consume the U char and the unicode value
                self.inc_n(len(ue) + 1)

                return u

            raise self.parse_error(InvalidUnicodeValueError)

        raise self.parse_error(InvalidCharInStringError, self._current)

771 

    def _parse_string(self, delim: StringType) -> String:
        """
        Parses a string whose opening delimiter is at the current position.

        ``delim`` is the single-line string type to start from; it is toggled
        to its multiline counterpart when three delimiters open the string.

        Raises InternalParserError when the current character is not the
        delimiter, UnexpectedEofError when the input ends inside the string,
        InvalidControlChar for a forbidden control character, and
        InvalidCharInStringError for six or more consecutive delimiters
        inside a multiline string.
        """
        # only keep parsing for string if the current character matches the delim
        if self._current != delim.unit:
            raise self.parse_error(
                InternalParserError,
                f"Invalid character for string type {delim}",
            )

        # consume the opening/first delim, EOF here is an issue
        # (middle of string or middle of delim)
        self.inc(exception=UnexpectedEofError)

        if self._current == delim.unit:
            # consume the closing/second delim, we do not care if EOF occurs as
            # that would simply imply an empty single line string
            if not self.inc() or self._current != delim.unit:
                # Empty string
                return String(delim, "", "", Trivia())

            # consume the third delim, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            delim = delim.toggle()  # convert delim to multi delim

        self.mark()  # to extract the original string with whitespace and all
        value = ""

        # A newline immediately following the opening delimiter will be trimmed.
        if delim.is_multiline():
            if self._current == "\n":
                # consume the newline, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # Look one character ahead, without moving, to detect "\r\n".
                cur = self._current
                with self._state(restore=True):
                    if self.inc():
                        cur += self._current
                if cur == "\r\n":
                    self.inc_n(2, exception=UnexpectedEofError)

        escaped = False  # whether the previous key was ESCAPE
        while True:
            code = ord(self._current)
            # Unescaped control characters are rejected; tab is always
            # allowed, LF and CR only inside multiline strings.
            if (
                delim.is_singleline()
                and not escaped
                and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
            ) or (
                delim.is_multiline()
                and not escaped
                and (
                    code == CHR_DEL
                    or (
                        code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M]
                    )
                )
            ):
                raise self.parse_error(InvalidControlChar, code, "strings")
            elif not escaped and self._current == delim.unit:
                # try to process current as a closing delim
                original = self.extract()

                close = ""
                if delim.is_multiline():
                    # Consume the delimiters to see if we are at the end of the string
                    close = ""
                    while self._current == delim.unit:
                        close += self._current
                        self.inc()

                    if len(close) < 3:
                        # Not a triple quote, leave in result as-is.
                        # Adding back the characters we already consumed
                        value += close
                        continue

                    if len(close) == 3:
                        # We are at the end of the string
                        return String(delim, value, original, Trivia())

                    if len(close) >= 6:
                        raise self.parse_error(InvalidCharInStringError, self._current)

                    # Four or five delimiters: the extra leading ones belong
                    # to the content, the last three close the string.
                    value += close[:-3]
                    original += close[:-3]

                    return String(delim, value, original, Trivia())
                else:
                    # consume the closing delim, we do not care if EOF occurs as
                    # that would simply imply the end of self._src
                    self.inc()

                return String(delim, value, original, Trivia())
            elif delim.is_basic() and escaped:
                # attempt to parse the current char as an escaped value, an exception
                # is raised if this fails
                value += self._parse_escaped_char(delim.is_multiline())

                # no longer escaped
                escaped = False
            elif delim.is_basic() and self._current == "\\":
                # the next char is being escaped
                escaped = True

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # this is either a literal string where we keep everything as is,
                # or this is not a special escaped char in a basic string
                value += self._current

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)

885 

886 def _parse_table( 

887 self, parent_name: Key | None = None, parent: Table | None = None 

888 ) -> tuple[Key, Table | AoT]: 

889 """ 

890 Parses a table element. 

891 """ 

892 if self._current != "[": 

893 raise self.parse_error( 

894 InternalParserError, "_parse_table() called on non-bracket character." 

895 ) 

896 

897 indent = self.extract() 

898 self.inc() # Skip opening bracket 

899 

900 if self.end(): 

901 raise self.parse_error(UnexpectedEofError) 

902 

903 is_aot = False 

904 if self._current == "[": 

905 if not self.inc(): 

906 raise self.parse_error(UnexpectedEofError) 

907 

908 is_aot = True 

909 try: 

910 key = self._parse_key() 

911 except EmptyKeyError: 

912 raise self.parse_error(EmptyTableNameError) from None 

913 if self.end(): 

914 raise self.parse_error(UnexpectedEofError) 

915 elif self._current != "]": 

916 raise self.parse_error(UnexpectedCharError, self._current) 

917 

918 key.sep = "" 

919 full_key = key 

920 name_parts = tuple(key) 

921 if any(" " in part.key.strip() and part.is_bare() for part in name_parts): 

922 raise self.parse_error( 

923 ParseError, f'Invalid table name "{full_key.as_string()}"' 

924 ) 

925 

926 missing_table = False 

927 if parent_name: 

928 parent_name_parts = tuple(parent_name) 

929 else: 

930 parent_name_parts = () 

931 

932 if len(name_parts) > len(parent_name_parts) + 1: 

933 missing_table = True 

934 

935 name_parts = name_parts[len(parent_name_parts) :] 

936 

937 values = Container(True) 

938 

939 self.inc() # Skip closing bracket 

940 if is_aot: 

941 # TODO: Verify close bracket 

942 self.inc() 

943 

944 cws, comment, trail = self._parse_comment_trail() 

945 

946 result = Null() 

947 table = Table( 

948 values, 

949 Trivia(indent, cws, comment, trail), 

950 is_aot, 

951 name=name_parts[0].key if name_parts else key.key, 

952 display_name=full_key.as_string(), 

953 is_super_table=False, 

954 ) 

955 

956 if len(name_parts) > 1: 

957 if missing_table: 

958 # Missing super table 

959 # i.e. a table initialized like this: [foo.bar] 

960 # without initializing [foo] 

961 # 

962 # So we have to create the parent tables 

963 table = Table( 

964 Container(True), 

965 Trivia("", cws, comment, trail), 

966 is_aot and name_parts[0] in self._aot_stack, 

967 is_super_table=True, 

968 name=name_parts[0].key, 

969 ) 

970 

971 result = table 

972 key = name_parts[0] 

973 

974 for i, _name in enumerate(name_parts[1:]): 

975 child = table.get( 

976 _name, 

977 Table( 

978 Container(True), 

979 Trivia(indent, cws, comment, trail), 

980 is_aot and i == len(name_parts) - 2, 

981 is_super_table=i < len(name_parts) - 2, 

982 name=_name.key, 

983 display_name=( 

984 full_key.as_string() if i == len(name_parts) - 2 else None 

985 ), 

986 ), 

987 ) 

988 

989 if is_aot and i == len(name_parts) - 2: 

990 table.raw_append(_name, AoT([child], name=table.name, parsed=True)) 

991 else: 

992 table.raw_append(_name, child) 

993 

994 table = child 

995 values = table.value 

996 else: 

997 if name_parts: 

998 key = name_parts[0] 

999 

1000 while not self.end(): 

1001 item = self._parse_item() 

1002 if item: 

1003 _key, item = item 

1004 if not self._merge_ws(item, values): 

1005 table.raw_append(_key, item) 

1006 else: 

1007 if self._current == "[": 

1008 _, key_next = self._peek_table() 

1009 

1010 if self._is_child(full_key, key_next): 

1011 key_next, table_next = self._parse_table(full_key, table) 

1012 

1013 table.raw_append(key_next, table_next) 

1014 

1015 # Picking up any sibling 

1016 while not self.end(): 

1017 _, key_next = self._peek_table() 

1018 

1019 if not self._is_child(full_key, key_next): 

1020 break 

1021 

1022 key_next, table_next = self._parse_table(full_key, table) 

1023 

1024 table.raw_append(key_next, table_next) 

1025 

1026 break 

1027 else: 

1028 raise self.parse_error( 

1029 InternalParserError, 

1030 "_parse_item() returned None on a non-bracket character.", 

1031 ) 

1032 table.value._validate_out_of_order_table() 

1033 if isinstance(result, Null): 

1034 result = table 

1035 

1036 if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]): 

1037 result = self._parse_aot(result, full_key) 

1038 

1039 return key, result 

1040 

1041 def _peek_table(self) -> tuple[bool, Key]: 

1042 """ 

1043 Peeks ahead non-intrusively by cloning then restoring the 

1044 initial state of the parser. 

1045 

1046 Returns the name of the table about to be parsed, 

1047 as well as whether it is part of an AoT. 

1048 """ 

1049 # we always want to restore after exiting this scope 

1050 with self._state(save_marker=True, restore=True): 

1051 if self._current != "[": 

1052 raise self.parse_error( 

1053 InternalParserError, 

1054 "_peek_table() entered on non-bracket character", 

1055 ) 

1056 

1057 # AoT 

1058 self.inc() 

1059 is_aot = False 

1060 if self._current == "[": 

1061 self.inc() 

1062 is_aot = True 

1063 try: 

1064 return is_aot, self._parse_key() 

1065 except EmptyKeyError: 

1066 raise self.parse_error(EmptyTableNameError) from None 

1067 

1068 def _parse_aot(self, first: Table, name_first: Key) -> AoT: 

1069 """ 

1070 Parses all siblings of the provided table first and bundles them into 

1071 an AoT. 

1072 """ 

1073 payload = [first] 

1074 self._aot_stack.append(name_first) 

1075 while not self.end(): 

1076 is_aot_next, name_next = self._peek_table() 

1077 if is_aot_next and name_next == name_first: 

1078 _, table = self._parse_table(name_first) 

1079 payload.append(table) 

1080 else: 

1081 break 

1082 

1083 self._aot_stack.pop() 

1084 

1085 return AoT(payload, parsed=True) 

1086 

1087 def _peek(self, n: int) -> str: 

1088 """ 

1089 Peeks ahead n characters. 

1090 

1091 n is the max number of characters that will be peeked. 

1092 """ 

1093 # we always want to restore after exiting this scope 

1094 with self._state(restore=True): 

1095 buf = "" 

1096 for _ in range(n): 

1097 if self._current not in " \t\n\r#,]}" + self._src.EOF: 

1098 buf += self._current 

1099 self.inc() 

1100 continue 

1101 

1102 break 

1103 return buf 

1104 

1105 def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]: 

1106 """ 

1107 Peeks ahead non-intrusively by cloning then restoring the 

1108 initial state of the parser. 

1109 

1110 Returns the unicode value is it's a valid one else None. 

1111 """ 

1112 # we always want to restore after exiting this scope 

1113 with self._state(save_marker=True, restore=True): 

1114 if self._current not in {"u", "U"}: 

1115 raise self.parse_error( 

1116 InternalParserError, "_peek_unicode() entered on non-unicode value" 

1117 ) 

1118 

1119 self.inc() # Dropping prefix 

1120 self.mark() 

1121 

1122 if is_long: 

1123 chars = 8 

1124 else: 

1125 chars = 4 

1126 

1127 if not self.inc_n(chars): 

1128 value, extracted = None, None 

1129 else: 

1130 extracted = self.extract() 

1131 

1132 if extracted[0].lower() == "d" and extracted[1].strip("01234567"): 

1133 return None, None 

1134 

1135 try: 

1136 value = chr(int(extracted, 16)) 

1137 except (ValueError, OverflowError): 

1138 value = None 

1139 

1140 return value, extracted