Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tomlkit/parser.py: 98%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

633 statements  

1from __future__ import annotations 

2 

3import datetime 

4import re 

5import string 

6 

7from tomlkit._compat import decode 

8from tomlkit._utils import RFC_3339_LOOSE 

9from tomlkit._utils import _escaped 

10from tomlkit._utils import parse_rfc3339 

11from tomlkit.container import Container 

12from tomlkit.exceptions import EmptyKeyError 

13from tomlkit.exceptions import EmptyTableNameError 

14from tomlkit.exceptions import InternalParserError 

15from tomlkit.exceptions import InvalidCharInStringError 

16from tomlkit.exceptions import InvalidControlChar 

17from tomlkit.exceptions import InvalidDateError 

18from tomlkit.exceptions import InvalidDateTimeError 

19from tomlkit.exceptions import InvalidNumberError 

20from tomlkit.exceptions import InvalidTimeError 

21from tomlkit.exceptions import InvalidUnicodeValueError 

22from tomlkit.exceptions import ParseError 

23from tomlkit.exceptions import UnexpectedCharError 

24from tomlkit.exceptions import UnexpectedEofError 

25from tomlkit.items import AoT 

26from tomlkit.items import Array 

27from tomlkit.items import Bool 

28from tomlkit.items import BoolType 

29from tomlkit.items import Comment 

30from tomlkit.items import Date 

31from tomlkit.items import DateTime 

32from tomlkit.items import Float 

33from tomlkit.items import InlineTable 

34from tomlkit.items import Integer 

35from tomlkit.items import Item 

36from tomlkit.items import Key 

37from tomlkit.items import KeyType 

38from tomlkit.items import Null 

39from tomlkit.items import SingleKey 

40from tomlkit.items import String 

41from tomlkit.items import StringType 

42from tomlkit.items import Table 

43from tomlkit.items import Time 

44from tomlkit.items import Trivia 

45from tomlkit.items import Whitespace 

46from tomlkit.source import Source 

47from tomlkit.toml_char import TOMLChar 

48from tomlkit.toml_document import TOMLDocument 

49 

50 

# Code points used by the parser when rejecting control characters in
# comments and strings.
CTRL_I = 0x09  # Tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
CTRL_CHAR_LIMIT = 0x1F  # Code points <= this value are checked as control characters
CHR_DEL = 0x7F  # DEL; rejected together with the control characters

56 

57 

58class Parser: 

59 """ 

60 Parser for TOML documents. 

61 """ 

62 

def __init__(self, string: str | bytes) -> None:
    """
    Prepares a parser for the given input.

    The input is passed through ``decode`` and wrapped in a ``Source``.
    """
    # Keys of the arrays of tables currently being parsed (none yet)
    self._aot_stack: list[Key] = []

    # Input to parse
    text = decode(string)
    self._src = Source(text)

68 

69 @property 

70 def _state(self): 

71 return self._src.state 

72 

73 @property 

74 def _idx(self): 

75 return self._src.idx 

76 

77 @property 

78 def _current(self): 

79 return self._src.current 

80 

81 @property 

82 def _marker(self): 

83 return self._src.marker 

84 

def extract(self) -> str:
    """
    Returns the text lying between the marker and the current index,
    as reported by the underlying source.
    """
    text = self._src.extract()
    return text

90 

def inc(self, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by one character unless the input is exhausted.

    The optional exception type is forwarded to the source.
    Returns whether the parser was able to advance.
    """
    source = self._src
    advanced = source.inc(exception=exception)
    return advanced

97 

def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by n characters unless the input is exhausted.

    Both arguments are forwarded to the source as keywords and its
    result is returned unchanged.
    """
    source = self._src
    advanced = source.inc_n(n=n, exception=exception)
    return advanced

104 

def consume(self, chars, min=0, max=-1):
    """
    Consumes characters from ``chars`` subject to the given min/max bounds.

    The call is delegated to the source with all arguments passed as keywords.
    """
    source = self._src
    return source.consume(chars=chars, min=min, max=max)

110 

def end(self) -> bool:
    """
    Tells whether the parser has reached the end of the input,
    as reported by the underlying source.
    """
    source = self._src
    return source.end()

116 

def mark(self) -> None:
    """
    Moves the marker to the position the index currently points at.
    """
    source = self._src
    source.mark()

122 

def parse_error(self, exception=ParseError, *args, **kwargs):
    """
    Builds a parse error of the requested type for the current position.

    The exception type and any extra arguments are handed to the source,
    whose result is returned (the caller is expected to raise it).
    """
    source = self._src
    error = source.parse_error(exception, *args, **kwargs)
    return error

128 

def parse(self) -> TOMLDocument:
    """
    Parses the whole input and returns the resulting document.

    Top-level items are read first, until a table header is reached;
    tables (and arrays of tables) are read afterwards. Any exception raised
    while appending to the document is re-raised as a ParseError.
    """
    document = TOMLDocument(True)

    # Phase 1: everything that precedes the first table/AoT header.
    while not (self.end() or self._current == "["):
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed
        is_dotted = key is not None and key.is_multi()
        # Dotted keys are always appended; other items are appended only
        # when they could not be merged into the preceding whitespace.
        if is_dotted or not self._merge_ws(value, document):
            try:
                document.append(key, value)
            except Exception as e:
                raise self.parse_error(ParseError, str(e)) from e

        self.mark()

    # Phase 2: tables and arrays of tables.
    while not self.end():
        key, value = self._parse_table()
        first_of_aot = isinstance(value, Table) and value.is_aot_element()
        if first_of_aot:
            # Only the first table of an AoT was read; gather the remaining
            # ones into the same array.
            value = self._parse_aot(value, key)

        try:
            document.append(key, value)
        except Exception as e:
            raise self.parse_error(ParseError, str(e)) from e

    document.parsing(False)

    return document

168 

169 def _merge_ws(self, item: Item, container: Container) -> bool: 

170 """ 

171 Merges the given Item with the last one currently in the given Container if 

172 both are whitespace items. 

173 

174 Returns True if the items were merged. 

175 """ 

176 last = container.last_item() 

177 if not last: 

178 return False 

179 

180 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace): 

181 return False 

182 

183 start = self._idx - (len(last.s) + len(item.s)) 

184 container.body[-1] = ( 

185 container.body[-1][0], 

186 Whitespace(self._src[start : self._idx]), 

187 ) 

188 

189 return True 

190 

191 def _is_child(self, parent: Key, child: Key) -> bool: 

192 """ 

193 Returns whether a key is strictly a child of another key. 

194 AoT siblings are not considered children of one another. 

195 """ 

196 parent_parts = tuple(parent) 

197 child_parts = tuple(child) 

198 

199 if parent_parts == child_parts: 

200 return False 

201 

202 return parent_parts == child_parts[: len(parent_parts)] 

203 

def _parse_item(self) -> tuple[Key | None, Item] | None:
    """
    Tries to parse the next item.

    Returns a (key, item) pair; the key is None for whitespace and comments.
    Returns None when a table header ("[") is encountered.
    """
    self.mark()
    with self._state as state:
        while True:
            char = self._current

            if char == "\n":
                # A newline ends the run: hand back the whitespace seen so far.
                self.inc()
                return None, Whitespace(self.extract())

            if char == "#":
                # A comment; whatever preceded it is its indentation.
                indent = self.extract()
                comment_ws, comment, trail = self._parse_comment_trail()
                return None, Comment(Trivia(indent, comment_ws, comment, trail))

            if char == "[":
                # A table starts here, the caller deals with it.
                return None

            if char not in " \t\r":
                # Start of a key/value pair. Ask the state to rewind so the
                # whitespace just skipped becomes the pair's indentation.
                state.restore = True
                break

            # Plain whitespace: skip it, or return it if the input ends here.
            if not self.inc():
                return None, Whitespace(self.extract())

    return self._parse_key_value(True)

239 

def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
    """
    Parses an optional comment and, when ``parse_trail`` is true, the
    whitespace/newline that ends the line.

    Returns (comment_ws, comment, trail)
    If there is no comment, comment_ws and comment will
    simply be empty. At end of input all three are empty.

    Raises (via ``parse_error``) InvalidControlChar for a control character
    inside the comment and UnexpectedCharError for any other character found
    before a "#" or a newline.
    """
    if self.end():
        return "", "", ""

    comment = ""
    comment_ws = ""
    self.mark()

    while True:
        c = self._current

        if c == "\n":
            break
        elif c == "#":
            # Whitespace seen between the marker and the "#"
            comment_ws = self.extract()

            self.mark()
            self.inc()  # Skip #

            # The comment itself
            while not self.end() and not self._current.is_nl():
                code = ord(self._current)
                # DEL and every control character except tab are rejected
                if code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I:
                    raise self.parse_error(InvalidControlChar, code, "comments")

                if not self.inc():
                    break

            comment = self.extract()
            # Re-mark so that the trail starts right after the comment
            self.mark()

            break
        elif c in " \t\r":
            self.inc()
        else:
            raise self.parse_error(UnexpectedCharError, c)

        if self.end():
            break

    trail = ""
    if parse_trail:
        while self._current.is_spaces() and self.inc():
            pass

        # Accept an optional "\r" followed by an optional "\n"
        if self._current == "\r":
            self.inc()

        if self._current == "\n":
            self.inc()

        if self._idx != self._marker or self._current.is_ws():
            trail = self.extract()

    return comment_ws, comment, trail

300 

def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
    """
    Parses one ``key = value`` pair at the current position.

    When ``parse_comment`` is true the trailing comment and line ending are
    parsed as well and stored in the value's trivia; otherwise the value's
    trail is reset to an empty string.

    Returns the (key, value) pair. Raises (via ``parse_error``)
    UnexpectedCharError when "=" is missing or appears twice.
    """
    # Leading indent
    self.mark()

    while self._current.is_spaces() and self.inc():
        pass

    indent = self.extract()

    # Key
    key = self._parse_key()

    self.mark()

    # Walk over the separator characters, making sure exactly one "=" is seen
    found_equals = self._current == "="
    while self._current.is_kv_sep() and self.inc():
        if self._current == "=":
            if found_equals:
                raise self.parse_error(UnexpectedCharError, "=")
            else:
                found_equals = True
    if not found_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    # Keep any separator text already recorded on the key and append to it
    if not key.sep:
        key.sep = self.extract()
    else:
        key.sep += self.extract()

    # Value
    val = self._parse_value()
    # Comment
    if parse_comment:
        cws, comment, trail = self._parse_comment_trail()
        meta = val.trivia
        # Do not overwrite comment whitespace the value parser already set
        if not meta.comment_ws:
            meta.comment_ws = cws

        meta.comment = comment
        meta.trail = trail
    else:
        val.trivia.trail = ""

    val.trivia.indent = indent

    return key, val

347 

348 def _parse_key(self) -> Key: 

349 """ 

350 Parses a Key at the current position; 

351 WS before the key must be exhausted first at the callsite. 

352 """ 

353 self.mark() 

354 while self._current.is_spaces() and self.inc(): 

355 # Skip any leading whitespace 

356 pass 

357 if self._current in "\"'": 

358 return self._parse_quoted_key() 

359 else: 

360 return self._parse_bare_key() 

361 

def _parse_quoted_key(self) -> Key:
    """
    Parses a key wrapped in single or double quotes, following a "." into
    the next key part when one is present.
    """
    # Whitespace skipped before the opening quote
    raw_text = self.extract()
    quote = self._current

    key_type = None
    for candidate in KeyType:
        if candidate.value == quote:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    if key_type == KeyType.Basic:
        string_type = StringType.SLB
    else:
        string_type = StringType.SLL

    parsed = self._parse_string(string_type)
    if parsed._t.is_multiline():
        # Multi-line strings cannot be used as keys
        raise self.parse_error(UnexpectedCharError, parsed._t.value)
    raw_text += parsed.as_string()

    # Whitespace following the closing quote belongs to the key's raw text
    self.mark()
    while self._current.is_spaces():
        if not self.inc():
            break
    raw_text += self.extract()

    key = SingleKey(str(parsed), t=key_type, sep="", original=raw_text)
    if self._current != ".":
        return key

    # Dotted key: step over the dot and chain the remainder
    self.inc()
    return key.concat(self._parse_key())

390 

def _parse_bare_key(self) -> Key:
    """
    Parses a bare key, following a "." into the next key part when one is
    present.

    Raises (via ``parse_error``) EmptyKeyError when nothing but whitespace
    was found, and ParseError when the key contains whitespace.
    """
    # Bare-key characters and spaces are consumed together; surrounding
    # whitespace is stripped below and interior whitespace is rejected.
    while (
        self._current.is_bare_key_char() or self._current.is_spaces()
    ) and self.inc():
        pass

    original = self.extract()
    key = original.strip()
    if not key:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    # TOML whitespace is space or tab, and neither may appear inside a bare
    # key. Checking only for " " would let a key such as "a<TAB>b" through.
    if any(ch in " \t" for ch in key):
        # Bare key with spaces in it
        raise self.parse_error(ParseError, f'Invalid key "{key}"')

    key = SingleKey(key, KeyType.Bare, "", original)

    if self._current == ".":
        # Dotted key: step over the dot and chain the remainder
        self.inc()
        key = key.concat(self._parse_key())

    return key

417 

def _parse_value(self) -> Item:
    """
    Attempts to parse a value at the current position.

    The first character selects the kind of value: string, boolean, array,
    inline table, signed/special number, or a digit-led token that may be a
    number, date, time or datetime.

    Raises (via ``parse_error``) InvalidNumberError, InvalidDateError,
    InvalidTimeError, InvalidDateTimeError or UnexpectedCharError.
    """
    self.mark()
    c = self._current
    trivia = Trivia()

    if c == StringType.SLB.value:
        return self._parse_basic_string()
    elif c == StringType.SLL.value:
        return self._parse_literal_string()
    elif c == BoolType.TRUE.value[0]:
        return self._parse_true()
    elif c == BoolType.FALSE.value[0]:
        return self._parse_false()
    elif c == "[":
        return self._parse_array()
    elif c == "{":
        return self._parse_inline_table()
    elif c in "+-" or self._peek(4) in {
        "+inf",
        "-inf",
        "inf",
        "+nan",
        "-nan",
        "nan",
    }:
        # Number
        # Read the whole token, up to whitespace or a value delimiter
        while self._current not in " \t\n\r#,]}" and self.inc():
            pass

        raw = self.extract()

        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    elif c in string.digits:
        # Integer, Float, Date, Time or DateTime
        while self._current not in " \t\n\r#,]}" and self.inc():
            pass

        raw = self.extract()

        # NOTE(review): group(1) looks like the date part and group(5) the
        # time part of RFC_3339_LOOSE, judging by the branches below; confirm
        # against the pattern's definition.
        m = RFC_3339_LOOSE.match(raw)
        if m:
            if m.group(1) and m.group(5):
                # datetime
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateTimeError) from None

            if m.group(1):
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.date)
                    date = Date(dt.year, dt.month, dt.day, trivia, raw)
                    # The token stopped at a space; keep reading (spaces
                    # included this time) in case a time follows the date.
                    self.mark()
                    while self._current not in "\t\n\r#,]}" and self.inc():
                        pass

                    time_raw = self.extract()
                    time_part = time_raw.rstrip()
                    # Trailing whitespace after the value is kept as comment whitespace
                    trivia.comment_ws = time_raw[len(time_part) :]
                    if not time_part:
                        return date

                    dt = parse_rfc3339(raw + time_part)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw + time_part,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateError) from None

            if m.group(5):
                try:
                    t = parse_rfc3339(raw)
                    assert isinstance(t, datetime.time)
                    return Time(
                        t.hour,
                        t.minute,
                        t.second,
                        t.microsecond,
                        t.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidTimeError) from None

        # Not a date/time: fall back to a plain number
        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    else:
        raise self.parse_error(UnexpectedCharError, c)

541 

def _parse_true(self):
    """
    Parses the boolean literal described by BoolType.TRUE.
    """
    expected = BoolType.TRUE
    return self._parse_bool(expected)

544 

def _parse_false(self):
    """
    Parses the boolean literal described by BoolType.FALSE.
    """
    expected = BoolType.FALSE
    return self._parse_bool(expected)

547 

def _parse_bool(self, style: BoolType) -> Bool:
    """
    Parses a boolean of the given style at the current position and
    returns the matching Bool item.
    """
    with self._state:
        bool_type = BoolType(style)

        # Every character of the literal has to be consumed exactly once,
        # in order; parsing only goes on while the input matches the style.
        for expected_char in bool_type:
            self.consume(expected_char, min=1, max=1)

        return Bool(bool_type, Trivia())

558 

def _parse_array(self) -> Array:
    """
    Parses an array starting at its opening bracket.

    Whitespace, comments and commas are kept as elements next to the values
    and handed to ``Array``.

    Raises (via ``parse_error``) UnexpectedEofError when the input ends
    inside the array, UnexpectedCharError on an unexpected character, and
    ParseError when ``Array`` rejects the collected elements.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: list[Item] = []
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES + TOMLChar.NL)
        indent = self._src[mark : self._idx]
        newline = set(TOMLChar.NL) & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value (e.g. "]" or ","); the checks below decide
                # whether the character is acceptable here.
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    # A ValueError from Array used to be swallowed, which made this method
    # return None and fail later in the caller. Report it as a parse error,
    # as parse() does for errors raised while appending.
    try:
        return Array(elems, Trivia())
    except ValueError as e:
        raise self.parse_error(ParseError, str(e)) from e

616 

def _parse_inline_table(self) -> InlineTable:
    """
    Parses an inline table starting at its opening brace.

    ``trailing_comma`` tracks what preceded the current position:
    None (nothing parsed yet), False (a pair not followed by a comma) or
    True (a pair followed by a comma).

    Raises (via ``parse_error``) UnexpectedCharError for a leading comma,
    a missing comma between pairs, or a comma directly followed by "}" or
    another comma; UnexpectedEofError when the input ends inside the table.
    """
    # consume opening bracket, EOF here is an issue (middle of inline table)
    self.inc(exception=UnexpectedEofError)

    elems = Container(True)
    trailing_comma = None
    while True:
        # consume leading whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES)
        raw = self._src[mark : self._idx]
        if raw:
            elems.add(Whitespace(raw))

        if not trailing_comma:
            # None: empty inline table
            # False: previous key-value pair was not followed by a comma
            if self._current == "}":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            if (
                trailing_comma is False
                or trailing_comma is None
                and self._current == ","
            ):
                # Either the previous key-value pair was not followed by a comma
                # or the table has an unexpected leading comma.
                raise self.parse_error(UnexpectedCharError, self._current)
        else:
            # True: previous key-value pair was followed by a comma
            if self._current == "}" or self._current == ",":
                raise self.parse_error(UnexpectedCharError, self._current)

        key, val = self._parse_key_value(False)
        elems.add(key, val)

        # consume trailing whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES)
        raw = self._src[mark : self._idx]
        if raw:
            elems.add(Whitespace(raw))

        # consume trailing comma
        trailing_comma = self._current == ","
        if trailing_comma:
            # consume the comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)

    return InlineTable(elems, Trivia())

669 

670 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None: 

671 # Leading zeros are not allowed 

672 sign = "" 

673 if raw.startswith(("+", "-")): 

674 sign = raw[0] 

675 raw = raw[1:] 

676 

677 if len(raw) > 1 and ( 

678 raw.startswith("0") 

679 and not raw.startswith(("0.", "0o", "0x", "0b", "0e")) 

680 or sign 

681 and raw.startswith(".") 

682 ): 

683 return None 

684 

685 if raw.startswith(("0o", "0x", "0b")) and sign: 

686 return None 

687 

688 digits = "[0-9]" 

689 base = 10 

690 if raw.startswith("0b"): 

691 digits = "[01]" 

692 base = 2 

693 elif raw.startswith("0o"): 

694 digits = "[0-7]" 

695 base = 8 

696 elif raw.startswith("0x"): 

697 digits = "[0-9a-f]" 

698 base = 16 

699 

700 # Underscores should be surrounded by digits 

701 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower() 

702 

703 if "_" in clean: 

704 return None 

705 

706 if ( 

707 clean.endswith(".") 

708 or not clean.startswith("0x") 

709 and clean.split("e", 1)[0].endswith(".") 

710 ): 

711 return None 

712 

713 try: 

714 return Integer(int(sign + clean, base), trivia, sign + raw) 

715 except ValueError: 

716 try: 

717 return Float(float(sign + clean), trivia, sign + raw) 

718 except ValueError: 

719 return None 

720 

def _parse_literal_string(self) -> String:
    """
    Parses a string starting with the single-line literal delimiter,
    inside a parser state context.
    """
    string_type = StringType.SLL
    with self._state:
        return self._parse_string(string_type)

724 

def _parse_basic_string(self) -> String:
    """
    Parses a string starting with the single-line basic delimiter,
    inside a parser state context.
    """
    string_type = StringType.SLB
    with self._state:
        return self._parse_string(string_type)

728 

def _parse_escaped_char(self, multiline):
    """
    Parses the character(s) following a backslash in a basic string and
    returns the text they stand for.

    ``multiline`` tells whether the enclosing string is multi-line, in which
    case a backslash followed by whitespace containing a newline yields "".

    Raises (via ``parse_error``) InvalidCharInStringError for an unknown
    escape, InvalidUnicodeValueError for a bad unicode escape, and
    UnexpectedEofError when the input ends inside the escape.
    """
    if multiline and self._current.is_ws():
        # When the last non-whitespace character on a line is
        # a \, it will be trimmed along with all whitespace
        # (including newlines) up to the next non-whitespace
        # character or closing delimiter.
        # """\
        #     hello \
        #     world"""
        tmp = ""
        while self._current.is_ws():
            tmp += self._current
            # consume the whitespace, EOF here is an issue
            # (middle of string)
            self.inc(exception=UnexpectedEofError)
            continue

        # the escape followed by whitespace must have a newline
        # before any other chars
        if "\n" not in tmp:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    if self._current in _escaped:
        # Simple escapes are looked up in the _escaped mapping
        c = _escaped[self._current]

        # consume this char, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        return c

    if self._current in {"u", "U"}:
        # this needs to be a unicode
        u, ue = self._peek_unicode(self._current == "U")
        if u is not None:
            # consume the U char and the unicode value
            self.inc_n(len(ue) + 1)

            return u

        raise self.parse_error(InvalidUnicodeValueError)

    raise self.parse_error(InvalidCharInStringError, self._current)

773 

def _parse_string(self, delim: StringType) -> String:
    """
    Parses a string whose opening delimiter is at the current position.

    ``delim`` is the single-line string type to start from; it is toggled to
    its multi-line counterpart when three delimiters open the string.

    Returns the String item, carrying both the parsed value and the original
    text. Raises (via ``parse_error``) InternalParserError when the current
    character is not the delimiter, InvalidControlChar for a disallowed
    control character, InvalidCharInStringError for six or more closing
    delimiters in a row, and UnexpectedEofError when the input ends inside
    the string.
    """
    # only keep parsing for string if the current character matches the delim
    if self._current != delim.unit:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # consume the opening/first delim, EOF here is an issue
    # (middle of string or middle of delim)
    self.inc(exception=UnexpectedEofError)

    if self._current == delim.unit:
        # consume the closing/second delim, we do not care if EOF occurs as
        # that would simply imply an empty single line string
        if not self.inc() or self._current != delim.unit:
            # Empty string
            return String(delim, "", "", Trivia())

        # consume the third delim, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()  # convert delim to multi delim

    self.mark()  # to extract the original string with whitespace and all
    value = ""

    # A newline immediately following the opening delimiter will be trimmed.
    if delim.is_multiline():
        if self._current == "\n":
            # consume the newline, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # Look one character ahead (restoring afterwards) to detect "\r\n"
            cur = self._current
            with self._state(restore=True):
                if self.inc():
                    cur += self._current
            if cur == "\r\n":
                self.inc_n(2, exception=UnexpectedEofError)

    escaped = False  # whether the previous key was ESCAPE
    while True:
        code = ord(self._current)
        # Unescaped control characters are rejected; tab is always allowed,
        # and multi-line strings additionally allow LF and CR.
        if (
            delim.is_singleline()
            and not escaped
            and (code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I)
        ) or (
            delim.is_multiline()
            and not escaped
            and (
                code == CHR_DEL
                or code <= CTRL_CHAR_LIMIT
                and code not in [CTRL_I, CTRL_J, CTRL_M]
            )
        ):
            raise self.parse_error(InvalidControlChar, code, "strings")
        elif not escaped and self._current == delim.unit:
            # try to process current as a closing delim
            original = self.extract()

            close = ""
            if delim.is_multiline():
                # Consume the delimiters to see if we are at the end of the string
                close = ""
                while self._current == delim.unit:
                    close += self._current
                    self.inc()

                if len(close) < 3:
                    # Not a triple quote, leave in result as-is.
                    # Adding back the characters we already consumed
                    value += close
                    continue

                if len(close) == 3:
                    # We are at the end of the string
                    return String(delim, value, original, Trivia())

                if len(close) >= 6:
                    raise self.parse_error(InvalidCharInStringError, self._current)

                # Four or five delimiters: the last three close the string,
                # the ones before them are part of its content.
                value += close[:-3]
                original += close[:-3]

                return String(delim, value, original, Trivia())
            else:
                # consume the closing delim, we do not care if EOF occurs as
                # that would simply imply the end of self._src
                self.inc()

                return String(delim, value, original, Trivia())
        elif delim.is_basic() and escaped:
            # attempt to parse the current char as an escaped value, an exception
            # is raised if this fails
            value += self._parse_escaped_char(delim.is_multiline())

            # no longer escaped
            escaped = False
        elif delim.is_basic() and self._current == "\\":
            # the next char is being escaped
            escaped = True

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # this is either a literal string where we keep everything as is,
            # or this is not a special escaped char in a basic string
            value += self._current

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

886 

def _parse_table(
    self, parent_name: Key | None = None, parent: Table | None = None
) -> tuple[Key, Table | AoT]:
    """
    Parses a table element: its header, its items and any child tables.

    ``parent_name`` is the key of the enclosing table when this call parses
    a nested table; its parts are stripped from the name of the table being
    parsed. ``parent`` is accepted but not used by this method.

    Returns the key under which the result must be stored, together with the
    Table (or the AoT when an array of tables was started here).

    Raises (via ``parse_error``) InternalParserError when not positioned on
    "[", UnexpectedEofError when the input ends inside the header,
    EmptyTableNameError for an empty name, UnexpectedCharError for a
    malformed header, and ParseError for an invalid table name.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part beyond the parent means intermediate tables are
    # implied by the header.
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header must be closed by a second "]".
        # It used to be skipped without being checked, so "[[a]x" or
        # "[[a]" at end of input were accepted.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)
        self.inc()

    cws, comment, trail = self._parse_comment_trail()

    result = Null()
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia("", cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

        result = table
        key = name_parts[0]

        # Walk down the remaining name parts, creating (or reusing) one
        # table per part; only the last one is the table actually declared.
        for i, _name in enumerate(name_parts[1:]):
            child = table.get(
                _name,
                Table(
                    Container(True),
                    Trivia(indent, cws, comment, trail),
                    is_aot and i == len(name_parts) - 2,
                    is_super_table=i < len(name_parts) - 2,
                    name=_name.key,
                    display_name=(
                        full_key.as_string() if i == len(name_parts) - 2 else None
                    ),
                ),
            )

            if is_aot and i == len(name_parts) - 2:
                table.raw_append(_name, AoT([child], name=table.name, parsed=True))
            else:
                table.raw_append(_name, child)

            table = child
            values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    while not self.end():
        item = self._parse_item()
        if item:
            _key, item = item
            if not self._merge_ws(item, values):
                table.raw_append(_key, item)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )
    table.value._validate_out_of_order_table()
    if isinstance(result, Null):
        result = table

    # Start collecting an AoT unless one for this very key is already being
    # collected higher up the call stack.
    if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
        result = self._parse_aot(result, full_key)

    return key, result

1041 

1042 def _peek_table(self) -> tuple[bool, Key]: 

1043 """ 

1044 Peeks ahead non-intrusively by cloning then restoring the 

1045 initial state of the parser. 

1046 

1047 Returns the name of the table about to be parsed, 

1048 as well as whether it is part of an AoT. 

1049 """ 

1050 # we always want to restore after exiting this scope 

1051 with self._state(save_marker=True, restore=True): 

1052 if self._current != "[": 

1053 raise self.parse_error( 

1054 InternalParserError, 

1055 "_peek_table() entered on non-bracket character", 

1056 ) 

1057 

1058 # AoT 

1059 self.inc() 

1060 is_aot = False 

1061 if self._current == "[": 

1062 self.inc() 

1063 is_aot = True 

1064 try: 

1065 return is_aot, self._parse_key() 

1066 except EmptyKeyError: 

1067 raise self.parse_error(EmptyTableNameError) from None 

1068 

def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Collects the tables that follow ``first`` and share its name, and
    wraps all of them in an AoT.
    """
    tables = [first]
    self._aot_stack.append(name_first)

    while not self.end():
        is_aot_next, name_next = self._peek_table()
        same_array = is_aot_next and name_next == name_first
        if not same_array:
            break

        _, sibling = self._parse_table(name_first)
        tables.append(sibling)

    self._aot_stack.pop()

    return AoT(tables, parsed=True)

1087 

1088 def _peek(self, n: int) -> str: 

1089 """ 

1090 Peeks ahead n characters. 

1091 

1092 n is the max number of characters that will be peeked. 

1093 """ 

1094 # we always want to restore after exiting this scope 

1095 with self._state(restore=True): 

1096 buf = "" 

1097 for _ in range(n): 

1098 if self._current not in " \t\n\r#,]}" + self._src.EOF: 

1099 buf += self._current 

1100 self.inc() 

1101 continue 

1102 

1103 break 

1104 return buf 

1105 

def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
    """
    Looks at the unicode escape at the current position without consuming
    it: the parser state is saved on entry and restored on exit.

    ``is_long`` selects the 8-digit form (True) or the 4-digit form (False).

    Returns a (character, hex digits) pair. The character is None when the
    escape is not valid: too few characters left, non-hexadecimal
    characters, a surrogate code point, or a value ``chr`` rejects.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current not in {"u", "U"}:
            raise self.parse_error(
                InternalParserError, "_peek_unicode() entered on non-unicode value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        chars = 8 if is_long else 4

        if not self.inc_n(chars):
            return None, None

        extracted = self.extract()

        # int(x, 16) also accepts signs, surrounding whitespace, underscores
        # and a "0x" prefix, none of which are allowed in an escape.
        if any(ch not in string.hexdigits for ch in extracted):
            return None, extracted

        code_point = int(extracted, 16)

        # Surrogates are not valid escape values. Checking the numeric value
        # covers both forms; inspecting the first two digits only worked for
        # the 4-digit form.
        if 0xD800 <= code_point <= 0xDFFF:
            return None, None

        try:
            value = chr(code_point)
        except (ValueError, OverflowError):
            # Beyond the range chr() supports
            value = None

        return value, extracted