Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tomlkit/parser.py: 98%

629 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 07:01 +0000

1import datetime 

2import re 

3import string 

4 

5from typing import List 

6from typing import Optional 

7from typing import Tuple 

8from typing import Type 

9from typing import Union 

10 

11from tomlkit._compat import decode 

12from tomlkit._utils import RFC_3339_LOOSE 

13from tomlkit._utils import _escaped 

14from tomlkit._utils import parse_rfc3339 

15from tomlkit.container import Container 

16from tomlkit.exceptions import EmptyKeyError 

17from tomlkit.exceptions import EmptyTableNameError 

18from tomlkit.exceptions import InternalParserError 

19from tomlkit.exceptions import InvalidCharInStringError 

20from tomlkit.exceptions import InvalidControlChar 

21from tomlkit.exceptions import InvalidDateError 

22from tomlkit.exceptions import InvalidDateTimeError 

23from tomlkit.exceptions import InvalidNumberError 

24from tomlkit.exceptions import InvalidTimeError 

25from tomlkit.exceptions import InvalidUnicodeValueError 

26from tomlkit.exceptions import ParseError 

27from tomlkit.exceptions import UnexpectedCharError 

28from tomlkit.exceptions import UnexpectedEofError 

29from tomlkit.items import AoT 

30from tomlkit.items import Array 

31from tomlkit.items import Bool 

32from tomlkit.items import BoolType 

33from tomlkit.items import Comment 

34from tomlkit.items import Date 

35from tomlkit.items import DateTime 

36from tomlkit.items import Float 

37from tomlkit.items import InlineTable 

38from tomlkit.items import Integer 

39from tomlkit.items import Item 

40from tomlkit.items import Key 

41from tomlkit.items import KeyType 

42from tomlkit.items import Null 

43from tomlkit.items import SingleKey 

44from tomlkit.items import String 

45from tomlkit.items import StringType 

46from tomlkit.items import Table 

47from tomlkit.items import Time 

48from tomlkit.items import Trivia 

49from tomlkit.items import Whitespace 

50from tomlkit.source import Source 

51from tomlkit.toml_char import TOMLChar 

52from tomlkit.toml_document import TOMLDocument 

53 

54 

# Code points used by the control-character checks in comments and strings.
CTRL_I = ord("\t")  # Tab
CTRL_J = ord("\n")  # Line feed
CTRL_M = ord("\r")  # Carriage return
CTRL_CHAR_LIMIT = 0x1F  # Upper bound (inclusive) used in the `code <= ...` checks
CHR_DEL = 0x7F  # DEL

60 

61 

62class Parser: 

63 """ 

64 Parser for TOML documents. 

65 """ 

66 

def __init__(self, string: str) -> None:
    """
    Prepares the parser: decodes the input, wraps it in a ``Source`` and
    starts with an empty array-of-tables stack.
    """
    decoded = decode(string)

    # Names of the arrays of tables currently being collected by _parse_aot()
    self._aot_stack: List[Key] = []

    # Input to parse
    self._src = Source(decoded)

72 

@property
def _state(self):
    """The ``state`` object of the underlying source (used with ``with``)."""
    source = self._src
    return source.state

76 

@property
def _idx(self):
    """The current index reported by the underlying source."""
    source = self._src
    return source.idx

80 

@property
def _current(self):
    """The character the underlying source is currently positioned on."""
    source = self._src
    return source.current

84 

@property
def _marker(self):
    """The marker position reported by the underlying source."""
    source = self._src
    return source.marker

88 

def extract(self) -> str:
    """
    Returns the text lying between the marker and the index.
    """
    source = self._src
    return source.extract()

94 

def inc(self, exception: Optional[Type[ParseError]] = None) -> bool:
    """
    Moves the parser one character forward unless the input is exhausted.

    ``exception`` is handed to the source unchanged. The return value tells
    whether the parser was able to advance.
    """
    advanced = self._src.inc(exception=exception)
    return advanced

101 

def inc_n(self, n: int, exception: Optional[Type[ParseError]] = None) -> bool:
    """
    Moves the parser n characters forward
    unless the end of the input gets in the way.
    """
    advanced = self._src.inc_n(n=n, exception=exception)
    return advanced

108 

def consume(self, chars, min=0, max=-1):
    """
    Consumes characters from ``chars``; ``min`` and ``max`` are passed
    through to the source together with ``chars``.
    """
    source = self._src
    return source.consume(chars=chars, min=min, max=max)

114 

def end(self) -> bool:
    """
    Tells whether the parser has run out of input.
    """
    source = self._src
    return source.end()

120 

def mark(self) -> None:
    """
    Moves the marker to the position the index currently points at.
    """
    source = self._src
    source.mark()

126 

def parse_error(self, exception=ParseError, *args, **kwargs):
    """
    Builds a "parse error" for the current position.

    The exception class and any extra arguments are forwarded to the source,
    whose result is returned (callers ``raise`` it themselves).
    """
    source = self._src
    return source.parse_error(exception, *args, **kwargs)

132 

def parse(self) -> TOMLDocument:
    """
    Parses the whole input and returns the resulting document.

    Items found before the first table header are appended first, then every
    table / array of tables. Any failure while appending is re-raised as a
    ParseError located at the current position.
    """
    document = TOMLDocument(True)

    def append(key, value):
        try:
            document.append(key, value)
        except Exception as e:
            raise self.parse_error(ParseError, str(e)) from e

    # Take all keyvals outside of tables/AoT's; stop as soon as a table is found.
    while not self.end() and self._current != "[":
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed
        is_dotted = key is not None and key.is_multi()
        if not is_dotted and self._merge_ws(value, document):
            # Plain whitespace that was folded into the previous item
            continue

        append(key, value)
        self.mark()

    while not self.end():
        key, value = self._parse_table()
        if isinstance(value, Table) and value.is_aot_element():
            # Only the first table of an AoT so far; collect the rest of the
            # array along with it.
            value = self._parse_aot(value, key)

        append(key, value)

    document.parsing(False)

    return document

172 

def _merge_ws(self, item: Item, container: Container) -> bool:
    """
    Folds the given Item into the last item of the given Container when both
    of them are whitespace items.

    Returns True if the items were merged, False otherwise.
    """
    previous = container.last_item()
    mergeable = (
        previous
        and isinstance(item, Whitespace)
        and isinstance(previous, Whitespace)
    )
    if not mergeable:
        return False

    # Both whitespace runs end at the current index, so re-slice the source
    # over their combined length.
    combined_length = len(previous.s) + len(item.s)
    start = self._idx - combined_length
    last_key = container.body[-1][0]
    container.body[-1] = (last_key, Whitespace(self._src[start : self._idx]))

    return True

194 

def _is_child(self, parent: Key, child: Key) -> bool:
    """
    Tells whether ``child`` lies strictly below ``parent``: the parts of
    ``parent`` must be a proper prefix of the parts of ``child``.
    Identical keys (AoT siblings) are not children of one another.
    """
    ancestor = tuple(parent)
    candidate = tuple(child)

    return ancestor != candidate and ancestor == candidate[: len(ancestor)]

207 

def _parse_item(self) -> Optional[Tuple[Optional[Key], Item]]:
    """
    Tries to parse the next item and returns it together with its key
    when the item is value-like (whitespace and comments get a None key).

    Returns None when a table header ("[") is reached.
    """
    self.mark()
    with self._state as state:
        while True:
            ch = self._current
            if ch == "\n":
                # Newline: hand back all whitespace seen up to and including it.
                self.inc()

                return None, Whitespace(self.extract())

            if ch == "#":
                # Comment: what was skipped so far becomes its indentation.
                indent = self.extract()
                cws, comment, trail = self._parse_comment_trail()

                return None, Comment(Trivia(indent, cws, comment, trail))

            if ch == "[":
                # A table starts here, the caller deals with it.
                return None

            if ch not in " \t\r":
                # Beginning of a KV pair.
                # Rewind to the start of the whitespace so it is picked up
                # as indentation of the KV about to be parsed.
                state.restore = True
                break

            # Plain whitespace: skip it, or return it if the input ends here.
            if not self.inc():
                return None, Whitespace(self.extract())

    return self._parse_key_value(True)

243 

def _parse_comment_trail(self, parse_trail: bool = True) -> Tuple[str, str, str]:
    """
    Returns (comment_ws, comment, trail).

    comment_ws and comment stay empty when no comment is present.
    The trail is only collected when ``parse_trail`` is true.
    """
    if self.end():
        return "", "", ""

    comment_ws = ""
    comment = ""
    self.mark()

    while True:
        ch = self._current

        if ch == "\n":
            break

        if ch == "#":
            # Everything skipped so far is the whitespace before the comment
            comment_ws = self.extract()

            self.mark()
            self.inc()  # Skip #

            # The comment itself: runs to the end of the line or the input
            while not self.end() and not self._current.is_nl():
                code = ord(self._current)
                forbidden = code == CHR_DEL or (
                    code <= CTRL_CHAR_LIMIT and code != CTRL_I
                )
                if forbidden:
                    raise self.parse_error(InvalidControlChar, code, "comments")

                if not self.inc():
                    break

            comment = self.extract()
            self.mark()

            break

        if ch not in " \t\r":
            raise self.parse_error(UnexpectedCharError, ch)

        self.inc()
        if self.end():
            break

    trail = ""
    if parse_trail:
        while self._current.is_spaces():
            if not self.inc():
                break

        # Optional "\r" followed by optional "\n"
        if self._current == "\r":
            self.inc()

        if self._current == "\n":
            self.inc()

        if self._idx != self._marker or self._current.is_ws():
            trail = self.extract()

    return comment_ws, comment, trail

304 

def _parse_key_value(self, parse_comment: bool = False) -> Tuple[Key, Item]:
    """
    Parses ``key = value`` at the current position and returns (key, value).

    With ``parse_comment`` the comment and trail following the value are
    stored in the value's trivia; otherwise its trail is set to "".
    """
    # Leading indent
    self.mark()
    while self._current.is_spaces():
        if not self.inc():
            break
    indent = self.extract()

    # Key
    key = self._parse_key()

    # Separator: whitespace around exactly one "="
    self.mark()
    seen_equals = self._current == "="
    while self._current.is_kv_sep() and self.inc():
        if self._current != "=":
            continue
        if seen_equals:
            raise self.parse_error(UnexpectedCharError, "=")
        seen_equals = True

    if not seen_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    if key.sep:
        key.sep += self.extract()
    else:
        key.sep = self.extract()

    # Value
    val = self._parse_value()

    # Comment
    if not parse_comment:
        val.trivia.trail = ""
    else:
        cws, comment, trail = self._parse_comment_trail()
        meta = val.trivia
        if not meta.comment_ws:
            meta.comment_ws = cws

        meta.comment = comment
        meta.trail = trail

    val.trivia.indent = indent

    return key, val

351 

def _parse_key(self) -> Key:
    """
    Parses a Key at the current position, dispatching on whether it opens
    with a quote character or not;
    WS before the key must be exhausted first at the callsite.
    """
    self.mark()

    # Skip any leading whitespace
    while self._current.is_spaces():
        if not self.inc():
            break

    is_quoted = self._current in "\"'"
    handler = self._parse_quoted_key if is_quoted else self._parse_bare_key

    return handler()

365 

def _parse_quoted_key(self) -> Key:
    """
    Parses a key wrapped in single or double quotes, followed by any
    further dotted parts.
    """
    # Whitespace skipped before the opening quote is part of the original text
    pieces = [self.extract()]

    quote_style = self._current
    key_type = None
    for candidate in KeyType:
        if candidate.value == quote_style:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    string_type = StringType.SLB if key_type == KeyType.Basic else StringType.SLL
    key_str = self._parse_string(string_type)
    if key_str._t.is_multiline():
        # Multi-line strings cannot be used as keys
        raise self.parse_error(UnexpectedCharError, key_str._t.value)
    pieces.append(key_str.as_string())

    # Whitespace after the closing quote also belongs to the original text
    self.mark()
    while self._current.is_spaces():
        if not self.inc():
            break
    pieces.append(self.extract())

    key = SingleKey(str(key_str), t=key_type, sep="", original="".join(pieces))
    if self._current == ".":
        # Dotted key: parse what follows the dot and chain it on
        self.inc()
        key = key.concat(self._parse_key())

    return key

394 

def _parse_bare_key(self) -> Key:
    """
    Parses a bare key, followed by any further dotted parts.

    Raises EmptyKeyError when nothing but whitespace was found and ParseError
    when the key contains whitespace in the middle.
    """
    # Bare key characters and spaces/tabs are consumed together; surrounding
    # whitespace is stripped below and kept in the original text only.
    while (
        self._current.is_bare_key_char() or self._current.is_spaces()
    ) and self.inc():
        pass

    original = self.extract()
    key = original.strip()
    if not key:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    # The loop above lets both spaces and tabs through, so both have to be
    # rejected here; checking " " alone would accept "a<TAB>b" as a bare key.
    if " " in key or "\t" in key:
        # Bare key with whitespace in it
        raise self.parse_error(ParseError, f'Invalid key "{key}"')

    key = SingleKey(key, KeyType.Bare, "", original)

    if self._current == ".":
        # Dotted key: parse what follows the dot and chain it on
        self.inc()
        key = key.concat(self._parse_key())

    return key

421 

def _parse_value(self) -> Item:
    """
    Tries to parse a value at the current position, dispatching on its
    first character.
    """
    self.mark()
    first = self._current
    trivia = Trivia()

    if first == StringType.SLB.value:
        return self._parse_basic_string()
    if first == StringType.SLL.value:
        return self._parse_literal_string()
    if first == BoolType.TRUE.value[0]:
        return self._parse_true()
    if first == BoolType.FALSE.value[0]:
        return self._parse_false()
    if first == "[":
        return self._parse_array()
    if first == "{":
        return self._parse_inline_table()

    def make_datetime(moment, text):
        return DateTime(
            moment.year,
            moment.month,
            moment.day,
            moment.hour,
            moment.minute,
            moment.second,
            moment.microsecond,
            moment.tzinfo,
            trivia,
            text,
        )

    signed_or_special = first in "+-" or self._peek(4) in {
        "+inf",
        "-inf",
        "inf",
        "+nan",
        "-nan",
        "nan",
    }
    if not signed_or_special and first not in string.digits:
        raise self.parse_error(UnexpectedCharError, first)

    # Read up to the next character that ends a value
    while self._current not in " \t\n\r#,]}" and self.inc():
        pass

    raw = self.extract()

    # Only values starting with a digit can be a Date, Time or DateTime
    match = None if signed_or_special else RFC_3339_LOOSE.match(raw)
    if match:
        has_date = match.group(1)
        has_time = match.group(5)

        if has_date and has_time:
            # datetime
            try:
                dt = parse_rfc3339(raw)
                assert isinstance(dt, datetime.datetime)
                return make_datetime(dt, raw)
            except ValueError:
                raise self.parse_error(InvalidDateTimeError)

        if has_date:
            try:
                dt = parse_rfc3339(raw)
                assert isinstance(dt, datetime.date)
                date = Date(dt.year, dt.month, dt.day, trivia, raw)

                # The scan above stopped at a space; a time part may follow it,
                # so keep reading, this time letting spaces through.
                self.mark()
                while self._current not in "\t\n\r#,]}" and self.inc():
                    pass

                time_raw = self.extract()
                time_part = time_raw.rstrip()
                trivia.comment_ws = time_raw[len(time_part) :]
                if not time_part:
                    return date

                dt = parse_rfc3339(raw + time_part)
                assert isinstance(dt, datetime.datetime)
                return make_datetime(dt, raw + time_part)
            except ValueError:
                raise self.parse_error(InvalidDateError)

        if has_time:
            try:
                t = parse_rfc3339(raw)
                assert isinstance(t, datetime.time)
                return Time(
                    t.hour,
                    t.minute,
                    t.second,
                    t.microsecond,
                    t.tzinfo,
                    trivia,
                    raw,
                )
            except ValueError:
                raise self.parse_error(InvalidTimeError)

    # Integer or Float
    number = self._parse_number(raw, trivia)
    if number is None:
        raise self.parse_error(InvalidNumberError)

    return number

545 

def _parse_true(self):
    """Parses the boolean literal described by ``BoolType.TRUE``."""
    style = BoolType.TRUE
    return self._parse_bool(style)

548 

def _parse_false(self):
    """Parses the boolean literal described by ``BoolType.FALSE``."""
    style = BoolType.FALSE
    return self._parse_bool(style)

551 

def _parse_bool(self, style: BoolType) -> Bool:
    """
    Consumes the characters of ``style`` one at a time, each exactly once,
    and returns the matching Bool item.
    """
    with self._state:
        style = BoolType(style)

        # Every character of the literal must be present, in order
        for expected in style:
            self.consume(expected, min=1, max=1)

        return Bool(style, Trivia())

562 

def _parse_array(self) -> Array:
    """
    Parses an array starting at its opening bracket.

    Whitespace, comments and commas are kept as elements next to the values.
    Raises UnexpectedCharError on anything that fits none of these, and
    ParseError if the Array item rejects the collected elements.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: List[Item] = []
    # None: nothing parsed yet / True: last element was a value /
    # False: last element was a comma
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES + TOMLChar.NL)
        indent = self._src[mark : self._idx]
        newline = set(TOMLChar.NL) & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value: may still be a comma or the closing bracket
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    try:
        return Array(elems, Trivia())
    except ValueError as e:
        # Previously swallowed, which made this method return None and fail
        # later with an unrelated error; report it as a parse error instead.
        raise self.parse_error(ParseError, str(e)) from e

620 

def _parse_inline_table(self) -> InlineTable:
    """
    Parses an inline table starting at its opening brace.

    Raises UnexpectedCharError on a leading, doubled or trailing comma and
    when two key-value pairs are not separated by a comma.
    """
    # consume opening brace, EOF here is an issue (middle of inline table)
    self.inc(exception=UnexpectedEofError)

    elems = Container(True)

    def keep_spaces():
        # Consume a run of spaces and store it, if there was any
        start = self._idx
        self.consume(TOMLChar.SPACES)
        spaces = self._src[start : self._idx]
        if spaces:
            elems.add(Whitespace(spaces))

    # None: nothing parsed yet (possibly an empty inline table)
    # False: previous key-value pair was not followed by a comma
    # True: previous key-value pair was followed by a comma
    trailing_comma = None
    while True:
        # consume leading whitespace
        keep_spaces()

        if trailing_comma:
            # After a comma another key-value pair has to follow
            if self._current in ("}", ","):
                raise self.parse_error(UnexpectedCharError, self._current)
        elif self._current == "}":
            # consume closing brace, EOF here doesn't matter
            self.inc()
            break
        elif trailing_comma is False or self._current == ",":
            # Either the previous key-value pair was not followed by a comma
            # or the table has an unexpected leading comma.
            raise self.parse_error(UnexpectedCharError, self._current)

        key, val = self._parse_key_value(False)
        elems.add(key, val)

        # consume trailing whitespace
        keep_spaces()

        # consume trailing comma
        trailing_comma = self._current == ","
        if trailing_comma:
            # consume the comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)

    return InlineTable(elems, Trivia())

673 

def _parse_number(self, raw: str, trivia: Trivia) -> Optional[Item]:
    """
    Turns the raw text of a number into an Integer or a Float.

    Returns None when ``raw`` is not a valid number: leading zeros, a sign
    in front of a prefixed (0b/0o/0x) integer, misplaced underscores, a
    dangling decimal point, non-ASCII characters or any spelling of the
    special floats other than lowercase ``inf`` / ``nan``.
    """
    sign = ""
    if raw.startswith(("+", "-")):
        sign = raw[0]
        raw = raw[1:]

    # int() and float() accept more than TOML does: non-ASCII digits,
    # "infinity" and inf/nan in any letter case. After the sign, only a
    # leading ASCII digit or exactly "inf" / "nan" can start a valid number.
    if not raw.isascii() or not (raw[:1].isdigit() or raw in ("inf", "nan")):
        return None

    # Leading zeros are not allowed
    if len(raw) > 1 and (
        raw.startswith("0")
        and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))
        or sign
        and raw.startswith(".")
    ):
        return None

    # Prefixed integers cannot carry a sign
    if raw.startswith(("0o", "0x", "0b")) and sign:
        return None

    digits = "[0-9]"
    base = 10
    if raw.startswith("0b"):
        digits = "[01]"
        base = 2
    elif raw.startswith("0o"):
        digits = "[0-7]"
        base = 8
    elif raw.startswith("0x"):
        digits = "[0-9a-f]"
        base = 16

    # Underscores should be surrounded by digits
    clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()

    # Any underscore left over was not between two digits
    if "_" in clean:
        return None

    # A decimal point must be followed by a digit
    if (
        clean.endswith(".")
        or not clean.startswith("0x")
        and clean.split("e", 1)[0].endswith(".")
    ):
        return None

    try:
        return Integer(int(sign + clean, base), trivia, sign + raw)
    except ValueError:
        try:
            return Float(float(sign + clean), trivia, sign + raw)
        except ValueError:
            return None

724 

def _parse_literal_string(self) -> String:
    """Parses a string opened by a single quote, inside a parser state scope."""
    delim = StringType.SLL
    with self._state:
        return self._parse_string(delim)

728 

def _parse_basic_string(self) -> String:
    """Parses a string opened by a double quote, inside a parser state scope."""
    delim = StringType.SLB
    with self._state:
        return self._parse_string(delim)

732 

def _parse_escaped_char(self, multiline):
    """
    Parses what follows a backslash in a basic string and returns the text
    it stands for.

    Raises InvalidCharInStringError or InvalidUnicodeValueError when the
    escape is not valid.
    """
    if multiline and self._current.is_ws():
        # When the last non-whitespace character on a line is
        # a \, it will be trimmed along with all whitespace
        # (including newlines) up to the next non-whitespace
        # character or closing delimiter.
        # """\
        #     hello \
        #     world"""
        skipped = []
        while self._current.is_ws():
            skipped.append(self._current)
            # consume the whitespace, EOF here is an issue
            # (middle of string)
            self.inc(exception=UnexpectedEofError)

        # the escape followed by whitespace must have a newline
        # before any other chars
        if "\n" not in skipped:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    current = self._current

    if current in _escaped:
        replacement = _escaped[current]

        # consume this char, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        return replacement

    if current in {"u", "U"}:
        # this needs to be a unicode
        decoded, digits = self._peek_unicode(current == "U")
        if decoded is None:
            raise self.parse_error(InvalidUnicodeValueError)

        # consume the U char and the unicode value
        self.inc_n(len(digits) + 1)

        return decoded

    raise self.parse_error(InvalidCharInStringError, current)

777 

def _parse_string(self, delim: StringType) -> String:
    """
    Parses a string that opens with ``delim`` at the current position.

    A tripled opening delimiter switches to the multi-line variant of
    ``delim``. Raises InvalidControlChar on forbidden control characters,
    UnexpectedEofError when the input ends inside the string and whatever
    _parse_escaped_char() raises for invalid escapes.
    """
    # only keep parsing for string if the current character matches the delim
    if self._current != delim.unit:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # consume the opening/first delim, EOF here is an issue
    # (middle of string or middle of delim)
    self.inc(exception=UnexpectedEofError)

    if self._current == delim.unit:
        # consume the closing/second delim, we do not care if EOF occurs as
        # that would simply imply an empty single line string
        if not self.inc() or self._current != delim.unit:
            # Empty string
            return String(delim, "", "", Trivia())

        # consume the third delim, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()  # convert delim to multi delim

    self.mark()  # to extract the original string with whitespace and all
    value = ""

    # A newline immediately following the opening delimiter will be trimmed.
    # Both "\n" and "\r\n" count as that newline; a "\r" that is not
    # followed by "\n" is left alone.
    if delim.is_multiline():
        if self._current == "\n":
            # consume the newline, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        elif self._current == "\r":
            # look one character ahead without moving the parser
            with self._state(restore=True):
                is_crlf = self.inc() and self._current == "\n"

            if is_crlf:
                # consume "\r\n", EOF here is an issue (middle of string)
                self.inc_n(2, exception=UnexpectedEofError)

    escaped = False  # whether the previous key was ESCAPE
    while True:
        code = ord(self._current)
        if (
            delim.is_singleline()
            and not escaped
            and (code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I)
        ) or (
            delim.is_multiline()
            and not escaped
            and (
                code == CHR_DEL
                or code <= CTRL_CHAR_LIMIT
                and code not in [CTRL_I, CTRL_J, CTRL_M]
            )
        ):
            # Single-line strings allow tab only; multi-line strings also
            # allow line feed and carriage return.
            raise self.parse_error(InvalidControlChar, code, "strings")
        elif not escaped and self._current == delim.unit:
            # try to process current as a closing delim
            original = self.extract()

            close = ""
            if delim.is_multiline():
                # Consume the delimiters to see if we are at the end of the string
                close = ""
                while self._current == delim.unit:
                    close += self._current
                    self.inc()

                if len(close) < 3:
                    # Not a triple quote, leave in result as-is.
                    # Adding back the characters we already consumed
                    value += close
                    continue

                if len(close) == 3:
                    # We are at the end of the string
                    return String(delim, value, original, Trivia())

                if len(close) >= 6:
                    raise self.parse_error(InvalidCharInStringError, self._current)

                # 4 or 5 delimiters: the last three close the string, the
                # ones before them belong to its content
                value += close[:-3]
                original += close[:-3]

                return String(delim, value, original, Trivia())
            else:
                # consume the closing delim, we do not care if EOF occurs as
                # that would simply imply the end of self._src
                self.inc()

                return String(delim, value, original, Trivia())
        elif delim.is_basic() and escaped:
            # attempt to parse the current char as an escaped value, an exception
            # is raised if this fails
            value += self._parse_escaped_char(delim.is_multiline())

            # no longer escaped
            escaped = False
        elif delim.is_basic() and self._current == "\\":
            # the next char is being escaped
            escaped = True

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # this is either a literal string where we keep everything as is,
            # or this is not a special escaped char in a basic string
            value += self._current

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

882 

def _parse_table(
    self, parent_name: Optional[Key] = None, parent: Optional[Table] = None
) -> Tuple[Key, Union[Table, AoT]]:
    """
    Parses a table element: its header, its items and any child tables
    that follow it.

    Returns the key under which the result has to be stored and the result
    itself (a Table, or an AoT when the header is an array-of-tables header
    that is not already being collected).

    Raises UnexpectedEofError / UnexpectedCharError on a malformed header,
    EmptyTableNameError on an empty name and ParseError on an invalid name.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part below the parent means intermediate tables are missing
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    # Keep only the parts relative to the parent
    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header closes with "]]": the second bracket has
        # to be there, anything else is a malformed header.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)

        self.inc()  # Skip second closing bracket

    cws, comment, trail = self._parse_comment_trail()

    result = Null()
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia(indent, cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

        result = table
        key = name_parts[0]

        # Walk down the remaining parts, creating each level as needed;
        # the last level is the table the following items belong to.
        for i, _name in enumerate(name_parts[1:]):
            child = table.get(
                _name,
                Table(
                    Container(True),
                    Trivia(indent, cws, comment, trail),
                    is_aot and i == len(name_parts) - 2,
                    is_super_table=i < len(name_parts) - 2,
                    name=_name.key,
                    display_name=full_key.as_string()
                    if i == len(name_parts) - 2
                    else None,
                ),
            )

            if is_aot and i == len(name_parts) - 2:
                table.raw_append(_name, AoT([child], name=table.name, parsed=True))
            else:
                table.raw_append(_name, child)

            table = child
            values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    while not self.end():
        item = self._parse_item()
        if item:
            _key, item = item
            if not self._merge_ws(item, values):
                table.raw_append(_key, item)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )

    if isinstance(result, Null):
        result = table

    # Collect the remaining elements unless this array is already being
    # collected by an enclosing _parse_aot() call.
    if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
        result = self._parse_aot(result, full_key)

    return key, result

1037 

def _peek_table(self) -> Tuple[bool, Key]:
    """
    Looks at the upcoming table header without consuming it: the parser
    state is saved on entry and always restored on exit.

    Returns (is_aot, key): whether the header opens with "[[" and the name
    of the table about to be parsed.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current != "[":
            raise self.parse_error(
                InternalParserError,
                "_peek_table() entered on non-bracket character",
            )

        self.inc()  # Skip opening bracket

        # AoT
        is_aot = self._current == "["
        if is_aot:
            self.inc()

        try:
            return is_aot, self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None

1064 

def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Collects every following table that belongs to the same array of tables
    as ``first`` and bundles them all into an AoT.
    """
    tables = [first]

    # Lets nested _parse_table() calls see that this array is being collected
    self._aot_stack.append(name_first)

    while not self.end():
        next_is_aot, next_name = self._peek_table()
        if not (next_is_aot and next_name == name_first):
            break

        _, sibling = self._parse_table(name_first)
        tables.append(sibling)

    self._aot_stack.pop()

    return AoT(tables, parsed=True)

1083 

def _peek(self, n: int) -> str:
    """
    Looks ahead at most n characters without consuming them.

    Stops early at the first whitespace character, "#", ",", "]", "}"
    or at the end of the input.
    """
    # we always want to restore after exiting this scope
    with self._state(restore=True):
        collected = []
        remaining = n
        while remaining > 0:
            if self._current in " \t\n\r#,]}" + self._src.EOF:
                break

            collected.append(self._current)
            self.inc()
            remaining -= 1

        return "".join(collected)

1101 

def _peek_unicode(self, is_long: bool) -> Tuple[Optional[str], Optional[str]]:
    """
    Looks at the unicode escape starting at the current "u" / "U" without
    consuming it: the parser state is saved on entry and always restored
    on exit.

    ``is_long`` selects the 8-digit form, otherwise 4 digits are read.

    Returns (value, digits). ``value`` is the decoded character, or None
    when the escape is not valid: too few characters left, a non-hexadecimal
    character, a surrogate code point or a code point chr() rejects.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current not in {"u", "U"}:
            raise self.parse_error(
                InternalParserError, "_peek_unicode() entered on non-unicode value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        chars = 8 if is_long else 4

        if not self.inc_n(chars):
            return None, None

        extracted = self.extract()

        # int(..., 16) alone is too lenient here: it accepts a sign, a "0x"
        # prefix, underscores, surrounding whitespace and non-ASCII digits.
        if any(ch not in "0123456789abcdefABCDEF" for ch in extracted):
            return None, extracted

        code = int(extracted, 16)

        # Surrogates are not valid characters on their own, whether written
        # with 4 or with 8 digits.
        if 0xD800 <= code <= 0xDFFF:
            return None, None

        try:
            value = chr(code)
        except (ValueError, OverflowError):
            # Code point beyond the range chr() supports
            value = None

        return value, extracted