Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tomlkit/parser.py: 98%

1from __future__ import annotations

3import datetime

4import re

5import string

7from tomlkit._compat import decode

8from tomlkit._utils import RFC_3339_LOOSE

9from tomlkit._utils import _escaped

10from tomlkit._utils import parse_rfc3339

11from tomlkit.container import Container

12from tomlkit.exceptions import EmptyKeyError

13from tomlkit.exceptions import EmptyTableNameError

14from tomlkit.exceptions import InternalParserError

15from tomlkit.exceptions import InvalidCharInStringError

16from tomlkit.exceptions import InvalidControlChar

17from tomlkit.exceptions import InvalidDateError

18from tomlkit.exceptions import InvalidDateTimeError

19from tomlkit.exceptions import InvalidNumberError

20from tomlkit.exceptions import InvalidTimeError

21from tomlkit.exceptions import InvalidUnicodeValueError

22from tomlkit.exceptions import ParseError

23from tomlkit.exceptions import UnexpectedCharError

24from tomlkit.exceptions import UnexpectedEofError

25from tomlkit.items import AoT

26from tomlkit.items import Array

27from tomlkit.items import Bool

28from tomlkit.items import BoolType

29from tomlkit.items import Comment

30from tomlkit.items import Date

31from tomlkit.items import DateTime

32from tomlkit.items import Float

33from tomlkit.items import InlineTable

34from tomlkit.items import Integer

35from tomlkit.items import Item

36from tomlkit.items import Key

37from tomlkit.items import KeyType

38from tomlkit.items import Null

39from tomlkit.items import SingleKey

40from tomlkit.items import String

41from tomlkit.items import StringType

42from tomlkit.items import Table

43from tomlkit.items import Time

44from tomlkit.items import Trivia

45from tomlkit.items import Whitespace

46from tomlkit.source import Source

47from tomlkit.toml_char import TOMLChar

48from tomlkit.toml_document import TOMLDocument

# Code points referenced by the control-character checks in comments and strings.
CTRL_I = 0x09  # horizontal tab
CTRL_J = 0x0A  # line feed
CTRL_M = 0x0D  # carriage return
CTRL_CHAR_LIMIT = 0x1F  # code points at or below this value are treated as control chars
CHR_DEL = 0x7F  # DEL

58class Parser:

59 """

60 Parser for TOML documents.

61 """

def __init__(self, string: str | bytes) -> None:
    """
    Wrap the given input in a Source after passing it through ``decode``.
    """
    # Keys of the arrays of tables currently being collected (see _parse_aot).
    aot_stack: list[Key] = []

    # Input to parse
    self._src = Source(decode(string))
    self._aot_stack = aot_stack

69 @property

70 def _state(self):

71 return self._src.state

73 @property

74 def _idx(self):

75 return self._src.idx

77 @property

78 def _current(self):

79 return self._src.current

81 @property

82 def _marker(self):

83 return self._src.marker

def extract(self) -> str:
    """
    Return whatever the underlying source extracts
    (the text between its marker and its index).
    """
    text = self._src.extract()
    return text

def inc(self, exception: type[ParseError] | None = None) -> bool:
    """
    Ask the underlying source to advance by one character.

    ``exception`` is forwarded to the source unchanged; the source's
    return value (whether it was able to advance) is passed back.
    """
    advanced = self._src.inc(exception=exception)
    return advanced

def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
    """
    Ask the underlying source to advance by ``n`` characters.

    Both arguments are forwarded as keywords and the source's
    result is returned as-is.
    """
    advanced = self._src.inc_n(n=n, exception=exception)
    return advanced

104

def consume(self, chars, min=0, max=-1):
    """
    Forward a consume request (characters plus min/max bounds)
    to the underlying source and return its result.
    """
    source = self._src
    return source.consume(chars=chars, min=min, max=max)

110

def end(self) -> bool:
    """
    Report whether the underlying source says the input is exhausted.
    """
    finished = self._src.end()
    return finished

116

def mark(self) -> None:
    """
    Ask the underlying source to move its marker to the current index.
    """
    source = self._src
    source.mark()

122

def parse_error(self, exception=ParseError, *args, **kwargs):
    """
    Build (not raise) an error through the underlying source.

    The exception class and any extra arguments are handed to the
    source's ``parse_error``; callers are expected to ``raise`` the result.
    """
    source = self._src
    return source.parse_error(exception, *args, **kwargs)

128

def parse(self) -> TOMLDocument:
    """
    Parse the whole input and return the resulting document.

    Items that appear before the first table header are read first,
    then every table / array of tables until the input is exhausted.
    Any exception raised while appending to the document is re-raised
    as a ParseError located at the current position.
    """
    document = TOMLDocument(True)

    def attach(entry_key, entry_value):
        # Appending can fail for many reasons; report all of them as parse errors.
        try:
            document.append(entry_key, entry_value)
        except Exception as e:
            raise self.parse_error(ParseError, str(e)) from e

    # Phase 1: everything that precedes the first "[".
    while not self.end() and self._current != "[":
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed

        # Dotted keys are always appended; anything else is appended only
        # when it could not be merged into a preceding whitespace item.
        must_append = key is not None and key.is_multi()
        if not must_append:
            must_append = not self._merge_ws(value, document)

        if must_append:
            attach(key, value)

        self.mark()

    # Phase 2: tables and arrays of tables.
    while not self.end():
        key, value = self._parse_table()
        if isinstance(value, Table) and value.is_aot_element():
            # Only the first element of an AoT was returned; gather its siblings.
            value = self._parse_aot(value, key)

        attach(key, value)

    document.parsing(False)

    return document

168

def _merge_ws(self, item: Item, container: Container) -> bool:
    """
    Fold ``item`` into the container's last item when both are Whitespace.

    The merged whitespace is re-read from the source: it spans the
    combined length of both items and ends at the current index.

    Returns True when a merge happened, False otherwise.
    """
    last = container.last_item()
    if not last:
        return False

    if not (isinstance(item, Whitespace) and isinstance(last, Whitespace)):
        return False

    stop = self._idx
    start = stop - (len(last.s) + len(item.s))
    previous_key = container.body[-1][0]
    container.body[-1] = (previous_key, Whitespace(self._src[start:stop]))

    return True

190

191 def _is_child(self, parent: Key, child: Key) -> bool:

192 """

193 Returns whether a key is strictly a child of another key.

194 AoT siblings are not considered children of one another.

195 """

196 parent_parts = tuple(parent)

197 child_parts = tuple(child)

198

199 if parent_parts == child_parts:

200 return False

201

202 return parent_parts == child_parts[: len(parent_parts)]

203

def _parse_item(self) -> tuple[Key | None, Item] | None:
    """
    Read the next item at the current position.

    Returns ``(None, Whitespace)`` for blank content, ``(None, Comment)``
    for a comment line, ``(key, value)`` for a key/value pair, and None
    when a table header ("[") is reached, which is left for the caller.
    """
    self.mark()
    with self._state as state:
        while True:
            ch = self._current

            if ch == "\n":
                # End of a blank line: hand back everything scanned so far.
                self.inc()
                return None, Whitespace(self.extract())

            if ch in " \t\r":
                # Keep skipping blanks; at end of input return them as whitespace.
                if self.inc():
                    continue
                return None, Whitespace(self.extract())

            if ch == "#":
                # Comment line: the blanks scanned so far become its indent.
                indent = self.extract()
                cws, comment, trail = self._parse_comment_trail()
                return None, Comment(Trivia(indent, cws, comment, trail))

            if ch == "[":
                # Table header: not ours to parse.
                return None

            # Anything else starts a key/value pair. Rewind to where the
            # blanks began so they are captured as the pair's indentation.
            state.restore = True
            break

    return self._parse_key_value(True)

239

240 def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:

241 """

242 Returns (comment_ws, comment, trail)

243 If there is no comment, comment_ws and comment will

244 simply be empty.

245 """

246 if self.end():

247 return "", "", ""

248

249 comment = ""

250 comment_ws = ""

251 self.mark()

252

253 while True:

254 c = self._current

255

256 if c == "\n":

257 break

258 elif c == "#":

259 comment_ws = self.extract()

260

261 self.mark()

262 self.inc() # Skip #

263

264 # The comment itself

265 while not self.end() and not self._current.is_nl():

266 code = ord(self._current)

267 if code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I:

268 raise self.parse_error(InvalidControlChar, code, "comments")

269

270 if not self.inc():

271 break

272

273 comment = self.extract()

274 self.mark()

275

276 break

277 elif c in " \t\r":

278 self.inc()

279 else:

280 raise self.parse_error(UnexpectedCharError, c)

281

282 if self.end():

283 break

284

285 trail = ""

286 if parse_trail:

287 while self._current.is_spaces() and self.inc():

288 pass

289

290 if self._current == "\r":

291 self.inc()

292

293 if self._current == "\n":

294 self.inc()

295

296 if self._idx != self._marker or self._current.is_ws():

297 trail = self.extract()

298

299 return comment_ws, comment, trail

300

def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
    """
    Parse one ``key = value`` pair, including its leading indentation.

    When ``parse_comment`` is true the remainder of the line (comment and
    trail) is parsed as well and stored on the value's trivia; otherwise
    the value's trail is set to an empty string.

    Raises UnexpectedCharError when "=" is missing or appears twice.
    """
    # Indentation in front of the key
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    indent = self.extract()

    key = self._parse_key()

    # Separator: blanks around exactly one "="
    self.mark()
    seen_equals = self._current == "="
    while self._current.is_kv_sep() and self.inc():
        if self._current != "=":
            continue
        if seen_equals:
            raise self.parse_error(UnexpectedCharError, "=")
        seen_equals = True

    if not seen_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    separator = self.extract()
    key.sep = key.sep + separator if key.sep else separator

    val = self._parse_value()

    if not parse_comment:
        val.trivia.trail = ""
    else:
        cws, comment, trail = self._parse_comment_trail()
        meta = val.trivia
        # Keep comment whitespace the value parser may already have recorded.
        if not meta.comment_ws:
            meta.comment_ws = cws

        meta.comment = comment
        meta.trail = trail

    val.trivia.indent = indent

    return key, val

347

def _parse_key(self) -> Key:
    """
    Parse the key found at the current position.

    Leading blanks are skipped after the marker is set, so the quoted /
    bare key parsers can include them in the key's original text.
    """
    self.mark()
    while self._current.is_spaces() and self.inc():
        # Skip any leading whitespace
        pass

    # A quote character selects the quoted-key parser; anything else is bare.
    handler = (
        self._parse_quoted_key if self._current in "\"'" else self._parse_bare_key
    )
    return handler()

361

def _parse_quoted_key(self) -> Key:
    """
    Parse a key wrapped in single or double quotes.

    The key's original text keeps the blanks before and after the quoted
    part. A "." right after the key continues into a dotted key.

    Raises RuntimeError when the current character is not a known quote
    and UnexpectedCharError when the key turns out to be a multi-line string.
    """
    # Blanks skipped by the caller since the last marker
    leading = self.extract()
    quote_style = self._current

    key_type = None
    for candidate in KeyType:
        if candidate.value == quote_style:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    string_type = StringType.SLB if key_type == KeyType.Basic else StringType.SLL
    key_str = self._parse_string(string_type)
    if key_str._t.is_multiline():
        raise self.parse_error(UnexpectedCharError, key_str._t.value)

    original = leading
    original += key_str.as_string()

    # Blanks between the closing quote and whatever follows
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    original += self.extract()

    key = SingleKey(str(key_str), t=key_type, sep="", original=original)
    if self._current == ".":
        self.inc()
        key = key.concat(self._parse_key())

    return key

390

def _parse_bare_key(self) -> Key:
    """
    Parses a bare key.

    Consumes bare-key characters and blanks from the current position,
    then validates what was read. The key's original text (including any
    surrounding blanks) is everything extracted since the marker set by
    the caller. A "." right after the key continues into a dotted key.

    Raises EmptyKeyError when nothing but blanks was read, and ParseError
    when blanks are embedded inside the key (e.g. ``a b = 1``).
    """
    while (
        self._current.is_bare_key_char() or self._current.is_spaces()
    ) and self.inc():
        pass

    original = self.extract()
    key = original.strip()
    if not key:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    # Blanks are consumed above so that "a . b" can be parsed, but after
    # stripping none may remain inside the key. The loop admits whatever
    # is_spaces() matches (presumably tab as well as space), so both are
    # rejected here rather than the space alone.
    if " " in key or "\t" in key:
        # Bare key with spaces in it
        raise self.parse_error(ParseError, f'Invalid key "{key}"')

    key = SingleKey(key, KeyType.Bare, "", original)

    if self._current == ".":
        self.inc()
        key = key.concat(self._parse_key())

    return key

417

def _parse_value(self) -> Item:
    """
    Parse the value found at the current position.

    The first character selects the kind of value: string, boolean,
    array, inline table, signed / special number, or a digit-led token
    that is either a date, time, datetime or plain number.

    Raises UnexpectedCharError when no value can start with the current
    character, InvalidNumberError for a malformed number and
    InvalidDateError / InvalidTimeError / InvalidDateTimeError when a
    date-like token cannot be converted.
    """
    self.mark()
    first = self._current
    trivia = Trivia()

    def build_datetime(dt, text):
        # Shared by the "full datetime" and "date, blank, time" paths.
        assert isinstance(dt, datetime.datetime)
        return DateTime(
            dt.year,
            dt.month,
            dt.day,
            dt.hour,
            dt.minute,
            dt.second,
            dt.microsecond,
            dt.tzinfo,
            trivia,
            text,
        )

    if first == StringType.SLB.value:
        return self._parse_basic_string()
    if first == StringType.SLL.value:
        return self._parse_literal_string()
    if first == BoolType.TRUE.value[0]:
        return self._parse_true()
    if first == BoolType.FALSE.value[0]:
        return self._parse_false()
    if first == "[":
        return self._parse_array()
    if first == "{":
        return self._parse_inline_table()

    # Characters that terminate a number-like token
    token_end = " \t\n\r#,]}"

    if first in "+-" or self._peek(4) in {
        "+inf",
        "-inf",
        "inf",
        "+nan",
        "-nan",
        "nan",
    }:
        # Signed number or special float
        while self._current not in token_end and self.inc():
            pass

        number = self._parse_number(self.extract(), trivia)
        if number is None:
            raise self.parse_error(InvalidNumberError)

        return number

    if first not in string.digits:
        raise self.parse_error(UnexpectedCharError, first)

    # Integer, Float, Date, Time or DateTime
    while self._current not in token_end and self.inc():
        pass

    raw = self.extract()

    match = RFC_3339_LOOSE.match(raw)
    if match:
        has_date = match.group(1)
        has_time = match.group(5)

        if has_date and has_time:
            # Complete datetime in a single token
            try:
                return build_datetime(parse_rfc3339(raw), raw)
            except ValueError:
                raise self.parse_error(InvalidDateTimeError) from None

        if has_date:
            try:
                parsed = parse_rfc3339(raw)
                assert isinstance(parsed, datetime.date)
                date = Date(parsed.year, parsed.month, parsed.day, trivia, raw)

                # A time may follow after a blank, so scan on without
                # treating the space as a terminator.
                self.mark()
                while self._current not in "\t\n\r#,]}" and self.inc():
                    pass

                time_raw = self.extract()
                time_part = time_raw.rstrip()
                trivia.comment_ws = time_raw[len(time_part) :]
                if not time_part:
                    return date

                combined = raw + time_part
                return build_datetime(parse_rfc3339(combined), combined)
            except ValueError:
                raise self.parse_error(InvalidDateError) from None

        if has_time:
            try:
                t = parse_rfc3339(raw)
                assert isinstance(t, datetime.time)
                return Time(
                    t.hour,
                    t.minute,
                    t.second,
                    t.microsecond,
                    t.tzinfo,
                    trivia,
                    raw,
                )
            except ValueError:
                raise self.parse_error(InvalidTimeError) from None

    number = self._parse_number(raw, trivia)
    if number is None:
        raise self.parse_error(InvalidNumberError)

    return number

541

def _parse_true(self):
    """
    Parse the boolean literal described by BoolType.TRUE.
    """
    style = BoolType.TRUE
    return self._parse_bool(style)

544

def _parse_false(self):
    """
    Parse the boolean literal described by BoolType.FALSE.
    """
    style = BoolType.FALSE
    return self._parse_bool(style)

547

def _parse_bool(self, style: BoolType) -> Bool:
    """
    Parse a boolean literal of the given style.

    Every element yielded by iterating the style must be consumed exactly
    once, in order, from the input.
    """
    with self._state:
        kind = BoolType(style)

        # Consume the literal piece by piece; each piece is mandatory.
        for expected in kind:
            self.consume(expected, min=1, max=1)

        trivia = Trivia()
        return Bool(kind, trivia)

558

def _parse_array(self) -> Array:
    """
    Parse an array starting at its opening bracket.

    Whitespace, comments, commas and values are collected in source order
    as the array's elements. Each value must be separated from the next
    by a comma.

    Raises UnexpectedEofError when the input ends inside the array,
    UnexpectedCharError for a character that fits nowhere, and ParseError
    when the Array item itself rejects the collected elements.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: list[Item] = []
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES + TOMLChar.NL)
        indent = self._src[mark : self._idx]
        newline = set(TOMLChar.NL) & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value; it may still be a closing bracket (below).
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    try:
        return Array(elems, Trivia())
    except ValueError as e:
        # Previously swallowed, which made this method return None and
        # fail later in an unrelated place; report it where it happens.
        raise self.parse_error(ParseError, str(e)) from e

616

def _parse_inline_table(self) -> InlineTable:
    """
    Parse an inline table starting at its opening brace.

    Key/value pairs must be separated by single commas; a leading comma,
    a doubled comma, a comma before the closing brace and a missing comma
    all raise UnexpectedCharError. Running out of input right after the
    opening brace or after a comma raises UnexpectedEofError.
    """
    # Step over the opening brace; EOF here means the table is unterminated.
    self.inc(exception=UnexpectedEofError)

    elems = Container(True)

    def absorb_spaces() -> None:
        # Record any run of blanks at the current position as whitespace.
        start = self._idx
        self.consume(TOMLChar.SPACES)
        blanks = self._src[start : self._idx]
        if blanks:
            elems.add(Whitespace(blanks))

    # None: nothing parsed yet; False: last pair had no comma; True: it had one.
    trailing_comma = None
    while True:
        absorb_spaces()

        if trailing_comma:
            # A comma must be followed by another pair.
            if self._current in ("}", ","):
                raise self.parse_error(UnexpectedCharError, self._current)
        else:
            if self._current == "}":
                # Closing brace; EOF right after it is fine.
                self.inc()
                break

            # Either the previous pair lacked its comma, or the table
            # starts with a stray comma.
            if trailing_comma is False or self._current == ",":
                raise self.parse_error(UnexpectedCharError, self._current)

        key, val = self._parse_key_value(False)
        elems.add(key, val)

        absorb_spaces()

        trailing_comma = self._current == ","
        if trailing_comma:
            # Step over the comma; EOF here means the table is unterminated.
            self.inc(exception=UnexpectedEofError)

    return InlineTable(elems, Trivia())

669

670 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None:

671 # Leading zeros are not allowed

672 sign = ""

673 if raw.startswith(("+", "-")):

674 sign = raw[0]

675 raw = raw[1:]

676

677 if len(raw) > 1 and (

678 raw.startswith("0")

679 and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))

680 or sign

681 and raw.startswith(".")

682 ):

683 return None

684

685 if raw.startswith(("0o", "0x", "0b")) and sign:

686 return None

687

688 digits = "[0-9]"

689 base = 10

690 if raw.startswith("0b"):

691 digits = "[01]"

692 base = 2

693 elif raw.startswith("0o"):

694 digits = "[0-7]"

695 base = 8

696 elif raw.startswith("0x"):

697 digits = "[0-9a-f]"

698 base = 16

699

700 # Underscores should be surrounded by digits

701 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()

702

703 if "_" in clean:

704 return None

705

706 if (

707 clean.endswith(".")

708 or not clean.startswith("0x")

709 and clean.split("e", 1)[0].endswith(".")

710 ):

711 return None

712

713 try:

714 return Integer(int(sign + clean, base), trivia, sign + raw)

715 except ValueError:

716 try:

717 return Float(float(sign + clean), trivia, sign + raw)

718 except ValueError:

719 return None

720

def _parse_literal_string(self) -> String:
    """
    Parse a string that opens with the literal-string delimiter,
    inside a parser state scope.
    """
    with self._state:
        parsed = self._parse_string(StringType.SLL)
    return parsed

724

def _parse_basic_string(self) -> String:
    """
    Parse a string that opens with the basic-string delimiter,
    inside a parser state scope.
    """
    with self._state:
        parsed = self._parse_string(StringType.SLB)
    return parsed

728

def _parse_escaped_char(self, multiline):
    """
    Resolve the character(s) following a backslash and return their value.

    In a multi-line string a backslash followed by whitespace is a line
    continuation: all whitespace is consumed and an empty string returned,
    provided a newline was among it. Otherwise the character is looked up
    in the escape table, or parsed as a ``\\u`` / ``\\U`` escape.

    Raises InvalidCharInStringError for an unknown escape or a
    continuation without newline, InvalidUnicodeValueError for a bad
    unicode escape and UnexpectedEofError when the input ends.
    """
    if multiline and self._current.is_ws():
        # Line continuation, e.g.
        # """\
        #     hello \
        #     world"""
        # The backslash swallows all whitespace (newlines included) up to
        # the next non-whitespace character or closing delimiter.
        saw_newline = False
        while self._current.is_ws():
            if self._current == "\n":
                saw_newline = True
            # EOF in the middle of a string is an error
            self.inc(exception=UnexpectedEofError)

        # A newline must occur before any other character
        if not saw_newline:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    if self._current in _escaped:
        resolved = _escaped[self._current]

        # Step over the escape letter; EOF in the middle of a string is an error
        self.inc(exception=UnexpectedEofError)

        return resolved

    if self._current in {"u", "U"}:
        # Unicode escape: look ahead first, consume only when it is valid
        u, ue = self._peek_unicode(self._current == "U")
        if u is None:
            raise self.parse_error(InvalidUnicodeValueError)

        # Step over the u/U letter plus the hex digits
        self.inc_n(len(ue) + 1)

        return u

    raise self.parse_error(InvalidCharInStringError, self._current)

773

def _parse_string(self, delim: StringType) -> String:
    """
    Parse a string whose opening delimiter is at the current position.

    ``delim`` is the single-line type to start from; it is toggled to its
    multi-line counterpart when three delimiter characters open the
    string. Escapes are only interpreted for basic strings.

    Raises InternalParserError when the current character is not the
    delimiter, InvalidControlChar for forbidden control characters,
    InvalidCharInStringError for too many closing quotes, and
    UnexpectedEofError when the input ends inside the string.
    """
    quote = delim.unit

    # Only proceed when the current character really opens this kind of string
    if self._current != quote:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # Step over the first delimiter; EOF here is an error
    self.inc(exception=UnexpectedEofError)

    if self._current == quote:
        # Second delimiter in a row. If the input ends here or no third one
        # follows, this was simply an empty single-line string.
        if not self.inc() or self._current != quote:
            return String(delim, "", "", Trivia())

        # Third delimiter: a multi-line string begins; EOF here is an error
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()
        quote = delim.unit

    self.mark()  # the original text is extracted from here later
    value = ""

    singleline = delim.is_singleline()
    multiline = delim.is_multiline()
    basic = delim.is_basic()

    # A line ending right after the opening delimiter is dropped
    if multiline:
        if self._current == "\n":
            self.inc(exception=UnexpectedEofError)
        else:
            pair = self._current
            with self._state(restore=True):
                if self.inc():
                    pair += self._current
            if pair == "\r\n":
                self.inc_n(2, exception=UnexpectedEofError)

    escaped = False  # True right after a backslash in a basic string
    while True:
        code = ord(self._current)

        # Unescaped control characters are forbidden; single-line strings
        # tolerate only tab, multi-line ones also line feed and carriage return.
        if not escaped and (
            (
                singleline
                and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
            )
            or (
                multiline
                and (
                    code == CHR_DEL
                    or (
                        code <= CTRL_CHAR_LIMIT
                        and code not in (CTRL_I, CTRL_J, CTRL_M)
                    )
                )
            )
        ):
            raise self.parse_error(InvalidControlChar, code, "strings")

        if not escaped and self._current == quote:
            # Possibly the closing delimiter
            original = self.extract()

            if not multiline:
                # Step over it; EOF afterwards is fine
                self.inc()

                return String(delim, value, original, Trivia())

            # Count the run of delimiter characters
            run = ""
            while self._current == quote:
                run += self._current
                self.inc()

            if len(run) < 3:
                # Not a closing delimiter: the quotes belong to the content
                value += run
                continue

            if len(run) == 3:
                # Exactly the closing delimiter
                return String(delim, value, original, Trivia())

            if len(run) >= 6:
                raise self.parse_error(InvalidCharInStringError, self._current)

            # Four or five: the extra quotes are content, the last three close
            value += run[:-3]
            original += run[:-3]

            return String(delim, value, original, Trivia())

        if basic and escaped:
            # Resolve the escape sequence; raises when it is invalid
            value += self._parse_escaped_char(multiline)

            escaped = False
        elif basic and self._current == "\\":
            # The next character is escaped
            escaped = True

            # EOF in the middle of a string is an error
            self.inc(exception=UnexpectedEofError)
        else:
            # Plain content (or anything at all inside a literal string)
            value += self._current

            # EOF in the middle of a string is an error
            self.inc(exception=UnexpectedEofError)

886

def _parse_table(
    self, parent_name: Key | None = None, parent: Table | None = None
) -> tuple[Key, Table | AoT]:
    """
    Parses a table element.

    Reads the ``[name]`` or ``[[name]]`` header at the current position,
    the items that follow it, and any directly nested child tables.

    ``parent_name`` is the full key of the enclosing table, if any; the
    returned key and the created tables are relative to it. ``parent``
    is accepted but not used by this method.

    Returns ``(key, result)`` where ``result`` is the table itself, the
    outermost implicitly created super table, or an AoT when the header
    opened an array of tables that is not already being collected.

    Raises InternalParserError when not positioned on "[",
    UnexpectedEofError / UnexpectedCharError for a malformed header,
    EmptyTableNameError for an empty name and ParseError for a bare name
    containing spaces.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part below the parent means intermediate tables
    # were never declared explicitly.
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header must close with a second "]";
        # do not silently skip whatever character happens to follow.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)
        self.inc()

    cws, comment, trail = self._parse_comment_trail()

    result = Null()
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia("", cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

        result = table
        key = name_parts[0]

        # Walk down the remaining parts, creating each level as needed;
        # only the last level is the table that was actually declared.
        for i, _name in enumerate(name_parts[1:]):
            child = table.get(
                _name,
                Table(
                    Container(True),
                    Trivia(indent, cws, comment, trail),
                    is_aot and i == len(name_parts) - 2,
                    is_super_table=i < len(name_parts) - 2,
                    name=_name.key,
                    display_name=(
                        full_key.as_string() if i == len(name_parts) - 2 else None
                    ),
                ),
            )

            if is_aot and i == len(name_parts) - 2:
                table.raw_append(_name, AoT([child], name=table.name, parsed=True))
            else:
                table.raw_append(_name, child)

            table = child
            values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    while not self.end():
        item = self._parse_item()
        if item:
            _key, item = item
            if not self._merge_ws(item, values):
                table.raw_append(_key, item)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )
    table.value._validate_out_of_order_table()
    if isinstance(result, Null):
        result = table

    # Collect the AoT siblings unless this very AoT is already being collected.
    if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
        result = self._parse_aot(result, full_key)

    return key, result

1041

def _peek_table(self) -> tuple[bool, Key]:
    """
    Look at the upcoming table header without consuming it.

    The parser state (marker included) is saved on entry and restored
    on exit.

    Returns ``(is_aot, key)``: whether the header opens with ``[[``,
    and the key it names.

    Raises InternalParserError when not positioned on "[" and
    EmptyTableNameError when the header has no name.
    """
    # State is always restored when this scope is left
    with self._state(save_marker=True, restore=True):
        if self._current != "[":
            raise self.parse_error(
                InternalParserError,
                "_peek_table() entered on non-bracket character",
            )

        self.inc()

        # A second bracket marks an array of tables
        is_aot = False
        if self._current == "[":
            is_aot = True
            self.inc()

        try:
            key = self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None

        return is_aot, key

1068

def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Gather the tables that follow ``first`` under the same ``[[name]]``
    header and wrap them all in an AoT.

    While the siblings are parsed, ``name_first`` sits on the AoT stack.
    """
    tables = [first]
    self._aot_stack.append(name_first)

    while not self.end():
        is_aot_next, name_next = self._peek_table()

        # Stop at the first header that is not another element of this array
        if not (is_aot_next and name_next == name_first):
            break

        _, sibling = self._parse_table(name_first)
        tables.append(sibling)

    self._aot_stack.pop()

    return AoT(tables, parsed=True)

1087

def _peek(self, n: int) -> str:
    """
    Return up to ``n`` upcoming characters without consuming them.

    Collection stops early at a blank, a line ending, "#", ",", "]", "}"
    or the source's EOF character. The parser state is restored on exit.
    """
    # State is always restored when this scope is left
    with self._state(restore=True):
        stoppers = " \t\n\r#,]}" + self._src.EOF
        seen = []
        for _ in range(n):
            if self._current in stoppers:
                break

            seen.append(self._current)
            self.inc()

        return "".join(seen)

1105

def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
    """
    Look at the unicode escape at the current position without consuming it.

    The parser must be positioned on the ``u`` / ``U`` letter. The parser
    state (marker included) is saved on entry and restored on exit.

    ``is_long`` selects the 8-digit form (``U``) instead of the 4-digit one.

    Returns ``(value, extracted)``: the decoded character and the hex
    digits it was read from. ``value`` is None when the escape is not
    valid; ``extracted`` is None as well when the input is too short or
    the code point is a surrogate.

    Raises InternalParserError when not positioned on ``u`` / ``U``.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current not in {"u", "U"}:
            raise self.parse_error(
                InternalParserError, "_peek_unicode() entered on non-unicode value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        chars = 8 if is_long else 4

        if not self.inc_n(chars):
            return None, None

        extracted = self.extract()

        # int(..., 16) alone is too lenient: it accepts a sign, a "0x"
        # prefix, underscores and surrounding blanks. Only hex digits count.
        if any(ch not in string.hexdigits for ch in extracted):
            return None, extracted

        code_point = int(extracted, 16)

        # Surrogates are not Unicode scalar values. Checking the numeric
        # range covers the long form (e.g. 0000D800) as well as the short one.
        if 0xD800 <= code_point <= 0xDFFF:
            return None, None

        try:
            value = chr(code_point)
        except (ValueError, OverflowError):
            # Beyond the last valid code point
            value = None

        return value, extracted

1141 return value, extracted