Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 97%

1from __future__ import annotations

3import datetime

4import re

5import string

7from tomlkit._compat import decode

8from tomlkit._utils import RFC_3339_LOOSE

9from tomlkit._utils import _escaped

10from tomlkit._utils import parse_rfc3339

11from tomlkit.container import Container

12from tomlkit.exceptions import EmptyKeyError

13from tomlkit.exceptions import EmptyTableNameError

14from tomlkit.exceptions import InternalParserError

15from tomlkit.exceptions import InvalidCharInStringError

16from tomlkit.exceptions import InvalidControlChar

17from tomlkit.exceptions import InvalidDateError

18from tomlkit.exceptions import InvalidDateTimeError

19from tomlkit.exceptions import InvalidNumberError

20from tomlkit.exceptions import InvalidTimeError

21from tomlkit.exceptions import InvalidUnicodeValueError

22from tomlkit.exceptions import ParseError

23from tomlkit.exceptions import UnexpectedCharError

24from tomlkit.exceptions import UnexpectedEofError

25from tomlkit.items import AoT

26from tomlkit.items import Array

27from tomlkit.items import Bool

28from tomlkit.items import BoolType

29from tomlkit.items import Comment

30from tomlkit.items import Date

31from tomlkit.items import DateTime

32from tomlkit.items import Float

33from tomlkit.items import InlineTable

34from tomlkit.items import Integer

35from tomlkit.items import Item

36from tomlkit.items import Key

37from tomlkit.items import KeyType

38from tomlkit.items import Null

39from tomlkit.items import SingleKey

40from tomlkit.items import String

41from tomlkit.items import StringType

42from tomlkit.items import Table

43from tomlkit.items import Time

44from tomlkit.items import Trivia

45from tomlkit.items import Whitespace

46from tomlkit.source import Source

47from tomlkit.toml_char import TOMLChar

48from tomlkit.toml_document import TOMLDocument

# Code points of the control characters the parser treats specially.
CTRL_I = 0x09  # Tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
# Highest code point compared against when rejecting control characters
# (checks are written as ``code <= CTRL_CHAR_LIMIT``).
CTRL_CHAR_LIMIT = 0x1F
# DEL is rejected alongside the control characters above.
CHR_DEL = 0x7F

58class Parser:

59 """

60 Parser for TOML documents.

61 """

def __init__(self, string: str | bytes) -> None:
    """
    Prepares a parser over the given TOML input (text or bytes).
    """
    # Input to parse, passed through decode() before being wrapped in a Source
    decoded = decode(string)
    self._src = Source(decoded)

    # Keys of the arrays of tables currently being collected by _parse_aot()
    self._aot_stack: list[Key] = []

@property
def _state(self):
    """
    The state handler of the underlying source, used to save/restore positions.
    """
    source = self._src
    return source.state

@property
def _idx(self):
    """
    The current index of the underlying source.
    """
    source = self._src
    return source.idx

@property
def _current(self):
    """
    The character at the current position of the underlying source.
    """
    source = self._src
    return source.current

@property
def _marker(self):
    """
    The marker position of the underlying source.
    """
    source = self._src
    return source.marker

def extract(self) -> str:
    """
    Returns the text lying between the marker and the current index.
    """
    source = self._src
    return source.extract()

def inc(self, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by one character unless the input is exhausted.

    The optional exception type is forwarded to the source unchanged.
    Returns whether the parser was able to advance.
    """
    source = self._src
    return source.inc(exception=exception)

def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
    """
    Advances the parser by n characters unless the input is exhausted.

    The optional exception type is forwarded to the source unchanged.
    """
    source = self._src
    return source.inc_n(n=n, exception=exception)

104

def consume(self, chars, min=0, max=-1):
    """
    Consumes characters from the given set, subject to the min/max bounds.

    All arguments are handed to the source's consume() as keywords.
    """
    source = self._src
    return source.consume(chars=chars, min=min, max=max)

110

def end(self) -> bool:
    """
    Tells whether the parser has reached the end of the input.
    """
    source = self._src
    return source.end()

116

def mark(self) -> None:
    """
    Moves the marker to the current index.
    """
    source = self._src
    source.mark()

122

def parse_error(self, exception=ParseError, *args, **kwargs):
    """
    Builds (but does not raise) a parse error for the current position.

    The exception type and any extra arguments are forwarded to the source.
    """
    source = self._src
    return source.parse_error(exception, *args, **kwargs)

128

def parse(self) -> TOMLDocument:
    """
    Parses the whole input and returns the resulting document.

    Items found before the first table header are appended first, then
    every table / array of tables until the input is exhausted.
    """
    document = TOMLDocument(True)

    def add_to_document(item_key, item_value):
        # Any failure while appending is reported as a ParseError located
        # at the current source position.
        try:
            document.append(item_key, item_value)
        except Exception as e:
            raise self.parse_error(ParseError, str(e)) from e

    # Take all keyvals outside of tables/AoT's; stop at the first "[".
    while not self.end() and self._current != "[":
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed
        is_dotted = key is not None and key.is_multi()
        # Whitespace that merges into the previous whitespace item is
        # not appended a second time.
        if is_dotted or not self._merge_ws(value, document):
            add_to_document(key, value)

        self.mark()

    # Everything that remains is a sequence of tables.
    while not self.end():
        key, value = self._parse_table()
        if isinstance(value, Table) and value.is_aot_element():
            # This is just the first table in an AoT. Parse the rest of
            # the array along with it.
            value = self._parse_aot(value, key)

        add_to_document(key, value)

    document.parsing(False)

    return document

168

def _merge_ws(self, item: Item, container: Container) -> bool:
    """
    Folds the given item into the container's last item when both of them
    are whitespace.

    Returns True if the items were merged, False otherwise (including when
    the container has no last item).
    """
    previous = container.last_item()
    if not previous:
        return False

    if not (isinstance(item, Whitespace) and isinstance(previous, Whitespace)):
        return False

    # Both whitespace runs end at the current index, so the merged text is
    # re-read from the source rather than concatenated.
    stop = self._idx
    begin = stop - (len(previous.s) + len(item.s))
    previous_key = container.body[-1][0]
    container.body[-1] = (previous_key, Whitespace(self._src[begin:stop]))

    return True

190

191 def _is_child(self, parent: Key, child: Key) -> bool:

192 """

193 Returns whether a key is strictly a child of another key.

194 AoT siblings are not considered children of one another.

195 """

196 parent_parts = tuple(parent)

197 child_parts = tuple(child)

198

199 if parent_parts == child_parts:

200 return False

201

202 return parent_parts == child_parts[: len(parent_parts)]

203

def _parse_item(self) -> tuple[Key | None, Item] | None:
    """
    Attempts to parse the next item and returns it, along with its key
    if the item is value-like.

    Whitespace and comment items are returned with a key of None.
    Returns None (without consuming the bracket) when a "[" is met, so
    the caller can handle the table header.
    """
    self.mark()
    with self._state as state:
        while True:
            c = self._current
            if c == "\n":
                # Found a newline; Return all whitespace found up to this point.
                self.inc()

                return None, Whitespace(self.extract())
            elif c in " \t\r":
                if c == "\r":
                    # Look ahead (position restored afterwards): a carriage
                    # return is only accepted when followed by a line feed.
                    with self._state(restore=True):
                        if not self.inc() or self._current != "\n":
                            raise self.parse_error(
                                InvalidControlChar, CTRL_M, "documents"
                            )
                # Skip whitespace.
                if not self.inc():
                    # Input ended inside a whitespace run.
                    return None, Whitespace(self.extract())
            elif c == "#":
                # Found a comment, parse it
                indent = self.extract()
                cws, comment, trail = self._parse_comment_trail()

                return None, Comment(Trivia(indent, cws, comment, trail))
            elif c == "[":
                # Found a table, delegate to the calling function.
                return
            else:
                # Beginning of a KV pair.
                # Return to beginning of whitespace so it gets included
                # as indentation for the KV about to be parsed.
                state.restore = True
                break

    return self._parse_key_value(True)

245

def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
    """
    Returns (comment_ws, comment, trail)
    If there is no comment, comment_ws and comment will
    simply be empty.

    When parse_trail is False the trailing whitespace / line ending is
    left unconsumed and trail is returned empty.
    Raises InvalidControlChar for control characters inside the comment or
    for a carriage return not followed by a line feed, and
    UnexpectedCharError for any other non-whitespace character found
    before the comment or end of line.
    """
    if self.end():
        return "", "", ""

    comment = ""
    comment_ws = ""
    self.mark()

    while True:
        c = self._current

        if c == "\n":
            break
        elif c == "#":
            # Everything marked so far is the whitespace preceding the comment.
            comment_ws = self.extract()

            self.mark()
            self.inc()  # Skip #

            # The comment itself
            while not self.end() and not self._current.is_nl():
                code = ord(self._current)
                # Tab is the only control character allowed in a comment.
                if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                    raise self.parse_error(InvalidControlChar, code, "comments")

                if not self.inc():
                    break

            comment = self.extract()
            self.mark()

            break
        elif c in " \t\r":
            if c == "\r":
                # Look ahead (position restored afterwards): a carriage
                # return must be followed by a line feed.
                with self._state(restore=True):
                    if not self.inc() or self._current != "\n":
                        raise self.parse_error(
                            InvalidControlChar, CTRL_M, "comments"
                        )
            self.inc()
        else:
            raise self.parse_error(UnexpectedCharError, c)

        if self.end():
            break

    trail = ""
    if parse_trail:
        # Trailing spaces, then an optional line ending (LF or CRLF).
        while self._current.is_spaces() and self.inc():
            pass

        if self._current == "\r":
            with self._state(restore=True):
                if not self.inc() or self._current != "\n":
                    raise self.parse_error(InvalidControlChar, CTRL_M, "documents")
            self.inc()

        if self._current == "\n":
            self.inc()

        if self._idx != self._marker or self._current.is_ws():
            trail = self.extract()

    return comment_ws, comment, trail

315

def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
    """
    Parses one ``key = value`` pair starting at the current position.

    When parse_comment is true, the trailing comment and trail are parsed
    and stored on the value's trivia; otherwise the trail is set to "".
    Raises UnexpectedCharError when the separator holds no "=" or more
    than one.
    """
    # Leading indent
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    leading = self.extract()

    # Key
    key = self._parse_key()

    # Separator: exactly one "=" must appear among the separator characters
    self.mark()
    seen_equals = self._current == "="
    while self._current.is_kv_sep() and self.inc():
        if self._current != "=":
            continue
        if seen_equals:
            raise self.parse_error(UnexpectedCharError, "=")
        seen_equals = True
    if not seen_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    separator = self.extract()
    if key.sep:
        key.sep += separator
    else:
        key.sep = separator

    # Value
    value = self._parse_value()

    # Comment
    if parse_comment:
        cws, comment, trail = self._parse_comment_trail()
        trivia = value.trivia
        # Keep any comment whitespace the value parser already recorded.
        if not trivia.comment_ws:
            trivia.comment_ws = cws

        trivia.comment = comment
        trivia.trail = trail
    else:
        value.trivia.trail = ""

    value.trivia.indent = leading

    return key, value

362

def _parse_key(self) -> Key:
    """
    Parses a Key at the current position, dispatching to the quoted or
    bare key parser depending on the first non-space character;
    WS before the key must be exhausted first at the callsite.
    """
    self.mark()
    # Skip any leading whitespace
    while self._current.is_spaces() and self.inc():
        pass

    starts_with_quote = self._current in "\"'"
    parse = self._parse_quoted_key if starts_with_quote else self._parse_bare_key
    return parse()

376

def _parse_quoted_key(self) -> Key:
    """
    Parses a key enclosed in either single or double quotes, followed by
    any further dotted parts.
    """
    # Whitespace skipped by the caller since the last mark
    leading_ws = self.extract()
    quote = self._current

    key_type = None
    for candidate in KeyType:
        if candidate.value == quote:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    if key_type == KeyType.Basic:
        string_type = StringType.SLB
    else:
        string_type = StringType.SLL

    key_str = self._parse_string(string_type)
    # A multi-line string is not accepted as a key.
    if key_str._t.is_multiline():
        raise self.parse_error(UnexpectedCharError, key_str._t.value)
    quoted_text = key_str.as_string()

    # Whitespace after the closing quote belongs to the key's original text.
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    trailing_ws = self.extract()

    original = leading_ws + quoted_text + trailing_ws
    key = SingleKey(str(key_str), t=key_type, sep="", original=original)

    # A dot continues a dotted key: parse the rest and chain it on.
    if self._current == ".":
        self.inc()
        key = key.concat(self._parse_key())

    return key

405

def _parse_bare_key(self) -> Key:
    """
    Parses a bare key, followed by any further dotted parts.

    Raises EmptyKeyError when no key characters are found and ParseError
    when whitespace (space or tab) appears inside the key.
    """
    # Spaces/tabs are consumed together with the key characters so that
    # surrounding whitespace is preserved in the key's original text.
    while (
        self._current.is_bare_key_char() or self._current.is_spaces()
    ) and self.inc():
        pass

    original = self.extract()
    key = original.strip()
    if not key:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    # The scan above accepts both spaces and tabs, so both must be rejected
    # here; checking only " " would let a key such as "a<TAB>b" through.
    if " " in key or "\t" in key:
        # Bare key with whitespace in it
        raise self.parse_error(ParseError, f'Invalid key "{key}"')

    key = SingleKey(key, KeyType.Bare, "", original)

    # A dot continues a dotted key: parse the rest and chain it on.
    if self._current == ".":
        self.inc()
        key = key.concat(self._parse_key())

    return key

432

def _parse_value(self) -> Item:
    """
    Attempts to parse a value at the current position.

    The first character selects the kind of value: a quote starts a
    string, the first letter of true/false a boolean, "[" an array, "{"
    an inline table, a sign or inf/nan a number, and a digit a number,
    date, time or datetime.
    Raises UnexpectedCharError for any other character, InvalidNumberError
    for malformed numbers and the InvalidDate/Time/DateTime errors for
    malformed temporal values.
    """
    self.mark()
    c = self._current
    trivia = Trivia()

    if c == StringType.SLB.value:
        return self._parse_basic_string()
    elif c == StringType.SLL.value:
        return self._parse_literal_string()
    elif c == BoolType.TRUE.value[0]:
        return self._parse_true()
    elif c == BoolType.FALSE.value[0]:
        return self._parse_false()
    elif c == "[":
        return self._parse_array()
    elif c == "{":
        return self._parse_inline_table()
    elif c in "+-" or self._peek(4) in {
        "+inf",
        "-inf",
        "inf",
        "+nan",
        "-nan",
        "nan",
    }:
        # Number
        # Read up to the next whitespace, comment or container delimiter.
        while self._current not in " \t\n\r#,]}" and self.inc():
            pass

        raw = self.extract()

        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    elif c in string.digits:
        # Integer, Float, Date, Time or DateTime
        while self._current not in " \t\n\r#,]}" and self.inc():
            pass

        raw = self.extract()

        m = RFC_3339_LOOSE.match(raw)
        if m:
            if m.group("date") and m.group("time"):
                # datetime
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateTimeError) from None

            if m.group("date"):
                try:
                    dt = parse_rfc3339(raw)
                    assert isinstance(dt, datetime.date)
                    date = Date(dt.year, dt.month, dt.day, trivia, raw)
                    # The first scan stopped at a space, which may separate
                    # the date from a time part: keep reading (spaces are
                    # not terminators in this second scan).
                    self.mark()
                    while self._current not in "\t\n\r#,]}" and self.inc():
                        pass

                    time_raw = self.extract()
                    time_part = time_raw.rstrip()
                    # Whitespace after the time part is stored as the
                    # whitespace preceding a comment.
                    trivia.comment_ws = time_raw[len(time_part) :]
                    if not time_part:
                        return date

                    dt = parse_rfc3339(raw + time_part)
                    assert isinstance(dt, datetime.datetime)
                    return DateTime(
                        dt.year,
                        dt.month,
                        dt.day,
                        dt.hour,
                        dt.minute,
                        dt.second,
                        dt.microsecond,
                        dt.tzinfo,
                        trivia,
                        raw + time_part,
                    )
                except ValueError:
                    raise self.parse_error(InvalidDateError) from None

            if m.group("time"):
                try:
                    t = parse_rfc3339(raw)
                    assert isinstance(t, datetime.time)
                    return Time(
                        t.hour,
                        t.minute,
                        t.second,
                        t.microsecond,
                        t.tzinfo,
                        trivia,
                        raw,
                    )
                except ValueError:
                    raise self.parse_error(InvalidTimeError) from None

        # Not a temporal value: fall back to a plain number.
        item = self._parse_number(raw, trivia)
        if item is not None:
            return item

        raise self.parse_error(InvalidNumberError)
    else:
        raise self.parse_error(UnexpectedCharError, c)

556

def _parse_true(self):
    """
    Parses the boolean literal described by BoolType.TRUE.
    """
    expected = BoolType.TRUE
    return self._parse_bool(expected)

559

def _parse_false(self):
    """
    Parses the boolean literal described by BoolType.FALSE.
    """
    expected = BoolType.FALSE
    return self._parse_bool(expected)

562

def _parse_bool(self, style: BoolType) -> Bool:
    """
    Parses a boolean literal by consuming, one at a time, each character
    of the given style.
    """
    with self._state:
        bool_type = BoolType(style)

        # Every character of the literal must be present exactly once,
        # in order.
        for expected in bool_type:
            self.consume(expected, min=1, max=1)

        return Bool(bool_type, Trivia())

573

def _parse_array(self) -> Array:
    """
    Parses an array starting at its opening bracket.

    Whitespace, comments and commas are kept as elements next to the
    values so the array can be built from the full element list.
    Raises UnexpectedEofError when the input ends inside the array,
    UnexpectedCharError on an unexpected character, and ParseError when
    the collected elements cannot be turned into an Array.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: list[Item] = []
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES + TOMLChar.NL)
        indent = self._src[mark : self._idx]
        newline = set(TOMLChar.NL) & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value: fall through to the comma / bracket checks.
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            # If the previous item is Whitespace, add to it
            if isinstance(elems[-1], Whitespace):
                elems[-1]._s = elems[-1].s + ","
            else:
                elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    try:
        return Array(elems, Trivia())
    except ValueError as e:
        # Report the failure at the current position instead of silently
        # returning None, which callers would then dereference as an Item.
        raise self.parse_error(ParseError, str(e)) from e

635

def _parse_inline_table(self) -> InlineTable:
    """
    Parses an inline table starting at its opening brace.

    Whitespace (including newlines), comments and commas are stored in
    the container next to the key/value pairs.
    Raises UnexpectedEofError when the input ends right after the opening
    brace or a comma, and UnexpectedCharError on a misplaced character.
    """
    # consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    container = Container(True)
    want_key = True
    while True:
        # Collect the whitespace and comments preceding the next token.
        while True:
            start = self._idx
            self.consume(TOMLChar.SPACES + TOMLChar.NL)
            blank = self._src[start : self._idx]
            if blank:
                container.add(Whitespace(blank))

            if self._current != "#":
                break

            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            container.add(Comment(Trivia("", cws, comment, trail)))

        if self._current == "}":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        if want_key:
            # A comma where a key is expected is an error.
            if self._current == ",":
                raise self.parse_error(UnexpectedCharError, self._current)
            key, val = self._parse_key_value(False)
            container.add(key, val)
            want_key = False
        elif self._current == ",":
            container.add(Whitespace(","))
            # consume comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)
            want_key = True
        else:
            # After a pair only a comma or the closing brace may follow.
            raise self.parse_error(UnexpectedCharError, self._current)

    return InlineTable(container, Trivia())

679

680 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None:

681 # Leading zeros are not allowed

682 sign = ""

683 if raw.startswith(("+", "-")):

684 sign = raw[0]

685 raw = raw[1:]

686

687 if len(raw) > 1 and (

688 (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e")))

689 or (sign and raw.startswith("."))

690 ):

691 return None

692

693 if raw.startswith(("0o", "0x", "0b")) and sign:

694 return None

695

696 digits = "[0-9]"

697 base = 10

698 if raw.startswith("0b"):

699 digits = "[01]"

700 base = 2

701 elif raw.startswith("0o"):

702 digits = "[0-7]"

703 base = 8

704 elif raw.startswith("0x"):

705 digits = "[0-9a-f]"

706 base = 16

707

708 # Underscores should be surrounded by digits

709 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()

710

711 if "_" in clean:

712 return None

713

714 if clean.endswith(".") or (

715 not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".")

716 ):

717 return None

718

719 try:

720 return Integer(int(sign + clean, base), trivia, sign + raw)

721 except ValueError:

722 try:

723 return Float(float(sign + clean), trivia, sign + raw)

724 except ValueError:

725 return None

726

def _parse_literal_string(self) -> String:
    """
    Parses a string that opens with the single-line literal delimiter.
    """
    with self._state:
        delimiter = StringType.SLL
        return self._parse_string(delimiter)

730

def _parse_basic_string(self) -> String:
    """
    Parses a string that opens with the single-line basic delimiter.
    """
    with self._state:
        delimiter = StringType.SLB
        return self._parse_string(delimiter)

734

def _parse_escaped_char(self, multiline):
    """
    Parses the character(s) following a backslash and returns the text
    they stand for.

    Raises InvalidCharInStringError for an unknown escape (or a
    line-ending backslash with no newline after it) and
    InvalidUnicodeValueError for a malformed u/U/x escape.
    """
    if multiline and self._current.is_ws():
        # When the last non-whitespace character on a line is
        # a \, it will be trimmed along with all whitespace
        # (including newlines) up to the next non-whitespace
        # character or closing delimiter.
        # """\
        #     hello \
        #     world"""
        saw_newline = False
        while self._current.is_ws():
            if self._current == "\n":
                saw_newline = True
            # consume the whitespace, EOF here is an issue
            # (middle of string)
            self.inc(exception=UnexpectedEofError)

        # the escape followed by whitespace must have a newline
        # before any other chars
        if not saw_newline:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    escape = self._current

    if escape in _escaped:
        replacement = _escaped[escape]

        # consume this char, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        return replacement

    if escape in {"u", "U"}:
        # this needs to be a unicode
        decoded, digits = self._peek_unicode(escape == "U")
        if decoded is None:
            raise self.parse_error(InvalidUnicodeValueError)

        # consume the U char and the unicode value
        self.inc_n(len(digits) + 1)

        return decoded

    if escape == "x":
        decoded, digits = self._peek_hex()
        if decoded is None:
            raise self.parse_error(InvalidUnicodeValueError)

        # consume the x char and the hex value
        self.inc_n(len(digits) + 1)

        return decoded

    raise self.parse_error(InvalidCharInStringError, self._current)

788

def _parse_string(self, delim: StringType) -> String:
    """
    Parses a string whose opening delimiter is at the current position.

    delim is the single-line type to start from; it is switched to the
    multi-line variant when three delimiters open the string.
    Raises InternalParserError when the current character is not the
    delimiter, UnexpectedEofError when the input ends inside the string,
    InvalidControlChar for forbidden control characters (including a
    carriage return not followed by a line feed) and
    InvalidCharInStringError for six or more closing delimiters.
    """
    # only keep parsing for string if the current character matches the delim
    if self._current != delim.unit:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # consume the opening/first delim, EOF here is an issue
    # (middle of string or middle of delim)
    self.inc(exception=UnexpectedEofError)

    if self._current == delim.unit:
        # consume the closing/second delim, we do not care if EOF occurs as
        # that would simply imply an empty single line string
        if not self.inc() or self._current != delim.unit:
            # Empty string
            return String(delim, "", "", Trivia())

        # consume the third delim, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()  # convert delim to multi delim

    self.mark()  # to extract the original string with whitespace and all
    value = ""

    # A newline immediately following the opening delimiter will be trimmed.
    if delim.is_multiline():
        if self._current == "\n":
            # consume the newline, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # Look one character ahead (position restored afterwards) to
            # detect a CRLF right after the opening delimiter.
            cur = self._current
            with self._state(restore=True):
                if self.inc():
                    cur += self._current
            if cur == "\r\n":
                self.inc_n(2, exception=UnexpectedEofError)

    escaped = False  # whether the previous key was ESCAPE
    while True:
        code = ord(self._current)
        if (
            delim.is_singleline()
            and not escaped
            and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
        ) or (
            delim.is_multiline()
            and not escaped
            and (
                code == CHR_DEL
                or (
                    code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M]
                )
            )
        ):
            raise self.parse_error(InvalidControlChar, code, "strings")
        elif delim.is_multiline() and not escaped and self._current == "\r":
            # A carriage return is only valid as part of CRLF; the check
            # restores the position afterwards.
            with self._state(restore=True):
                if not self.inc() or self._current != "\n":
                    raise self.parse_error(InvalidControlChar, CTRL_M, "strings")

            # The look-ahead consumed nothing, so keep the "\r" and step
            # past it here; otherwise the loop would examine the same
            # character forever.
            value += self._current
            self.inc(exception=UnexpectedEofError)
        elif not escaped and self._current == delim.unit:
            # try to process current as a closing delim
            original = self.extract()

            if delim.is_multiline():
                # Consume the delimiters to see if we are at the end of the string
                close = ""
                while self._current == delim.unit:
                    close += self._current
                    self.inc()

                if len(close) < 3:
                    # Not a triple quote, leave in result as-is.
                    # Adding back the characters we already consumed
                    value += close
                    continue

                if len(close) == 3:
                    # We are at the end of the string
                    return String(delim, value, original, Trivia())

                if len(close) >= 6:
                    raise self.parse_error(InvalidCharInStringError, self._current)

                # Four or five delimiters: the extra ones are content and
                # the last three close the string.
                value += close[:-3]
                original += close[:-3]

                return String(delim, value, original, Trivia())
            else:
                # consume the closing delim, we do not care if EOF occurs as
                # that would simply imply the end of self._src
                self.inc()

                return String(delim, value, original, Trivia())
        elif delim.is_basic() and escaped:
            # attempt to parse the current char as an escaped value, an exception
            # is raised if this fails
            value += self._parse_escaped_char(delim.is_multiline())

            # no longer escaped
            escaped = False
        elif delim.is_basic() and self._current == "\\":
            # the next char is being escaped
            escaped = True

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # this is either a literal string where we keep everything as is,
            # or this is not a special escaped char in a basic string
            value += self._current

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

906

def _parse_table(
    self, parent_name: Key | None = None, parent: Table | None = None
) -> tuple[Key, Table | AoT]:
    """
    Parses a table element: its header, its body and any child tables
    that directly follow it.

    parent_name is the full key of the enclosing table, if any; its parts
    are stripped from the front of this table's name.
    Returns the key to store the result under together with the table
    (or the AoT built from it).
    Raises InternalParserError when not positioned on "[",
    UnexpectedEofError / UnexpectedCharError for a malformed header,
    EmptyTableNameError for an empty name and ParseError for a bare name
    containing a space.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part beyond the parent means intermediate tables
    # have to be created.
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header must end with a second "]"; verify it
        # instead of blindly skipping whatever character comes next.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)
        self.inc()

    cws, comment, trail = self._parse_comment_trail()

    result = Null()
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia("", cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

        result = table
        key = name_parts[0]

        # Walk down the remaining name parts, creating (or reusing) one
        # table per part; only the last one is the table actually declared.
        for i, _name in enumerate(name_parts[1:]):
            child = table.get(
                _name,
                Table(
                    Container(True),
                    Trivia(indent, cws, comment, trail),
                    is_aot and i == len(name_parts) - 2,
                    is_super_table=i < len(name_parts) - 2,
                    name=_name.key,
                    display_name=(
                        full_key.as_string() if i == len(name_parts) - 2 else None
                    ),
                ),
            )

            if is_aot and i == len(name_parts) - 2:
                table.raw_append(_name, AoT([child], name=table.name, parsed=True))
            else:
                table.raw_append(_name, child)

            table = child
            values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    # Body of the table: items until the next header that is not a child.
    while not self.end():
        item = self._parse_item()
        if item:
            _key, item = item
            if not self._merge_ws(item, values):
                table.raw_append(_key, item)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )
    table.value._validate_out_of_order_table()
    if isinstance(result, Null):
        result = table

        # The first table of an AoT collects its siblings, unless that AoT
        # is already the one being collected by the caller.
        if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
            result = self._parse_aot(result, full_key)

    return key, result

1061

def _peek_table(self) -> tuple[bool, Key]:
    """
    Looks at the upcoming table header without consuming it: the parser
    state (marker included) is restored on exit.

    Returns a pair made of whether the header opens an AoT (double
    bracket) and the key naming the table.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current != "[":
            raise self.parse_error(
                InternalParserError,
                "_peek_table() entered on non-bracket character",
            )

        self.inc()

        # A second opening bracket marks an array of tables
        is_aot = self._current == "["
        if is_aot:
            self.inc()

        try:
            key = self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None

        return is_aot, key

1088

def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Collects the given table and every directly following AoT table with
    the same name into a single AoT.
    """
    tables = [first]
    self._aot_stack.append(name_first)
    while not self.end():
        is_aot_next, name_next = self._peek_table()
        # Stop at the first header that is not a sibling of this AoT.
        if not (is_aot_next and name_next == name_first):
            break

        _, sibling = self._parse_table(name_first)
        tables.append(sibling)

    self._aot_stack.pop()

    return AoT(tables, parsed=True)

1107

def _peek(self, n: int) -> str:
    """
    Returns up to n upcoming characters without consuming them.

    Reading stops early at whitespace, "#", ",", "]", "}" or the
    source's EOF character.
    """
    # we always want to restore after exiting this scope
    with self._state(restore=True):
        terminators = " \t\n\r#,]}" + self._src.EOF
        collected = []
        for _ in range(n):
            if self._current in terminators:
                break

            collected.append(self._current)
            self.inc()

        return "".join(collected)

1125

def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
    """
    Peeks ahead non-intrusively by cloning then restoring the
    initial state of the parser.

    is_long selects the 8-digit form (U) instead of the 4-digit form (u).
    Returns the decoded character and the digits read; the character is
    None when the escape is not a valid one (too short, non-hexadecimal
    digits, a surrogate code point or a value chr() rejects).
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current not in {"u", "U"}:
            raise self.parse_error(
                InternalParserError, "_peek_unicode() entered on non-unicode value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        chars = 8 if is_long else 4

        if not self.inc_n(chars):
            return None, None

        extracted = self.extract()

        # int() also accepts signs, surrounding whitespace and underscores,
        # so make sure only hexadecimal digits were read (same check as
        # _peek_hex()).
        if extracted.strip("0123456789abcdefABCDEF"):
            return None, None

        codepoint = int(extracted, 16)

        # Surrogate code points are not valid characters on their own;
        # testing the numeric value covers both the short and long forms.
        if 0xD800 <= codepoint <= 0xDFFF:
            return None, None

        try:
            value = chr(codepoint)
        except (ValueError, OverflowError):
            value = None

        return value, extracted

1162

def _peek_hex(self) -> tuple[str | None, str | None]:
    """
    Looks at a two-digit ``x`` escape without consuming it: the parser
    state (marker included) is restored on exit.

    Returns the decoded character and the digits read, or (None, None)
    when fewer than two characters remain or they are not hexadecimal.
    """
    with self._state(save_marker=True, restore=True):
        if self._current != "x":
            raise self.parse_error(
                InternalParserError, "_peek_hex() entered on non-hex value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        if not self.inc_n(2):
            return None, None

        digits = self.extract()
        # Anything left after stripping hex digits is an invalid character.
        if digits.strip("0123456789abcdefABCDEF"):
            return None, None

        try:
            return chr(int(digits, 16)), digits
        except (ValueError, OverflowError):
            return None, digits