Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tomlkit/parser.py: 98%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import datetime
4import re
5import string
7from tomlkit._compat import decode
8from tomlkit._utils import RFC_3339_LOOSE
9from tomlkit._utils import _escaped
10from tomlkit._utils import parse_rfc3339
11from tomlkit.container import Container
12from tomlkit.exceptions import EmptyKeyError
13from tomlkit.exceptions import EmptyTableNameError
14from tomlkit.exceptions import InternalParserError
15from tomlkit.exceptions import InvalidCharInStringError
16from tomlkit.exceptions import InvalidControlChar
17from tomlkit.exceptions import InvalidDateError
18from tomlkit.exceptions import InvalidDateTimeError
19from tomlkit.exceptions import InvalidNumberError
20from tomlkit.exceptions import InvalidTimeError
21from tomlkit.exceptions import InvalidUnicodeValueError
22from tomlkit.exceptions import ParseError
23from tomlkit.exceptions import UnexpectedCharError
24from tomlkit.exceptions import UnexpectedEofError
25from tomlkit.items import AoT
26from tomlkit.items import Array
27from tomlkit.items import Bool
28from tomlkit.items import BoolType
29from tomlkit.items import Comment
30from tomlkit.items import Date
31from tomlkit.items import DateTime
32from tomlkit.items import Float
33from tomlkit.items import InlineTable
34from tomlkit.items import Integer
35from tomlkit.items import Item
36from tomlkit.items import Key
37from tomlkit.items import KeyType
38from tomlkit.items import Null
39from tomlkit.items import SingleKey
40from tomlkit.items import String
41from tomlkit.items import StringType
42from tomlkit.items import Table
43from tomlkit.items import Time
44from tomlkit.items import Trivia
45from tomlkit.items import Whitespace
46from tomlkit.source import Source
47from tomlkit.toml_char import TOMLChar
48from tomlkit.toml_document import TOMLDocument
# Code points the parser consults when rejecting control characters in
# comments and strings.
CTRL_I = 0x09  # Tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
# Code points at or below this value are treated as control characters
# (individual exceptions such as tab are made at the point of use).
CTRL_CHAR_LIMIT = 0x1F
CHR_DEL = 0x7F  # DEL, rejected alongside the control characters above
class Parser:
    """
    Parser for TOML documents.

    The parser walks a ``Source`` one character at a time and builds
    ``tomlkit`` items (keys, values, tables, whitespace and comments) that
    retain the original formatting of the input.
    """

    def __init__(self, string: str | bytes) -> None:
        # Input to parse
        self._src = Source(decode(string))

        # Keys of the arrays of tables currently being parsed (innermost last).
        self._aot_stack: list[Key] = []

    @property
    def _state(self):
        """
        The source's state object; used as a context manager to save and
        optionally restore the parsing position.
        """
        return self._src.state

    @property
    def _idx(self):
        """
        Current index in the source.
        """
        return self._src.idx

    @property
    def _current(self):
        """
        Character at the current index.
        """
        return self._src.current

    @property
    def _marker(self):
        """
        Index last recorded with mark().
        """
        return self._src.marker

    def extract(self) -> str:
        """
        Extracts the value between marker and index
        """
        return self._src.extract()

    def inc(self, exception: type[ParseError] | None = None) -> bool:
        """
        Increments the parser if the end of the input has not been reached.
        Returns whether or not it was able to advance.

        ``exception`` is forwarded to the source unchanged.
        """
        return self._src.inc(exception=exception)

    def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
        """
        Increments the parser by n characters
        if the end of the input has not been reached.

        ``exception`` is forwarded to the source unchanged.
        """
        return self._src.inc_n(n=n, exception=exception)

    def consume(self, chars, min=0, max=-1):
        """
        Consumes characters contained in ``chars``.

        ``min`` and ``max`` are forwarded to the source as the bounds on how
        many characters are consumed.
        """
        return self._src.consume(chars=chars, min=min, max=max)

    def end(self) -> bool:
        """
        Returns True if the parser has reached the end of the input.
        """
        return self._src.end()

    def mark(self) -> None:
        """
        Sets the marker to the index's current position
        """
        self._src.mark()

    def parse_error(self, exception=ParseError, *args, **kwargs):
        """
        Creates a generic "parse error" at the current position.

        The exception is returned, not raised; callers ``raise`` it.
        """
        return self._src.parse_error(exception, *args, **kwargs)

    def parse(self) -> TOMLDocument:
        """
        Parses the whole input and returns the resulting document.

        Any exception raised while appending an item to the document is
        re-raised as a ParseError located at the current position.
        """
        body = TOMLDocument(True)

        # Take all keyvals outside of tables/AoT's.
        while not self.end():
            # Break out if a table is found
            if self._current == "[":
                break

            # Otherwise, take and append one KV
            item = self._parse_item()
            if not item:
                break

            key, value = item
            if (key is not None and key.is_multi()) or not self._merge_ws(value, body):
                # Not whitespace that could be merged into the previous item
                # (or a dotted key): append it to the document.
                try:
                    body.append(key, value)
                except Exception as e:
                    raise self.parse_error(ParseError, str(e)) from e

            self.mark()

        while not self.end():
            key, value = self._parse_table()
            if isinstance(value, Table) and value.is_aot_element():
                # This is just the first table in an AoT. Parse the rest of the array
                # along with it.
                value = self._parse_aot(value, key)

            try:
                body.append(key, value)
            except Exception as e:
                raise self.parse_error(ParseError, str(e)) from e

        body.parsing(False)

        return body

    def _merge_ws(self, item: Item, container: Container) -> bool:
        """
        Merges the given Item with the last one currently in the given Container if
        both are whitespace items.

        Returns True if the items were merged.
        """
        last = container.last_item()
        if not last:
            return False

        if not isinstance(item, Whitespace) or not isinstance(last, Whitespace):
            return False

        # Both runs are contiguous and end at the current index, so re-slice
        # the source to get the combined whitespace.
        start = self._idx - (len(last.s) + len(item.s))
        container.body[-1] = (
            container.body[-1][0],
            Whitespace(self._src[start : self._idx]),
        )

        return True

    def _is_child(self, parent: Key, child: Key) -> bool:
        """
        Returns whether a key is strictly a child of another key.
        AoT siblings are not considered children of one another.
        """
        parent_parts = tuple(parent)
        child_parts = tuple(child)

        if parent_parts == child_parts:
            return False

        return parent_parts == child_parts[: len(parent_parts)]

    def _parse_item(self) -> tuple[Key | None, Item] | None:
        """
        Attempts to parse the next item and returns it, along with its key
        if the item is value-like.

        Whitespace and comments are returned with a None key. None is
        returned when a table header ("[") is reached.
        """
        self.mark()
        with self._state as state:
            while True:
                c = self._current
                if c == "\n":
                    # Found a newline; Return all whitespace found up to this point.
                    self.inc()

                    return None, Whitespace(self.extract())
                elif c in " \t\r":
                    # Skip whitespace.
                    if not self.inc():
                        return None, Whitespace(self.extract())
                elif c == "#":
                    # Found a comment, parse it
                    indent = self.extract()
                    cws, comment, trail = self._parse_comment_trail()

                    return None, Comment(Trivia(indent, cws, comment, trail))
                elif c == "[":
                    # Found a table, delegate to the calling function.
                    return None
                else:
                    # Beginning of a KV pair.
                    # Return to beginning of whitespace so it gets included
                    # as indentation for the KV about to be parsed.
                    state.restore = True
                    break

        return self._parse_key_value(True)

    def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
        """
        Returns (comment_ws, comment, trail)
        If there is no comment, comment_ws and comment will
        simply be empty.

        When parse_trail is False the trailing whitespace/newline is left
        unconsumed and trail is empty.

        Raises InvalidControlChar for a disallowed control character inside
        the comment and UnexpectedCharError for any other non-whitespace
        character found before the comment or newline.
        """
        if self.end():
            return "", "", ""

        comment = ""
        comment_ws = ""
        self.mark()

        while True:
            c = self._current

            if c == "\n":
                break
            elif c == "#":
                comment_ws = self.extract()

                self.mark()
                self.inc()  # Skip #

                # The comment itself
                while not self.end() and not self._current.is_nl():
                    code = ord(self._current)
                    # Tab is the only control character allowed in comments.
                    if code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I:
                        raise self.parse_error(InvalidControlChar, code, "comments")

                    if not self.inc():
                        break

                comment = self.extract()
                self.mark()

                break
            elif c in " \t\r":
                self.inc()
            else:
                raise self.parse_error(UnexpectedCharError, c)

            if self.end():
                break

        trail = ""
        if parse_trail:
            while self._current.is_spaces() and self.inc():
                pass

            if self._current == "\r":
                self.inc()

            if self._current == "\n":
                self.inc()

            if self._idx != self._marker or self._current.is_ws():
                trail = self.extract()

        return comment_ws, comment, trail

    def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
        """
        Parses a "key = value" pair, including its leading indentation.

        When parse_comment is True, a trailing comment and the rest of the
        line are parsed into the value's trivia; otherwise the value's trail
        is cleared.

        Raises UnexpectedCharError if "=" is missing or repeated.
        """
        # Leading indent
        self.mark()

        while self._current.is_spaces() and self.inc():
            pass

        indent = self.extract()

        # Key
        key = self._parse_key()

        self.mark()

        found_equals = self._current == "="
        while self._current.is_kv_sep() and self.inc():
            if self._current == "=":
                if found_equals:
                    raise self.parse_error(UnexpectedCharError, "=")
                else:
                    found_equals = True
        if not found_equals:
            raise self.parse_error(UnexpectedCharError, self._current)

        if not key.sep:
            key.sep = self.extract()
        else:
            key.sep += self.extract()

        # Value
        val = self._parse_value()
        # Comment
        if parse_comment:
            cws, comment, trail = self._parse_comment_trail()
            meta = val.trivia
            # Keep comment whitespace the value parser may already have set.
            if not meta.comment_ws:
                meta.comment_ws = cws

            meta.comment = comment
            meta.trail = trail
        else:
            val.trivia.trail = ""

        val.trivia.indent = indent

        return key, val

    def _parse_key(self) -> Key:
        """
        Parses a Key at the current position;
        WS before the key must be exhausted first at the callsite.
        """
        self.mark()
        while self._current.is_spaces() and self.inc():
            # Skip any leading whitespace
            pass
        if self._current in "\"'":
            return self._parse_quoted_key()
        else:
            return self._parse_bare_key()

    def _parse_quoted_key(self) -> Key:
        """
        Parses a key enclosed in either single or double quotes.

        Raises UnexpectedCharError if the key turns out to be a multiline
        string.
        """
        # Extract the leading whitespace
        original = self.extract()
        quote_style = self._current
        key_type = next((t for t in KeyType if t.value == quote_style), None)

        if key_type is None:
            raise RuntimeError("Should not have entered _parse_quoted_key()")

        key_str = self._parse_string(
            StringType.SLB if key_type == KeyType.Basic else StringType.SLL
        )
        if key_str._t.is_multiline():
            raise self.parse_error(UnexpectedCharError, key_str._t.value)
        original += key_str.as_string()
        self.mark()
        while self._current.is_spaces() and self.inc():
            pass
        original += self.extract()
        key = SingleKey(str(key_str), t=key_type, sep="", original=original)
        if self._current == ".":
            # Dotted key: parse the remainder and chain it on.
            self.inc()
            key = key.concat(self._parse_key())

        return key

    def _parse_bare_key(self) -> Key:
        """
        Parses a bare key.

        Raises EmptyKeyError for an empty key and ParseError for a key
        containing a space.
        """
        while (
            self._current.is_bare_key_char() or self._current.is_spaces()
        ) and self.inc():
            pass

        original = self.extract()
        key = original.strip()
        if not key:
            # Empty key
            raise self.parse_error(EmptyKeyError)

        if " " in key:
            # Bare key with spaces in it
            raise self.parse_error(ParseError, f'Invalid key "{key}"')

        key = SingleKey(key, KeyType.Bare, "", original)

        if self._current == ".":
            # Dotted key: parse the remainder and chain it on.
            self.inc()
            key = key.concat(self._parse_key())

        return key

    def _parse_value(self) -> Item:
        """
        Attempts to parse a value at the current position.

        Dispatches on the first character to the string, boolean, array,
        inline table, number or date/time parsers. Raises one of the
        Invalid*Error exceptions for malformed numbers and dates, and
        UnexpectedCharError when no value can start here.
        """
        self.mark()
        c = self._current
        trivia = Trivia()

        if c == StringType.SLB.value:
            return self._parse_basic_string()
        elif c == StringType.SLL.value:
            return self._parse_literal_string()
        elif c == BoolType.TRUE.value[0]:
            return self._parse_true()
        elif c == BoolType.FALSE.value[0]:
            return self._parse_false()
        elif c == "[":
            return self._parse_array()
        elif c == "{":
            return self._parse_inline_table()
        elif c in "+-" or self._peek(4) in {
            "+inf",
            "-inf",
            "inf",
            "+nan",
            "-nan",
            "nan",
        }:
            # Number
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        elif c in string.digits:
            # Integer, Float, Date, Time or DateTime
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            m = RFC_3339_LOOSE.match(raw)
            if m:
                if m.group(1) and m.group(5):
                    # datetime
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateTimeError) from None

                if m.group(1):
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.date)
                        date = Date(dt.year, dt.month, dt.day, trivia, raw)
                        # The token stopped at a space; keep reading (spaces
                        # included) in case a time part follows the date.
                        self.mark()
                        while self._current not in "\t\n\r#,]}" and self.inc():
                            pass

                        time_raw = self.extract()
                        time_part = time_raw.rstrip()
                        trivia.comment_ws = time_raw[len(time_part) :]
                        if not time_part:
                            return date

                        dt = parse_rfc3339(raw + time_part)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw + time_part,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateError) from None

                if m.group(5):
                    try:
                        t = parse_rfc3339(raw)
                        assert isinstance(t, datetime.time)
                        return Time(
                            t.hour,
                            t.minute,
                            t.second,
                            t.microsecond,
                            t.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidTimeError) from None

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        else:
            raise self.parse_error(UnexpectedCharError, c)

    def _parse_true(self):
        """
        Parses the boolean literal for BoolType.TRUE.
        """
        return self._parse_bool(BoolType.TRUE)

    def _parse_false(self):
        """
        Parses the boolean literal for BoolType.FALSE.
        """
        return self._parse_bool(BoolType.FALSE)

    def _parse_bool(self, style: BoolType) -> Bool:
        """
        Parses a boolean of the given style at the current position.
        """
        with self._state:
            style = BoolType(style)

            # only keep parsing for bool if the characters match the style
            # try consuming rest of chars in style
            for c in style:
                self.consume(c, min=1, max=1)

            return Bool(style, Trivia())

    def _parse_array(self) -> Array:
        """
        Parses an array, keeping whitespace, comments and commas as elements
        so the original layout is retained.

        Raises UnexpectedCharError for a character that cannot continue the
        array and ParseError if the collected elements do not form a valid
        Array.
        """
        # Consume opening bracket, EOF here is an issue (middle of array)
        self.inc(exception=UnexpectedEofError)

        elems: list[Item] = []
        prev_value = None
        while True:
            # consume whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES + TOMLChar.NL)
            indent = self._src[mark : self._idx]
            newline = set(TOMLChar.NL) & set(indent)
            if newline:
                elems.append(Whitespace(indent))
                continue

            # consume comment
            if self._current == "#":
                cws, comment, trail = self._parse_comment_trail(parse_trail=False)
                elems.append(Comment(Trivia(indent, cws, comment, trail)))
                continue

            # consume indent
            if indent:
                elems.append(Whitespace(indent))
                continue

            # consume value
            if not prev_value:
                try:
                    elems.append(self._parse_value())
                    prev_value = True
                    continue
                except UnexpectedCharError:
                    # Not a value; fall through to the comma/bracket checks.
                    pass

            # consume comma
            if prev_value and self._current == ",":
                self.inc(exception=UnexpectedEofError)
                elems.append(Whitespace(","))
                prev_value = False
                continue

            # consume closing bracket
            if self._current == "]":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            raise self.parse_error(UnexpectedCharError, self._current)

        try:
            return Array(elems, Trivia())
        except ValueError as e:
            # Report the failure instead of silently returning None, which
            # callers expecting an Array cannot handle.
            raise self.parse_error(ParseError, str(e)) from e

    def _parse_inline_table(self) -> InlineTable:
        """
        Parses an inline table ("{ key = value, ... }").

        Raises UnexpectedCharError for a leading, doubled or trailing comma
        and for a missing comma between key-value pairs.
        """
        # consume opening brace, EOF here is an issue (middle of inline table)
        self.inc(exception=UnexpectedEofError)

        elems = Container(True)
        trailing_comma = None
        while True:
            # consume leading whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            if not trailing_comma:
                # None: empty inline table
                # False: previous key-value pair was not followed by a comma
                if self._current == "}":
                    # consume closing brace, EOF here doesn't matter
                    self.inc()
                    break

                if (
                    trailing_comma is False
                    or trailing_comma is None
                    and self._current == ","
                ):
                    # Either the previous key-value pair was not followed by a comma
                    # or the table has an unexpected leading comma.
                    raise self.parse_error(UnexpectedCharError, self._current)
            else:
                # True: previous key-value pair was followed by a comma
                if self._current == "}" or self._current == ",":
                    raise self.parse_error(UnexpectedCharError, self._current)

            key, val = self._parse_key_value(False)
            elems.add(key, val)

            # consume trailing whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            # consume trailing comma
            trailing_comma = self._current == ","
            if trailing_comma:
                # consume the comma, EOF here is an issue (middle of inline table)
                self.inc(exception=UnexpectedEofError)

        return InlineTable(elems, Trivia())

    def _parse_number(self, raw: str, trivia: Trivia) -> Item | None:
        """
        Converts the raw text of a number into an Integer or a Float.

        Returns None when the text is not a valid number.
        """
        # Leading zeros are not allowed
        sign = ""
        if raw.startswith(("+", "-")):
            sign = raw[0]
            raw = raw[1:]

        if len(raw) > 1 and (
            raw.startswith("0")
            and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))
            or sign
            and raw.startswith(".")
        ):
            return None

        # Prefixed (binary/octal/hex) integers cannot carry a sign.
        if raw.startswith(("0o", "0x", "0b")) and sign:
            return None

        digits = "[0-9]"
        base = 10
        if raw.startswith("0b"):
            digits = "[01]"
            base = 2
        elif raw.startswith("0o"):
            digits = "[0-7]"
            base = 8
        elif raw.startswith("0x"):
            digits = "[0-9a-f]"
            base = 16

        # Underscores should be surrounded by digits
        clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()

        if "_" in clean:
            return None

        # A decimal point must be followed by at least one digit.
        if (
            clean.endswith(".")
            or not clean.startswith("0x")
            and clean.split("e", 1)[0].endswith(".")
        ):
            return None

        try:
            return Integer(int(sign + clean, base), trivia, sign + raw)
        except ValueError:
            try:
                return Float(float(sign + clean), trivia, sign + raw)
            except ValueError:
                return None

    def _parse_literal_string(self) -> String:
        """
        Parses a literal (single-quoted) string at the current position.
        """
        with self._state:
            return self._parse_string(StringType.SLL)

    def _parse_basic_string(self) -> String:
        """
        Parses a basic (double-quoted) string at the current position.
        """
        with self._state:
            return self._parse_string(StringType.SLB)

    def _parse_escaped_char(self, multiline):
        """
        Parses the character(s) following a backslash in a basic string and
        returns the text they stand for.

        Raises InvalidUnicodeValueError for a bad \\u / \\U escape and
        InvalidCharInStringError for any other invalid escape.
        """
        if multiline and self._current.is_ws():
            # When the last non-whitespace character on a line is
            # a \, it will be trimmed along with all whitespace
            # (including newlines) up to the next non-whitespace
            # character or closing delimiter.
            # """\
            #     hello \
            #     world"""
            tmp = ""
            while self._current.is_ws():
                tmp += self._current
                # consume the whitespace, EOF here is an issue
                # (middle of string)
                self.inc(exception=UnexpectedEofError)

            # the escape followed by whitespace must have a newline
            # before any other chars
            if "\n" not in tmp:
                raise self.parse_error(InvalidCharInStringError, self._current)

            return ""

        if self._current in _escaped:
            c = _escaped[self._current]

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            return c

        if self._current in {"u", "U"}:
            # this needs to be a unicode
            u, ue = self._peek_unicode(self._current == "U")
            if u is not None:
                # consume the U char and the unicode value
                self.inc_n(len(ue) + 1)

                return u

            raise self.parse_error(InvalidUnicodeValueError)

        raise self.parse_error(InvalidCharInStringError, self._current)

    def _parse_string(self, delim: StringType) -> String:
        """
        Parses a string that opens with the given single-line delimiter.

        Three consecutive delimiters switch to the multiline variant of the
        string type. Raises InvalidControlChar for disallowed control
        characters and UnexpectedEofError if the input ends mid-string.
        """
        # only keep parsing for string if the current character matches the delim
        if self._current != delim.unit:
            raise self.parse_error(
                InternalParserError,
                f"Invalid character for string type {delim}",
            )

        # consume the opening/first delim, EOF here is an issue
        # (middle of string or middle of delim)
        self.inc(exception=UnexpectedEofError)

        if self._current == delim.unit:
            # consume the closing/second delim, we do not care if EOF occurs as
            # that would simply imply an empty single line string
            if not self.inc() or self._current != delim.unit:
                # Empty string
                return String(delim, "", "", Trivia())

            # consume the third delim, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            delim = delim.toggle()  # convert delim to multi delim

        self.mark()  # to extract the original string with whitespace and all
        value = ""

        # A newline immediately following the opening delimiter will be trimmed.
        if delim.is_multiline():
            if self._current == "\n":
                # consume the newline, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # Look one character ahead for "\r\n" without moving.
                cur = self._current
                with self._state(restore=True):
                    if self.inc():
                        cur += self._current
                if cur == "\r\n":
                    self.inc_n(2, exception=UnexpectedEofError)

        escaped = False  # whether the previous key was ESCAPE
        while True:
            code = ord(self._current)
            if (
                delim.is_singleline()
                and not escaped
                and (code == CHR_DEL or code <= CTRL_CHAR_LIMIT and code != CTRL_I)
            ) or (
                delim.is_multiline()
                and not escaped
                and (
                    code == CHR_DEL
                    or code <= CTRL_CHAR_LIMIT
                    and code not in [CTRL_I, CTRL_J, CTRL_M]
                )
            ):
                raise self.parse_error(InvalidControlChar, code, "strings")
            elif not escaped and self._current == delim.unit:
                # try to process current as a closing delim
                original = self.extract()

                if delim.is_multiline():
                    # Consume the delimiters to see if we are at the end of the string
                    close = ""
                    while self._current == delim.unit:
                        close += self._current
                        self.inc()

                    if len(close) < 3:
                        # Not a triple quote, leave in result as-is.
                        # Adding back the characters we already consumed
                        value += close
                        continue

                    if len(close) == 3:
                        # We are at the end of the string
                        return String(delim, value, original, Trivia())

                    if len(close) >= 6:
                        raise self.parse_error(InvalidCharInStringError, self._current)

                    # Four or five quotes: the extra ones belong to the string.
                    value += close[:-3]
                    original += close[:-3]

                    return String(delim, value, original, Trivia())
                else:
                    # consume the closing delim, we do not care if EOF occurs as
                    # that would simply imply the end of self._src
                    self.inc()

                return String(delim, value, original, Trivia())
            elif delim.is_basic() and escaped:
                # attempt to parse the current char as an escaped value, an exception
                # is raised if this fails
                value += self._parse_escaped_char(delim.is_multiline())

                # no longer escaped
                escaped = False
            elif delim.is_basic() and self._current == "\\":
                # the next char is being escaped
                escaped = True

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # this is either a literal string where we keep everything as is,
                # or this is not a special escaped char in a basic string
                value += self._current

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)

    def _parse_table(
        self, parent_name: Key | None = None, parent: Table | None = None
    ) -> tuple[Key, Table | AoT]:
        """
        Parses a table element.

        parent_name is the key of the enclosing table when parsing a child
        table; parent is accepted but not read by this method.

        Returns the key under which the result should be stored together
        with the parsed Table (or AoT).

        Raises UnexpectedEofError or UnexpectedCharError for a malformed
        header, EmptyTableNameError for an empty name and ParseError for a
        bare name containing a space.
        """
        if self._current != "[":
            raise self.parse_error(
                InternalParserError, "_parse_table() called on non-bracket character."
            )

        indent = self.extract()
        self.inc()  # Skip opening bracket

        if self.end():
            raise self.parse_error(UnexpectedEofError)

        is_aot = False
        if self._current == "[":
            if not self.inc():
                raise self.parse_error(UnexpectedEofError)

            is_aot = True
        try:
            key = self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        elif self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)

        key.sep = ""
        full_key = key
        name_parts = tuple(key)
        if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
            raise self.parse_error(
                ParseError, f'Invalid table name "{full_key.as_string()}"'
            )

        missing_table = False
        if parent_name:
            parent_name_parts = tuple(parent_name)
        else:
            parent_name_parts = ()

        if len(name_parts) > len(parent_name_parts) + 1:
            missing_table = True

        # Keep only the part of the name relative to the parent.
        name_parts = name_parts[len(parent_name_parts) :]

        values = Container(True)

        self.inc()  # Skip closing bracket
        if is_aot:
            # An array-of-tables header must close with a second "]".
            if self.end():
                raise self.parse_error(UnexpectedEofError)
            if self._current != "]":
                raise self.parse_error(UnexpectedCharError, self._current)
            self.inc()

        cws, comment, trail = self._parse_comment_trail()

        result = Null()
        table = Table(
            values,
            Trivia(indent, cws, comment, trail),
            is_aot,
            name=name_parts[0].key if name_parts else key.key,
            display_name=full_key.as_string(),
            is_super_table=False,
        )

        if len(name_parts) > 1:
            if missing_table:
                # Missing super table
                # i.e. a table initialized like this: [foo.bar]
                # without initializing [foo]
                #
                # So we have to create the parent tables
                table = Table(
                    Container(True),
                    Trivia("", cws, comment, trail),
                    is_aot and name_parts[0] in self._aot_stack,
                    is_super_table=True,
                    name=name_parts[0].key,
                )

                result = table
                key = name_parts[0]

                for i, _name in enumerate(name_parts[1:]):
                    child = table.get(
                        _name,
                        Table(
                            Container(True),
                            Trivia(indent, cws, comment, trail),
                            is_aot and i == len(name_parts) - 2,
                            is_super_table=i < len(name_parts) - 2,
                            name=_name.key,
                            display_name=(
                                full_key.as_string()
                                if i == len(name_parts) - 2
                                else None
                            ),
                        ),
                    )

                    if is_aot and i == len(name_parts) - 2:
                        table.raw_append(
                            _name, AoT([child], name=table.name, parsed=True)
                        )
                    else:
                        table.raw_append(_name, child)

                    table = child
                    values = table.value
        else:
            if name_parts:
                key = name_parts[0]

        while not self.end():
            item = self._parse_item()
            if item:
                _key, item = item
                if not self._merge_ws(item, values):
                    table.raw_append(_key, item)
            else:
                if self._current == "[":
                    _, key_next = self._peek_table()

                    if self._is_child(full_key, key_next):
                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                        # Picking up any sibling
                        while not self.end():
                            _, key_next = self._peek_table()

                            if not self._is_child(full_key, key_next):
                                break

                            key_next, table_next = self._parse_table(full_key, table)

                            table.raw_append(key_next, table_next)

                    break
                else:
                    raise self.parse_error(
                        InternalParserError,
                        "_parse_item() returned None on a non-bracket character.",
                    )
        table.value._validate_out_of_order_table()
        if isinstance(result, Null):
            result = table

            if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
                result = self._parse_aot(result, full_key)

        return key, result

    def _peek_table(self) -> tuple[bool, Key]:
        """
        Peeks ahead non-intrusively by cloning then restoring the
        initial state of the parser.

        Returns whether the table about to be parsed is part of an AoT,
        as well as its name.
        """
        # we always want to restore after exiting this scope
        with self._state(save_marker=True, restore=True):
            if self._current != "[":
                raise self.parse_error(
                    InternalParserError,
                    "_peek_table() entered on non-bracket character",
                )

            # AoT
            self.inc()
            is_aot = False
            if self._current == "[":
                self.inc()
                is_aot = True
            try:
                return is_aot, self._parse_key()
            except EmptyKeyError:
                raise self.parse_error(EmptyTableNameError) from None

    def _parse_aot(self, first: Table, name_first: Key) -> AoT:
        """
        Parses all siblings of the provided table first and bundles them into
        an AoT.
        """
        payload = [first]
        self._aot_stack.append(name_first)
        while not self.end():
            is_aot_next, name_next = self._peek_table()
            if is_aot_next and name_next == name_first:
                _, table = self._parse_table(name_first)
                payload.append(table)
            else:
                break

        self._aot_stack.pop()

        return AoT(payload, parsed=True)

    def _peek(self, n: int) -> str:
        """
        Peeks ahead n characters.

        n is the max number of characters that will be peeked.
        Peeking stops early at whitespace, "#", ",", "]", "}" or the end of
        the input.
        """
        # we always want to restore after exiting this scope
        with self._state(restore=True):
            buf = ""
            for _ in range(n):
                if self._current not in " \t\n\r#,]}" + self._src.EOF:
                    buf += self._current
                    self.inc()
                    continue

                break
            return buf

    def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
        """
        Peeks ahead non-intrusively by cloning then restoring the
        initial state of the parser.

        Reads the 4 (or 8 when is_long is True) characters after the
        "u"/"U" prefix. Returns the decoded character and the digits it was
        read from; the character is None if the escape is invalid.
        """
        # we always want to restore after exiting this scope
        with self._state(save_marker=True, restore=True):
            if self._current not in {"u", "U"}:
                raise self.parse_error(
                    InternalParserError, "_peek_unicode() entered on non-unicode value"
                )

            self.inc()  # Dropping prefix
            self.mark()

            if is_long:
                chars = 8
            else:
                chars = 4

            if not self.inc_n(chars):
                value, extracted = None, None
            else:
                extracted = self.extract()

                # int() also accepts signs, a "0x" prefix, underscores and
                # surrounding whitespace, none of which are valid in an
                # escape, so insist on plain hexadecimal digits.
                if any(c not in string.hexdigits for c in extracted):
                    return None, None

                code_point = int(extracted, 16)

                # Surrogate code points are not valid escape values, in the
                # short form or the long one.
                if 0xD800 <= code_point <= 0xDFFF:
                    return None, None

                try:
                    value = chr(code_point)
                except (ValueError, OverflowError):
                    # Beyond the range chr() supports.
                    value = None

            return value, extracted