Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 98%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import datetime
4import re
5import string
7from tomlkit._compat import decode
8from tomlkit._utils import RFC_3339_LOOSE
9from tomlkit._utils import _escaped
10from tomlkit._utils import parse_rfc3339
11from tomlkit.container import Container
12from tomlkit.exceptions import EmptyKeyError
13from tomlkit.exceptions import EmptyTableNameError
14from tomlkit.exceptions import InternalParserError
15from tomlkit.exceptions import InvalidCharInStringError
16from tomlkit.exceptions import InvalidControlChar
17from tomlkit.exceptions import InvalidDateError
18from tomlkit.exceptions import InvalidDateTimeError
19from tomlkit.exceptions import InvalidNumberError
20from tomlkit.exceptions import InvalidTimeError
21from tomlkit.exceptions import InvalidUnicodeValueError
22from tomlkit.exceptions import ParseError
23from tomlkit.exceptions import UnexpectedCharError
24from tomlkit.exceptions import UnexpectedEofError
25from tomlkit.items import AoT
26from tomlkit.items import Array
27from tomlkit.items import Bool
28from tomlkit.items import BoolType
29from tomlkit.items import Comment
30from tomlkit.items import Date
31from tomlkit.items import DateTime
32from tomlkit.items import Float
33from tomlkit.items import InlineTable
34from tomlkit.items import Integer
35from tomlkit.items import Item
36from tomlkit.items import Key
37from tomlkit.items import KeyType
38from tomlkit.items import Null
39from tomlkit.items import SingleKey
40from tomlkit.items import String
41from tomlkit.items import StringType
42from tomlkit.items import Table
43from tomlkit.items import Time
44from tomlkit.items import Trivia
45from tomlkit.items import Whitespace
46from tomlkit.source import Source
47from tomlkit.toml_char import TOMLChar
48from tomlkit.toml_document import TOMLDocument
# Code points used when validating characters inside comments and strings.

# Horizontal tab
CTRL_I = 0x09
# Line feed
CTRL_J = 0x0A
# Carriage return
CTRL_M = 0x0D
# Highest code point of the C0 control-character range
CTRL_CHAR_LIMIT = 0x1F
# DEL
CHR_DEL = 0x7F
class Parser:
    """
    Parser for TOML documents.

    The input is wrapped in a ``Source`` and consumed character by character;
    ``parse()`` returns the resulting ``TOMLDocument``.
    """

    def __init__(self, string: str | bytes) -> None:
        """
        Creates a parser over the given input (decoded first if needed).
        """
        # Input to parse
        self._src = Source(decode(string))

        # Keys of the arrays of tables currently being parsed (innermost last)
        self._aot_stack: list[Key] = []

    @property
    def _state(self):
        """
        The state object of the underlying source.
        """
        return self._src.state

    @property
    def _idx(self):
        """
        The current index in the underlying source.
        """
        return self._src.idx

    @property
    def _current(self):
        """
        The current character of the underlying source.
        """
        return self._src.current

    @property
    def _marker(self):
        """
        The current marker position of the underlying source.
        """
        return self._src.marker

    def extract(self) -> str:
        """
        Extracts the value between marker and index
        """
        return self._src.extract()

    def inc(self, exception: type[ParseError] | None = None) -> bool:
        """
        Increments the parser if the end of the input has not been reached.
        Returns whether or not it was able to advance.
        """
        return self._src.inc(exception=exception)

    def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
        """
        Increments the parser by n characters
        if the end of the input has not been reached.
        """
        return self._src.inc_n(n=n, exception=exception)

    def consume(self, chars, min=0, max=-1):
        """
        Consumes characters contained in ``chars``, delegating the
        min/max bookkeeping to the underlying source.
        """
        return self._src.consume(chars=chars, min=min, max=max)

    def end(self) -> bool:
        """
        Returns True if the parser has reached the end of the input.
        """
        return self._src.end()

    def mark(self) -> None:
        """
        Sets the marker to the index's current position
        """
        self._src.mark()

    def parse_error(self, exception=ParseError, *args, **kwargs):
        """
        Creates a generic "parse error" at the current position.
        """
        return self._src.parse_error(exception, *args, **kwargs)

    def parse(self) -> TOMLDocument:
        """
        Parses the whole input and returns the resulting document.

        Any exception raised while appending an item to the document is
        re-raised as a ParseError located at the current position.
        """
        body = TOMLDocument(True)

        # Take all keyvals outside of tables/AoT's.
        while not self.end():
            # Break out if a table is found
            if self._current == "[":
                break

            # Otherwise, take and append one KV
            item = self._parse_item()
            if not item:
                break

            key, value = item
            if (key is not None and key.is_multi()) or not self._merge_ws(value, body):
                # We actually have a table
                try:
                    body.append(key, value)
                except Exception as e:
                    raise self.parse_error(ParseError, str(e)) from e

            self.mark()

        while not self.end():
            key, value = self._parse_table()
            if isinstance(value, Table) and value.is_aot_element():
                # This is just the first table in an AoT. Parse the rest of the array
                # along with it.
                value = self._parse_aot(value, key)

            try:
                body.append(key, value)
            except Exception as e:
                raise self.parse_error(ParseError, str(e)) from e

        body.parsing(False)

        return body

    def _merge_ws(self, item: Item, container: Container) -> bool:
        """
        Merges the given Item with the last one currently in the given Container if
        both are whitespace items.

        Returns True if the items were merged.
        """
        last = container.last_item()
        if not last:
            return False

        if not isinstance(item, Whitespace) or not isinstance(last, Whitespace):
            return False

        # Both runs are contiguous and end at the current index, so the merged
        # whitespace can be sliced directly out of the source.
        start = self._idx - (len(last.s) + len(item.s))
        container.body[-1] = (
            container.body[-1][0],
            Whitespace(self._src[start : self._idx]),
        )

        return True

    def _is_child(self, parent: Key, child: Key) -> bool:
        """
        Returns whether a key is strictly a child of another key.
        AoT siblings are not considered children of one another.
        """
        parent_parts = tuple(parent)
        child_parts = tuple(child)

        if parent_parts == child_parts:
            return False

        return parent_parts == child_parts[: len(parent_parts)]

    def _parse_item(self) -> tuple[Key | None, Item] | None:
        """
        Attempts to parse the next item and returns it, along with its key
        if the item is value-like.

        Returns None when a table header ("[") is found, so that the caller
        can handle it.
        """
        self.mark()
        with self._state as state:
            while True:
                c = self._current
                if c == "\n":
                    # Found a newline; Return all whitespace found up to this point.
                    self.inc()

                    return None, Whitespace(self.extract())
                elif c in " \t\r":
                    # Skip whitespace.
                    if not self.inc():
                        return None, Whitespace(self.extract())
                elif c == "#":
                    # Found a comment, parse it
                    indent = self.extract()
                    cws, comment, trail = self._parse_comment_trail()

                    return None, Comment(Trivia(indent, cws, comment, trail))
                elif c == "[":
                    # Found a table, delegate to the calling function.
                    return
                else:
                    # Beginning of a KV pair.
                    # Return to beginning of whitespace so it gets included
                    # as indentation for the KV about to be parsed.
                    state.restore = True
                    break

        return self._parse_key_value(True)

    def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
        """
        Returns (comment_ws, comment, trail)
        If there is no comment, comment_ws and comment will
        simply be empty.

        Raises InvalidControlChar for a forbidden control character inside
        the comment and UnexpectedCharError for any other unexpected
        character before the end of the line.
        """
        if self.end():
            return "", "", ""

        comment = ""
        comment_ws = ""
        self.mark()

        while True:
            c = self._current

            if c == "\n":
                break
            elif c == "#":
                comment_ws = self.extract()

                self.mark()
                self.inc()  # Skip #

                # The comment itself
                while not self.end() and not self._current.is_nl():
                    code = ord(self._current)
                    if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                        raise self.parse_error(InvalidControlChar, code, "comments")

                    if not self.inc():
                        break

                comment = self.extract()
                self.mark()

                break
            elif c in " \t\r":
                self.inc()
            else:
                raise self.parse_error(UnexpectedCharError, c)

            if self.end():
                break

        trail = ""
        if parse_trail:
            while self._current.is_spaces() and self.inc():
                pass

            if self._current == "\r":
                self.inc()

            if self._current == "\n":
                self.inc()

            if self._idx != self._marker or self._current.is_ws():
                trail = self.extract()

        return comment_ws, comment, trail

    def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
        """
        Parses a "key = value" pair at the current position.

        When parse_comment is True, the trailing comment and line ending are
        parsed as well and stored in the value's trivia.
        """
        # Leading indent
        self.mark()

        while self._current.is_spaces() and self.inc():
            pass

        indent = self.extract()

        # Key
        key = self._parse_key()

        self.mark()

        # Exactly one "=" is allowed in the separator
        found_equals = self._current == "="
        while self._current.is_kv_sep() and self.inc():
            if self._current == "=":
                if found_equals:
                    raise self.parse_error(UnexpectedCharError, "=")
                else:
                    found_equals = True
        if not found_equals:
            raise self.parse_error(UnexpectedCharError, self._current)

        if not key.sep:
            key.sep = self.extract()
        else:
            key.sep += self.extract()

        # Value
        val = self._parse_value()
        # Comment
        if parse_comment:
            cws, comment, trail = self._parse_comment_trail()
            meta = val.trivia
            if not meta.comment_ws:
                meta.comment_ws = cws

            meta.comment = comment
            meta.trail = trail
        else:
            val.trivia.trail = ""

        val.trivia.indent = indent

        return key, val

    def _parse_key(self) -> Key:
        """
        Parses a Key at the current position;
        WS before the key must be exhausted first at the callsite.
        """
        self.mark()
        while self._current.is_spaces() and self.inc():
            # Skip any leading whitespace
            pass
        if self._current in "\"'":
            return self._parse_quoted_key()
        else:
            return self._parse_bare_key()

    def _parse_quoted_key(self) -> Key:
        """
        Parses a key enclosed in either single or double quotes.

        Multi-line strings are rejected with UnexpectedCharError. A dotted
        continuation is parsed recursively and concatenated.
        """
        # Extract the leading whitespace
        original = self.extract()
        quote_style = self._current
        key_type = next((t for t in KeyType if t.value == quote_style), None)

        if key_type is None:
            raise RuntimeError("Should not have entered _parse_quoted_key()")

        key_str = self._parse_string(
            StringType.SLB if key_type == KeyType.Basic else StringType.SLL
        )
        if key_str._t.is_multiline():
            raise self.parse_error(UnexpectedCharError, key_str._t.value)
        original += key_str.as_string()
        self.mark()
        while self._current.is_spaces() and self.inc():
            pass
        original += self.extract()
        key = SingleKey(str(key_str), t=key_type, sep="", original=original)
        if self._current == ".":
            self.inc()
            key = key.concat(self._parse_key())

        return key

    def _parse_bare_key(self) -> Key:
        """
        Parses a bare key.

        Raises EmptyKeyError when no key is found and ParseError when the
        key contains embedded whitespace. A dotted continuation is parsed
        recursively and concatenated.
        """
        while (
            self._current.is_bare_key_char() or self._current.is_spaces()
        ) and self.inc():
            pass

        original = self.extract()
        key = original.strip()
        if not key:
            # Empty key
            raise self.parse_error(EmptyKeyError)

        if " " in key or "\t" in key:
            # Bare key with whitespace in it. Tabs must be rejected too:
            # they are consumed by the loop above just like spaces.
            raise self.parse_error(ParseError, f'Invalid key "{key}"')

        key = SingleKey(key, KeyType.Bare, "", original)

        if self._current == ".":
            self.inc()
            key = key.concat(self._parse_key())

        return key

    def _parse_value(self) -> Item:
        """
        Attempts to parse a value at the current position.

        Dispatches on the first character to the string, boolean, array,
        inline table, number or date/time parsers. Raises the matching
        Invalid*Error when the value is malformed and UnexpectedCharError
        when the first character cannot start a value.
        """
        self.mark()
        c = self._current
        trivia = Trivia()

        if c == StringType.SLB.value:
            return self._parse_basic_string()
        elif c == StringType.SLL.value:
            return self._parse_literal_string()
        elif c == BoolType.TRUE.value[0]:
            return self._parse_true()
        elif c == BoolType.FALSE.value[0]:
            return self._parse_false()
        elif c == "[":
            return self._parse_array()
        elif c == "{":
            return self._parse_inline_table()
        elif c in "+-" or self._peek(4) in {
            "+inf",
            "-inf",
            "inf",
            "+nan",
            "-nan",
            "nan",
        }:
            # Number
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        elif c in string.digits:
            # Integer, Float, Date, Time or DateTime
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            m = RFC_3339_LOOSE.match(raw)
            if m:
                if m.group(1) and m.group(5):
                    # datetime
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateTimeError) from None

                if m.group(1):
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.date)
                        date = Date(dt.year, dt.month, dt.day, trivia, raw)
                        # A time may follow the date after a single space,
                        # so keep reading up to the next real terminator.
                        self.mark()
                        while self._current not in "\t\n\r#,]}" and self.inc():
                            pass

                        time_raw = self.extract()
                        time_part = time_raw.rstrip()
                        trivia.comment_ws = time_raw[len(time_part) :]
                        if not time_part:
                            return date

                        dt = parse_rfc3339(raw + time_part)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw + time_part,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateError) from None

                if m.group(5):
                    try:
                        t = parse_rfc3339(raw)
                        assert isinstance(t, datetime.time)
                        return Time(
                            t.hour,
                            t.minute,
                            t.second,
                            t.microsecond,
                            t.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidTimeError) from None

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        else:
            raise self.parse_error(UnexpectedCharError, c)

    def _parse_true(self):
        """
        Parses the boolean literal for BoolType.TRUE.
        """
        return self._parse_bool(BoolType.TRUE)

    def _parse_false(self):
        """
        Parses the boolean literal for BoolType.FALSE.
        """
        return self._parse_bool(BoolType.FALSE)

    def _parse_bool(self, style: BoolType) -> Bool:
        """
        Parses a boolean literal of the given style, one character at a time.
        """
        with self._state:
            style = BoolType(style)

            # only keep parsing for bool if the characters match the style
            # try consuming rest of chars in style
            for c in style:
                self.consume(c, min=1, max=1)

            return Bool(style, Trivia())

    def _parse_array(self) -> Array:
        """
        Parses an array starting at its opening bracket.

        Whitespace, comments and commas are kept as elements so that the
        original formatting is preserved. Raises UnexpectedCharError on a
        malformed array and ParseError if the Array item cannot be built.
        """
        # Consume opening bracket, EOF here is an issue (middle of array)
        self.inc(exception=UnexpectedEofError)

        elems: list[Item] = []
        prev_value = None
        while True:
            # consume whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES + TOMLChar.NL)
            indent = self._src[mark : self._idx]
            newline = set(TOMLChar.NL) & set(indent)
            if newline:
                elems.append(Whitespace(indent))
                continue

            # consume comment
            if self._current == "#":
                cws, comment, trail = self._parse_comment_trail(parse_trail=False)
                elems.append(Comment(Trivia(indent, cws, comment, trail)))
                continue

            # consume indent
            if indent:
                elems.append(Whitespace(indent))
                continue

            # consume value
            if not prev_value:
                try:
                    elems.append(self._parse_value())
                    prev_value = True
                    continue
                except UnexpectedCharError:
                    # Not a value: fall through to the comma / closing
                    # bracket checks below.
                    pass

            # consume comma
            if prev_value and self._current == ",":
                self.inc(exception=UnexpectedEofError)
                # If the previous item is Whitespace, add to it
                if isinstance(elems[-1], Whitespace):
                    elems[-1]._s = elems[-1].s + ","
                else:
                    elems.append(Whitespace(","))
                prev_value = False
                continue

            # consume closing bracket
            if self._current == "]":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            raise self.parse_error(UnexpectedCharError, self._current)

        try:
            return Array(elems, Trivia())
        except ValueError as e:
            # Surface the failure as a parse error instead of silently
            # returning None, which callers cannot handle.
            raise self.parse_error(ParseError, str(e)) from e

    def _parse_inline_table(self) -> InlineTable:
        """
        Parses an inline table starting at its opening brace.

        Raises UnexpectedCharError for leading, doubled or trailing commas
        and for key/value pairs that are not separated by a comma.
        """
        # consume opening bracket, EOF here is an issue (middle of array)
        self.inc(exception=UnexpectedEofError)

        elems = Container(True)
        trailing_comma = None
        while True:
            # consume leading whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            if not trailing_comma:
                # None: empty inline table
                # False: previous key-value pair was not followed by a comma
                if self._current == "}":
                    # consume closing bracket, EOF here doesn't matter
                    self.inc()
                    break

                if trailing_comma is False or (
                    trailing_comma is None and self._current == ","
                ):
                    # Either the previous key-value pair was not followed by a comma
                    # or the table has an unexpected leading comma.
                    raise self.parse_error(UnexpectedCharError, self._current)
            else:
                # True: previous key-value pair was followed by a comma
                if self._current == "}" or self._current == ",":
                    raise self.parse_error(UnexpectedCharError, self._current)

            key, val = self._parse_key_value(False)
            elems.add(key, val)

            # consume trailing whitespace
            mark = self._idx
            self.consume(TOMLChar.SPACES)
            raw = self._src[mark : self._idx]
            if raw:
                elems.add(Whitespace(raw))

            # consume trailing comma
            trailing_comma = self._current == ","
            if trailing_comma:
                # consume the comma, EOF here is an issue (middle of inline table)
                self.inc(exception=UnexpectedEofError)

        return InlineTable(elems, Trivia())

    def _parse_number(self, raw: str, trivia: Trivia) -> Item | None:
        """
        Converts the raw text of a number into an Integer or a Float.

        Returns None when the text is not a valid TOML number. The text is
        checked against the TOML grammar before conversion, because int()
        and float() also accept forms TOML forbids (surrounding whitespace,
        non-ASCII digits, "infinity", mixed-case "Inf"/"NaN").
        """
        # Leading zeros are not allowed
        sign = ""
        if raw.startswith(("+", "-")):
            sign = raw[0]
            raw = raw[1:]

        if len(raw) > 1 and (
            (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e")))
            or (sign and raw.startswith("."))
        ):
            return None

        # Prefixed integers cannot carry a sign
        if raw.startswith(("0o", "0x", "0b")) and sign:
            return None

        digits = "[0-9]"
        base = 10
        if raw.startswith("0b"):
            digits = "[01]"
            base = 2
        elif raw.startswith("0o"):
            digits = "[0-7]"
            base = 8
        elif raw.startswith("0x"):
            digits = "[0-9a-f]"
            base = 16

        # Underscores should be surrounded by digits
        clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()

        if "_" in clean:
            return None

        if clean.endswith(".") or (
            not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".")
        ):
            return None

        if base == 10:
            if clean in ("inf", "nan"):
                # Only the exact lowercase spellings are valid
                if raw != clean:
                    return None
            elif not re.fullmatch(r"[0-9]+(?:\.[0-9]+)?(?:e[+-]?[0-9]+)?", clean):
                return None
        elif not re.fullmatch(f"{digits}+", clean[2:]):
            # Prefixed integer whose body is empty or has foreign characters
            return None

        try:
            return Integer(int(sign + clean, base), trivia, sign + raw)
        except ValueError:
            try:
                return Float(float(sign + clean), trivia, sign + raw)
            except ValueError:
                return None

    def _parse_literal_string(self) -> String:
        """
        Parses a literal (single-quoted) string.
        """
        with self._state:
            return self._parse_string(StringType.SLL)

    def _parse_basic_string(self) -> String:
        """
        Parses a basic (double-quoted) string.
        """
        with self._state:
            return self._parse_string(StringType.SLB)

    def _parse_escaped_char(self, multiline):
        """
        Parses the character(s) following a backslash in a basic string and
        returns the text they stand for.

        Raises InvalidCharInStringError for an unknown escape and
        InvalidUnicodeValueError for a malformed unicode escape.
        """
        if multiline and self._current.is_ws():
            # When the last non-whitespace character on a line is
            # a \, it will be trimmed along with all whitespace
            # (including newlines) up to the next non-whitespace
            # character or closing delimiter.
            # """\
            #     hello \
            #     world"""
            tmp = ""
            while self._current.is_ws():
                tmp += self._current
                # consume the whitespace, EOF here is an issue
                # (middle of string)
                self.inc(exception=UnexpectedEofError)

            # the escape followed by whitespace must have a newline
            # before any other chars
            if "\n" not in tmp:
                raise self.parse_error(InvalidCharInStringError, self._current)

            return ""

        if self._current in _escaped:
            c = _escaped[self._current]

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            return c

        if self._current in {"u", "U"}:
            # this needs to be a unicode
            u, ue = self._peek_unicode(self._current == "U")
            if u is not None:
                # consume the U char and the unicode value
                self.inc_n(len(ue) + 1)

                return u

            raise self.parse_error(InvalidUnicodeValueError)

        raise self.parse_error(InvalidCharInStringError, self._current)

    def _parse_string(self, delim: StringType) -> String:
        """
        Parses a string starting at its opening delimiter.

        ``delim`` is the single-line type; it is switched to the matching
        multi-line type when a triple delimiter is found. Raises
        InvalidControlChar, InvalidCharInStringError or UnexpectedEofError
        on malformed input.
        """
        # only keep parsing for string if the current character matches the delim
        if self._current != delim.unit:
            raise self.parse_error(
                InternalParserError,
                f"Invalid character for string type {delim}",
            )

        # consume the opening/first delim, EOF here is an issue
        # (middle of string or middle of delim)
        self.inc(exception=UnexpectedEofError)

        if self._current == delim.unit:
            # consume the closing/second delim, we do not care if EOF occurs as
            # that would simply imply an empty single line string
            if not self.inc() or self._current != delim.unit:
                # Empty string
                return String(delim, "", "", Trivia())

            # consume the third delim, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)

            delim = delim.toggle()  # convert delim to multi delim

        self.mark()  # to extract the original string with whitespace and all
        value = ""

        # A newline immediately following the opening delimiter will be trimmed.
        if delim.is_multiline():
            if self._current == "\n":
                # consume the newline, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # Look one character ahead for a "\r\n" pair
                cur = self._current
                with self._state(restore=True):
                    if self.inc():
                        cur += self._current
                if cur == "\r\n":
                    self.inc_n(2, exception=UnexpectedEofError)

        escaped = False  # whether the previous key was ESCAPE
        while True:
            code = ord(self._current)
            if (
                delim.is_singleline()
                and not escaped
                and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
            ) or (
                delim.is_multiline()
                and not escaped
                and (
                    code == CHR_DEL
                    or (
                        code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M]
                    )
                )
            ):
                raise self.parse_error(InvalidControlChar, code, "strings")
            elif not escaped and self._current == delim.unit:
                # try to process current as a closing delim
                original = self.extract()

                close = ""
                if delim.is_multiline():
                    # Consume the delimiters to see if we are at the end of the string
                    while self._current == delim.unit:
                        close += self._current
                        self.inc()

                    if len(close) < 3:
                        # Not a triple quote, leave in result as-is.
                        # Adding back the characters we already consumed
                        value += close
                        continue

                    if len(close) == 3:
                        # We are at the end of the string
                        return String(delim, value, original, Trivia())

                    if len(close) >= 6:
                        raise self.parse_error(InvalidCharInStringError, self._current)

                    # Four or five quotes: the extra ones belong to the value
                    value += close[:-3]
                    original += close[:-3]

                    return String(delim, value, original, Trivia())
                else:
                    # consume the closing delim, we do not care if EOF occurs as
                    # that would simply imply the end of self._src
                    self.inc()

                    return String(delim, value, original, Trivia())
            elif delim.is_basic() and escaped:
                # attempt to parse the current char as an escaped value, an exception
                # is raised if this fails
                value += self._parse_escaped_char(delim.is_multiline())

                # no longer escaped
                escaped = False
            elif delim.is_basic() and self._current == "\\":
                # the next char is being escaped
                escaped = True

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)
            else:
                # this is either a literal string where we keep everything as is,
                # or this is not a special escaped char in a basic string
                value += self._current

                # consume this char, EOF here is an issue (middle of string)
                self.inc(exception=UnexpectedEofError)

    def _parse_table(
        self, parent_name: Key | None = None, parent: Table | None = None
    ) -> tuple[Key, Table | AoT]:
        """
        Parses a table element.

        Handles both "[table]" and "[[array of tables]]" headers, creates
        missing super tables for dotted names, then parses the table body
        and any child tables that follow it.
        """
        if self._current != "[":
            raise self.parse_error(
                InternalParserError, "_parse_table() called on non-bracket character."
            )

        indent = self.extract()
        self.inc()  # Skip opening bracket

        if self.end():
            raise self.parse_error(UnexpectedEofError)

        is_aot = False
        if self._current == "[":
            if not self.inc():
                raise self.parse_error(UnexpectedEofError)

            is_aot = True
        try:
            key = self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        elif self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)

        key.sep = ""
        full_key = key
        name_parts = tuple(key)
        if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
            raise self.parse_error(
                ParseError, f'Invalid table name "{full_key.as_string()}"'
            )

        missing_table = False
        if parent_name:
            parent_name_parts = tuple(parent_name)
        else:
            parent_name_parts = ()

        if len(name_parts) > len(parent_name_parts) + 1:
            missing_table = True

        name_parts = name_parts[len(parent_name_parts) :]

        values = Container(True)

        self.inc()  # Skip closing bracket
        if is_aot:
            # An array-of-tables header must be closed by a second bracket
            if self.end():
                raise self.parse_error(UnexpectedEofError)
            if self._current != "]":
                raise self.parse_error(UnexpectedCharError, self._current)
            self.inc()

        cws, comment, trail = self._parse_comment_trail()

        result = Null()
        table = Table(
            values,
            Trivia(indent, cws, comment, trail),
            is_aot,
            name=name_parts[0].key if name_parts else key.key,
            display_name=full_key.as_string(),
            is_super_table=False,
        )

        if len(name_parts) > 1:
            if missing_table:
                # Missing super table
                # i.e. a table initialized like this: [foo.bar]
                # without initializing [foo]
                #
                # So we have to create the parent tables
                table = Table(
                    Container(True),
                    Trivia("", cws, comment, trail),
                    is_aot and name_parts[0] in self._aot_stack,
                    is_super_table=True,
                    name=name_parts[0].key,
                )

                result = table
                key = name_parts[0]

                for i, _name in enumerate(name_parts[1:]):
                    child = table.get(
                        _name,
                        Table(
                            Container(True),
                            Trivia(indent, cws, comment, trail),
                            is_aot and i == len(name_parts) - 2,
                            is_super_table=i < len(name_parts) - 2,
                            name=_name.key,
                            display_name=(
                                full_key.as_string() if i == len(name_parts) - 2 else None
                            ),
                        ),
                    )

                    if is_aot and i == len(name_parts) - 2:
                        table.raw_append(_name, AoT([child], name=table.name, parsed=True))
                    else:
                        table.raw_append(_name, child)

                    table = child
                    values = table.value
        else:
            if name_parts:
                key = name_parts[0]

        while not self.end():
            item = self._parse_item()
            if item:
                _key, item = item
                if not self._merge_ws(item, values):
                    table.raw_append(_key, item)
            else:
                if self._current == "[":
                    _, key_next = self._peek_table()

                    if self._is_child(full_key, key_next):
                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                        # Picking up any sibling
                        while not self.end():
                            _, key_next = self._peek_table()

                            if not self._is_child(full_key, key_next):
                                break

                            key_next, table_next = self._parse_table(full_key, table)

                            table.raw_append(key_next, table_next)

                    break
                else:
                    raise self.parse_error(
                        InternalParserError,
                        "_parse_item() returned None on a non-bracket character.",
                    )
        table.value._validate_out_of_order_table()
        if isinstance(result, Null):
            result = table

            if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
                result = self._parse_aot(result, full_key)

        return key, result

    def _peek_table(self) -> tuple[bool, Key]:
        """
        Peeks ahead non-intrusively by cloning then restoring the
        initial state of the parser.

        Returns the name of the table about to be parsed,
        as well as whether it is part of an AoT.
        """
        # we always want to restore after exiting this scope
        with self._state(save_marker=True, restore=True):
            if self._current != "[":
                raise self.parse_error(
                    InternalParserError,
                    "_peek_table() entered on non-bracket character",
                )

            # AoT
            self.inc()
            is_aot = False
            if self._current == "[":
                self.inc()
                is_aot = True
            try:
                return is_aot, self._parse_key()
            except EmptyKeyError:
                raise self.parse_error(EmptyTableNameError) from None

    def _parse_aot(self, first: Table, name_first: Key) -> AoT:
        """
        Parses all siblings of the provided table first and bundles them into
        an AoT.
        """
        payload = [first]
        self._aot_stack.append(name_first)
        while not self.end():
            is_aot_next, name_next = self._peek_table()
            if is_aot_next and name_next == name_first:
                _, table = self._parse_table(name_first)
                payload.append(table)
            else:
                break

        self._aot_stack.pop()

        return AoT(payload, parsed=True)

    def _peek(self, n: int) -> str:
        """
        Peeks ahead n characters.

        n is the max number of characters that will be peeked.
        Peeking stops early at a value terminator or at the end of input.
        """
        terminators = " \t\n\r#,]}" + self._src.EOF

        # we always want to restore after exiting this scope
        with self._state(restore=True):
            buf = ""
            for _ in range(n):
                if self._current in terminators:
                    break

                buf += self._current
                self.inc()
            return buf

    def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
        """
        Peeks ahead non-intrusively by cloning then restoring the
        initial state of the parser.

        Returns a (character, hex digits) pair. The character is None when
        the escape is invalid: too short, containing anything other than
        hexadecimal digits, naming a surrogate code point or out of range.
        """
        # we always want to restore after exiting this scope
        with self._state(save_marker=True, restore=True):
            if self._current not in {"u", "U"}:
                raise self.parse_error(
                    InternalParserError, "_peek_unicode() entered on non-unicode value"
                )

            self.inc()  # Dropping prefix
            self.mark()

            chars = 8 if is_long else 4

            if not self.inc_n(chars):
                return None, None

            extracted = self.extract()

            # int(x, 16) would also accept signs, whitespace, underscores and
            # a "0x" prefix, none of which may appear in an escape.
            if any(ch not in string.hexdigits for ch in extracted):
                return None, extracted

            code = int(extracted, 16)
            if 0xD800 <= code <= 0xDFFF:
                # Surrogates are not valid scalar values, in either the
                # 4-digit or the 8-digit form.
                return None, None

            try:
                value = chr(code)
            except (ValueError, OverflowError):
                # Beyond the Unicode range
                value = None

            return value, extracted