Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/tomlkit/parser.py: 98%
629 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:01 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 07:01 +0000
1import datetime
2import re
3import string
5from typing import List
6from typing import Optional
7from typing import Tuple
8from typing import Type
9from typing import Union
11from tomlkit._compat import decode
12from tomlkit._utils import RFC_3339_LOOSE
13from tomlkit._utils import _escaped
14from tomlkit._utils import parse_rfc3339
15from tomlkit.container import Container
16from tomlkit.exceptions import EmptyKeyError
17from tomlkit.exceptions import EmptyTableNameError
18from tomlkit.exceptions import InternalParserError
19from tomlkit.exceptions import InvalidCharInStringError
20from tomlkit.exceptions import InvalidControlChar
21from tomlkit.exceptions import InvalidDateError
22from tomlkit.exceptions import InvalidDateTimeError
23from tomlkit.exceptions import InvalidNumberError
24from tomlkit.exceptions import InvalidTimeError
25from tomlkit.exceptions import InvalidUnicodeValueError
26from tomlkit.exceptions import ParseError
27from tomlkit.exceptions import UnexpectedCharError
28from tomlkit.exceptions import UnexpectedEofError
29from tomlkit.items import AoT
30from tomlkit.items import Array
31from tomlkit.items import Bool
32from tomlkit.items import BoolType
33from tomlkit.items import Comment
34from tomlkit.items import Date
35from tomlkit.items import DateTime
36from tomlkit.items import Float
37from tomlkit.items import InlineTable
38from tomlkit.items import Integer
39from tomlkit.items import Item
40from tomlkit.items import Key
41from tomlkit.items import KeyType
42from tomlkit.items import Null
43from tomlkit.items import SingleKey
44from tomlkit.items import String
45from tomlkit.items import StringType
46from tomlkit.items import Table
47from tomlkit.items import Time
48from tomlkit.items import Trivia
49from tomlkit.items import Whitespace
50from tomlkit.source import Source
51from tomlkit.toml_char import TOMLChar
52from tomlkit.toml_document import TOMLDocument
# Code points used when rejecting control characters in comments and strings.
CTRL_I = 0x09  # Horizontal tab
CTRL_J = 0x0A  # Line feed
CTRL_M = 0x0D  # Carriage return
CTRL_CHAR_LIMIT = 0x1F  # Code points at or below this are treated as control characters
CHR_DEL = 0x7F  # DEL
62class Parser:
63 """
64 Parser for TOML documents.
65 """
def __init__(self, string: str) -> None:
    """
    Create a parser over ``string``.

    The input is passed through ``decode`` and wrapped in a ``Source``;
    the stack of array-of-tables names starts out empty.
    """
    # Input to parse
    decoded = decode(string)
    self._src = Source(decoded)

    # Names of the arrays of tables currently being parsed (innermost last).
    self._aot_stack: List[Key] = []
73 @property
74 def _state(self):
75 return self._src.state
77 @property
78 def _idx(self):
79 return self._src.idx
81 @property
82 def _current(self):
83 return self._src.current
85 @property
86 def _marker(self):
87 return self._src.marker
def extract(self) -> str:
    """
    Return the text lying between the marker and the current index,
    as reported by the underlying source.
    """
    src = self._src
    return src.extract()
def inc(self, exception: Optional[Type[ParseError]] = None) -> bool:
    """
    Advance the parser by one character unless the input is exhausted.

    ``exception`` is forwarded unchanged to the underlying source.
    Returns whether the parser was able to advance.
    """
    src = self._src
    return src.inc(exception=exception)
def inc_n(self, n: int, exception: Optional[Type[ParseError]] = None) -> bool:
    """
    Advance the parser by ``n`` characters unless the input is exhausted.

    Both arguments are forwarded unchanged to the underlying source,
    whose result is returned.
    """
    src = self._src
    return src.inc_n(n=n, exception=exception)
def consume(self, chars, min=0, max=-1):
    """
    Consume characters belonging to ``chars``, subject to the ``min``/``max``
    bounds; all arguments are forwarded unchanged to the underlying source.
    """
    src = self._src
    return src.consume(chars=chars, min=min, max=max)
def end(self) -> bool:
    """
    Tell whether the parser has reached the end of the input,
    as reported by the underlying source.
    """
    src = self._src
    return src.end()
def mark(self) -> None:
    """
    Move the marker to the position the index currently points at
    (delegates to the underlying source).
    """
    src = self._src
    src.mark()
def parse_error(self, exception=ParseError, *args, **kwargs):
    """
    Build (not raise) a parse error for the current position.

    The exception class and any extra arguments are handed to the
    underlying source, which creates the error object.
    """
    src = self._src
    return src.parse_error(exception, *args, **kwargs)
def parse(self) -> TOMLDocument:
    """
    Parse the whole input and return the resulting document.

    Items appearing before the first table header are read first, then
    every table / array of tables. Any exception raised while appending
    to the document is re-raised as a ``ParseError`` at the current position.
    """
    document = TOMLDocument(True)

    # Take all keyvals outside of tables/AoT's; stop at the first "[".
    while not self.end() and self._current != "[":
        parsed = self._parse_item()
        if not parsed:
            break

        key, value = parsed
        dotted = key is not None and key.is_multi()
        # Whitespace that could be merged into the previous item is not
        # appended again; dotted keys are always appended.
        if dotted or not self._merge_ws(value, document):
            try:
                document.append(key, value)
            except Exception as e:
                raise self.parse_error(ParseError, str(e)) from e

        self.mark()

    # Everything that remains is a sequence of tables.
    while not self.end():
        key, value = self._parse_table()
        if isinstance(value, Table) and value.is_aot_element():
            # This is just the first table in an AoT. Parse the rest of the
            # array along with it.
            value = self._parse_aot(value, key)

        try:
            document.append(key, value)
        except Exception as e:
            raise self.parse_error(ParseError, str(e)) from e

    document.parsing(False)

    return document
173 def _merge_ws(self, item: Item, container: Container) -> bool:
174 """
175 Merges the given Item with the last one currently in the given Container if
176 both are whitespace items.
178 Returns True if the items were merged.
179 """
180 last = container.last_item()
181 if not last:
182 return False
184 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace):
185 return False
187 start = self._idx - (len(last.s) + len(item.s))
188 container.body[-1] = (
189 container.body[-1][0],
190 Whitespace(self._src[start : self._idx]),
191 )
193 return True
195 def _is_child(self, parent: Key, child: Key) -> bool:
196 """
197 Returns whether a key is strictly a child of another key.
198 AoT siblings are not considered children of one another.
199 """
200 parent_parts = tuple(parent)
201 child_parts = tuple(child)
203 if parent_parts == child_parts:
204 return False
206 return parent_parts == child_parts[: len(parent_parts)]
def _parse_item(self) -> Optional[Tuple[Optional[Key], Item]]:
    """
    Try to parse the next item.

    Returns ``(None, Whitespace)`` or ``(None, Comment)`` for trivia,
    ``(key, value)`` for a key/value pair, and ``None`` when a table
    header (``[``) is found so the caller can handle it.
    """
    self.mark()
    with self._state as state:
        while True:
            ch = self._current

            if ch == "\n":
                # Found a newline; return all whitespace found up to this point.
                self.inc()
                return None, Whitespace(self.extract())

            if ch in " \t\r":
                # Skip whitespace; at end of input return what was collected.
                if not self.inc():
                    return None, Whitespace(self.extract())
                continue

            if ch == "#":
                # Found a comment; the whitespace before it is its indent.
                indent = self.extract()
                cws, comment, trail = self._parse_comment_trail()
                return None, Comment(Trivia(indent, cws, comment, trail))

            if ch == "[":
                # Found a table, delegate to the calling function.
                return None

            # Beginning of a KV pair.
            # Return to beginning of whitespace so it gets included
            # as indentation for the KV about to be parsed.
            state.restore = True
            break

    return self._parse_key_value(True)
def _parse_comment_trail(self, parse_trail: bool = True) -> Tuple[str, str, str]:
    """
    Parse an optional comment and, unless ``parse_trail`` is False, the
    trailing whitespace/newline that follows it.

    Returns ``(comment_ws, comment, trail)``; ``comment_ws`` and
    ``comment`` are empty when there is no comment. Raises
    ``InvalidControlChar`` for a forbidden control character inside the
    comment and ``UnexpectedCharError`` for anything other than
    whitespace, ``#`` or a newline before it.
    """
    if self.end():
        return "", "", ""

    comment_ws = ""
    comment = ""
    self.mark()

    while True:
        ch = self._current

        if ch == "\n":
            break

        if ch == "#":
            comment_ws = self.extract()

            self.mark()
            self.inc()  # Skip #

            # The comment itself runs up to the newline or end of input.
            while not self.end() and not self._current.is_nl():
                code = ord(self._current)
                # Tab is the only permitted character in the control range.
                if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                    raise self.parse_error(InvalidControlChar, code, "comments")

                if not self.inc():
                    break

            comment = self.extract()
            self.mark()

            break

        if ch in " \t\r":
            self.inc()
        else:
            raise self.parse_error(UnexpectedCharError, ch)

        if self.end():
            break

    trail = ""
    if parse_trail:
        while self._current.is_spaces() and self.inc():
            pass

        if self._current == "\r":
            self.inc()

        if self._current == "\n":
            self.inc()

        # Only extract when something was consumed or whitespace is pending.
        if self._idx != self._marker or self._current.is_ws():
            trail = self.extract()

    return comment_ws, comment, trail
def _parse_key_value(self, parse_comment: bool = False) -> Tuple[Key, Item]:
    """
    Parse ``key = value`` at the current position.

    The leading indent is stored on the value's trivia. With
    ``parse_comment`` the trailing comment and line ending are parsed and
    stored there too; otherwise the value's trail is cleared. Raises
    ``UnexpectedCharError`` when the separator has no ``=`` or more than one.
    """
    # Leading indent
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    indent = self.extract()

    # Key
    key = self._parse_key()

    # Separator: exactly one "=" is expected among the separator characters.
    self.mark()
    seen_equals = self._current == "="
    while self._current.is_kv_sep() and self.inc():
        if self._current != "=":
            continue
        if seen_equals:
            raise self.parse_error(UnexpectedCharError, "=")
        seen_equals = True
    if not seen_equals:
        raise self.parse_error(UnexpectedCharError, self._current)

    separator = self.extract()
    if not key.sep:
        key.sep = separator
    else:
        key.sep += separator

    # Value
    val = self._parse_value()

    # Comment
    if parse_comment:
        cws, comment, trail = self._parse_comment_trail()
        meta = val.trivia
        # Keep comment whitespace already recorded while parsing the value.
        if not meta.comment_ws:
            meta.comment_ws = cws

        meta.comment = comment
        meta.trail = trail
    else:
        val.trivia.trail = ""

    val.trivia.indent = indent

    return key, val
def _parse_key(self) -> Key:
    """
    Parse a key at the current position, skipping leading spaces first.

    A key starting with a single or double quote is parsed as a quoted
    key, anything else as a bare key.
    """
    self.mark()
    # Skip any leading whitespace
    while self._current.is_spaces() and self.inc():
        pass

    is_quoted = self._current in "\"'"
    parse = self._parse_quoted_key if is_quoted else self._parse_bare_key
    return parse()
def _parse_quoted_key(self) -> Key:
    """
    Parse a key enclosed in single or double quotes, including any
    dotted continuation that follows it.

    Raises ``UnexpectedCharError`` if the quoted string turns out to be a
    multiline string.
    """
    # Whitespace collected by the caller before the opening quote.
    original = self.extract()
    quote = self._current

    key_type = None
    for candidate in KeyType:
        if candidate.value == quote:
            key_type = candidate
            break

    if key_type is None:
        raise RuntimeError("Should not have entered _parse_quoted_key()")

    string_type = StringType.SLB if key_type == KeyType.Basic else StringType.SLL
    key_str = self._parse_string(string_type)
    if key_str._t.is_multiline():
        raise self.parse_error(UnexpectedCharError, key_str._t.value)
    original += key_str.as_string()

    # Spaces after the closing quote belong to the key's original text.
    self.mark()
    while self._current.is_spaces() and self.inc():
        pass
    original += self.extract()

    key = SingleKey(str(key_str), t=key_type, sep="", original=original)
    if self._current == ".":
        # Dotted key: parse the remainder and chain it on.
        self.inc()
        key = key.concat(self._parse_key())

    return key
def _parse_bare_key(self) -> Key:
    """
    Parse a bare (unquoted) key, including any dotted continuation.

    Bare-key characters and spaces are consumed together; surrounding
    whitespace is kept in the key's original text but stripped from its
    name.

    Raises ``EmptyKeyError`` when nothing but whitespace was found and
    ``ParseError`` when whitespace (space or tab) occurs inside the key.
    """
    while (
        self._current.is_bare_key_char() or self._current.is_spaces()
    ) and self.inc():
        pass

    original = self.extract()
    key = original.strip()
    if not key:
        # Empty key
        raise self.parse_error(EmptyKeyError)

    # The loop above also consumes tabs, so both kinds of inner whitespace
    # must be rejected, not just the space character.
    if " " in key or "\t" in key:
        # Bare key with whitespace in it
        raise self.parse_error(ParseError, f'Invalid key "{key}"')

    key = SingleKey(key, KeyType.Bare, "", original)

    if self._current == ".":
        # Dotted key: parse the remainder and chain it on.
        self.inc()
        key = key.concat(self._parse_key())

    return key
def _parse_value(self) -> Item:
    """
    Parse a value at the current position.

    The first character selects the kind of value: string, boolean,
    array, inline table, signed/special number, or a digit-led token
    that may be an integer, float, date, time or datetime.

    Raises ``InvalidNumberError``, ``InvalidDateError``,
    ``InvalidTimeError`` or ``InvalidDateTimeError`` for malformed tokens
    and ``UnexpectedCharError`` when no value can start here.
    """
    self.mark()
    first = self._current
    trivia = Trivia()

    # Kinds that are identified by their first character alone.
    if first == StringType.SLB.value:
        return self._parse_basic_string()
    if first == StringType.SLL.value:
        return self._parse_literal_string()
    if first == BoolType.TRUE.value[0]:
        return self._parse_true()
    if first == BoolType.FALSE.value[0]:
        return self._parse_false()
    if first == "[":
        return self._parse_array()
    if first == "{":
        return self._parse_inline_table()

    def build_datetime(parsed, text):
        # Shared DateTime construction for both datetime branches below.
        assert isinstance(parsed, datetime.datetime)
        return DateTime(
            parsed.year,
            parsed.month,
            parsed.day,
            parsed.hour,
            parsed.minute,
            parsed.second,
            parsed.microsecond,
            parsed.tzinfo,
            trivia,
            text,
        )

    special_floats = {"+inf", "-inf", "inf", "+nan", "-nan", "nan"}
    if first in "+-" or self._peek(4) in special_floats:
        # Number
        while self._current not in " \t\n\r#,]}" and self.inc():
            pass

        raw = self.extract()

        number = self._parse_number(raw, trivia)
        if number is None:
            raise self.parse_error(InvalidNumberError)
        return number

    if first not in string.digits:
        raise self.parse_error(UnexpectedCharError, first)

    # Integer, Float, Date, Time or DateTime
    while self._current not in " \t\n\r#,]}" and self.inc():
        pass

    raw = self.extract()

    match = RFC_3339_LOOSE.match(raw)
    if match:
        if match.group(1) and match.group(5):
            # datetime
            try:
                return build_datetime(parse_rfc3339(raw), raw)
            except ValueError:
                raise self.parse_error(InvalidDateTimeError)

        if match.group(1):
            try:
                parsed = parse_rfc3339(raw)
                assert isinstance(parsed, datetime.date)
                date = Date(parsed.year, parsed.month, parsed.day, trivia, raw)

                # Keep reading past spaces (space is not a terminator here)
                # so a time part written after a space can be picked up.
                self.mark()
                while self._current not in "\t\n\r#,]}" and self.inc():
                    pass

                time_raw = self.extract()
                time_part = time_raw.rstrip()
                # Whatever trailing whitespace was read becomes comment whitespace.
                trivia.comment_ws = time_raw[len(time_part) :]
                if not time_part:
                    return date

                combined = raw + time_part
                return build_datetime(parse_rfc3339(combined), combined)
            except ValueError:
                raise self.parse_error(InvalidDateError)

        if match.group(5):
            try:
                parsed = parse_rfc3339(raw)
                assert isinstance(parsed, datetime.time)
                return Time(
                    parsed.hour,
                    parsed.minute,
                    parsed.second,
                    parsed.microsecond,
                    parsed.tzinfo,
                    trivia,
                    raw,
                )
            except ValueError:
                raise self.parse_error(InvalidTimeError)

    number = self._parse_number(raw, trivia)
    if number is None:
        raise self.parse_error(InvalidNumberError)
    return number
def _parse_true(self):
    """Parse the boolean literal described by ``BoolType.TRUE``."""
    style = BoolType.TRUE
    return self._parse_bool(style)
def _parse_false(self):
    """Parse the boolean literal described by ``BoolType.FALSE``."""
    style = BoolType.FALSE
    return self._parse_bool(style)
def _parse_bool(self, style: BoolType) -> Bool:
    """
    Parse a boolean literal by consuming, one by one, exactly the
    characters that ``style`` yields when iterated.
    """
    with self._state:
        kind = BoolType(style)

        # only keep parsing for bool if the characters match the style:
        # each expected character must be consumed exactly once
        for expected in kind:
            self.consume(expected, min=1, max=1)

        return Bool(kind, Trivia())
def _parse_array(self) -> Array:
    """
    Parse an array starting at its opening bracket.

    Whitespace, comments, values and commas are collected in order as
    array elements. Raises ``UnexpectedEofError`` if the input ends inside
    the array, ``UnexpectedCharError`` for a character that cannot appear
    at that point, and ``ParseError`` if the ``Array`` item rejects the
    collected elements with a ``ValueError``.
    """
    # Consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems: List[Item] = []
    prev_value = None
    while True:
        # consume whitespace
        mark = self._idx
        self.consume(TOMLChar.SPACES + TOMLChar.NL)
        indent = self._src[mark : self._idx]
        newline = set(TOMLChar.NL) & set(indent)
        if newline:
            elems.append(Whitespace(indent))
            continue

        # consume comment
        if self._current == "#":
            cws, comment, trail = self._parse_comment_trail(parse_trail=False)
            elems.append(Comment(Trivia(indent, cws, comment, trail)))
            continue

        # consume indent
        if indent:
            elems.append(Whitespace(indent))
            continue

        # consume value
        if not prev_value:
            try:
                elems.append(self._parse_value())
                prev_value = True
                continue
            except UnexpectedCharError:
                # Not a value: fall through to the comma / closing-bracket
                # checks, which raise if the character fits neither.
                pass

        # consume comma
        if prev_value and self._current == ",":
            self.inc(exception=UnexpectedEofError)
            elems.append(Whitespace(","))
            prev_value = False
            continue

        # consume closing bracket
        if self._current == "]":
            # consume closing bracket, EOF here doesn't matter
            self.inc()
            break

        raise self.parse_error(UnexpectedCharError, self._current)

    try:
        return Array(elems, Trivia())
    except ValueError as e:
        # Previously swallowed, which made this method return None and
        # fail later with an unrelated error; report it as a parse error.
        raise self.parse_error(ParseError, str(e)) from e
def _parse_inline_table(self) -> InlineTable:
    """
    Parse an inline table starting at its opening brace.

    Raises ``UnexpectedEofError`` if the input ends inside the table and
    ``UnexpectedCharError`` for a leading or trailing comma, two commas in
    a row, or a key/value pair that is not followed by ``,`` or ``}``.
    """
    # consume opening bracket, EOF here is an issue (middle of array)
    self.inc(exception=UnexpectedEofError)

    elems = Container(True)

    def collect_spaces():
        # Consume a run of spaces and record it, if any, as whitespace.
        start = self._idx
        self.consume(TOMLChar.SPACES)
        spaces = self._src[start : self._idx]
        if spaces:
            elems.add(Whitespace(spaces))

    # None: empty inline table so far
    # False: previous key-value pair was not followed by a comma
    # True: previous key-value pair was followed by a comma
    trailing_comma = None
    while True:
        # consume leading whitespace
        collect_spaces()

        if trailing_comma:
            # A comma must be followed by another key-value pair.
            if self._current in ("}", ","):
                raise self.parse_error(UnexpectedCharError, self._current)
        else:
            if self._current == "}":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            missing_comma = trailing_comma is False
            leading_comma = trailing_comma is None and self._current == ","
            if missing_comma or leading_comma:
                # Either the previous key-value pair was not followed by a comma
                # or the table has an unexpected leading comma.
                raise self.parse_error(UnexpectedCharError, self._current)

        key, val = self._parse_key_value(False)
        elems.add(key, val)

        # consume trailing whitespace
        collect_spaces()

        # consume trailing comma
        trailing_comma = self._current == ","
        if trailing_comma:
            # consume the comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)

    return InlineTable(elems, Trivia())
674 def _parse_number(self, raw: str, trivia: Trivia) -> Optional[Item]:
675 # Leading zeros are not allowed
676 sign = ""
677 if raw.startswith(("+", "-")):
678 sign = raw[0]
679 raw = raw[1:]
681 if len(raw) > 1 and (
682 raw.startswith("0")
683 and not raw.startswith(("0.", "0o", "0x", "0b", "0e"))
684 or sign
685 and raw.startswith(".")
686 ):
687 return None
689 if raw.startswith(("0o", "0x", "0b")) and sign:
690 return None
692 digits = "[0-9]"
693 base = 10
694 if raw.startswith("0b"):
695 digits = "[01]"
696 base = 2
697 elif raw.startswith("0o"):
698 digits = "[0-7]"
699 base = 8
700 elif raw.startswith("0x"):
701 digits = "[0-9a-f]"
702 base = 16
704 # Underscores should be surrounded by digits
705 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()
707 if "_" in clean:
708 return None
710 if (
711 clean.endswith(".")
712 or not clean.startswith("0x")
713 and clean.split("e", 1)[0].endswith(".")
714 ):
715 return None
717 try:
718 return Integer(int(sign + clean, base), trivia, sign + raw)
719 except ValueError:
720 try:
721 return Float(float(sign + clean), trivia, sign + raw)
722 except ValueError:
723 return None
def _parse_literal_string(self) -> String:
    """Parse a string opened by the ``StringType.SLL`` delimiter, inside a state scope."""
    with self._state:
        parsed = self._parse_string(StringType.SLL)
        return parsed
def _parse_basic_string(self) -> String:
    """Parse a string opened by the ``StringType.SLB`` delimiter, inside a state scope."""
    with self._state:
        parsed = self._parse_string(StringType.SLB)
        return parsed
def _parse_escaped_char(self, multiline):
    """
    Parse the character(s) following a backslash and return the text
    they stand for.

    Handles, in order: a line-ending backslash in multiline strings
    (returns ``""``), the escapes listed in ``_escaped``, and ``u``/``U``
    unicode escapes. Raises ``InvalidCharInStringError`` or
    ``InvalidUnicodeValueError`` for anything else.
    """
    current = self._current

    if multiline and current.is_ws():
        # When the last non-whitespace character on a line is
        # a \, it will be trimmed along with all whitespace
        # (including newlines) up to the next non-whitespace
        # character or closing delimiter.
        # """\
        #     hello \
        #     world"""
        skipped = ""
        while self._current.is_ws():
            skipped += self._current
            # consume the whitespace, EOF here is an issue
            # (middle of string)
            self.inc(exception=UnexpectedEofError)

        # the escape followed by whitespace must have a newline
        # before any other chars
        if "\n" not in skipped:
            raise self.parse_error(InvalidCharInStringError, self._current)

        return ""

    if current in _escaped:
        replacement = _escaped[current]

        # consume this char, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        return replacement

    if current in {"u", "U"}:
        # this needs to be a unicode
        char, hex_digits = self._peek_unicode(current == "U")
        if char is None:
            raise self.parse_error(InvalidUnicodeValueError)

        # consume the U char and the unicode value
        self.inc_n(len(hex_digits) + 1)

        return char

    raise self.parse_error(InvalidCharInStringError, current)
def _parse_string(self, delim: StringType) -> String:
    """
    Parse a string whose opening delimiter is at the current position.

    ``delim`` is the single-line flavour to start from; it is toggled to
    its multiline counterpart when three delimiters open the string.
    Raises ``InternalParserError`` when not positioned on the delimiter,
    ``UnexpectedEofError`` when the input ends inside the string,
    ``InvalidControlChar`` for forbidden control characters and
    ``InvalidCharInStringError`` for six or more closing delimiters.
    """
    # only keep parsing for string if the current character matches the delim
    if self._current != delim.unit:
        raise self.parse_error(
            InternalParserError,
            f"Invalid character for string type {delim}",
        )

    # consume the opening/first delim, EOF here is an issue
    # (middle of string or middle of delim)
    self.inc(exception=UnexpectedEofError)

    if self._current == delim.unit:
        # consume the closing/second delim, we do not care if EOF occurs as
        # that would simply imply an empty single line string
        if not self.inc() or self._current != delim.unit:
            # Empty string
            return String(delim, "", "", Trivia())

        # consume the third delim, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

        delim = delim.toggle()  # convert delim to multi delim

    self.mark()  # to extract the original string with whitespace and all
    value = ""

    # A newline immediately following the opening delimiter will be trimmed.
    if delim.is_multiline() and self._current == "\n":
        # consume the newline, EOF here is an issue (middle of string)
        self.inc(exception=UnexpectedEofError)

    escaped = False  # whether the previous key was ESCAPE
    while True:
        code = ord(self._current)

        # Unescaped control characters are rejected; tab is always allowed
        # and multiline strings additionally allow line feed / carriage return.
        forbidden = False
        if not escaped:
            in_control_range = code == CHR_DEL or code <= CTRL_CHAR_LIMIT
            if delim.is_singleline():
                forbidden = in_control_range and (
                    code == CHR_DEL or code != CTRL_I
                )
            if not forbidden and delim.is_multiline():
                forbidden = in_control_range and (
                    code == CHR_DEL or code not in [CTRL_I, CTRL_J, CTRL_M]
                )

        if forbidden:
            raise self.parse_error(InvalidControlChar, code, "strings")

        if not escaped and self._current == delim.unit:
            # try to process current as a closing delim
            original = self.extract()

            if not delim.is_multiline():
                # consume the closing delim, we do not care if EOF occurs as
                # that would simply imply the end of self._src
                self.inc()

                return String(delim, value, original, Trivia())

            # Consume the delimiters to see if we are at the end of the string
            close = ""
            while self._current == delim.unit:
                close += self._current
                self.inc()

            if len(close) < 3:
                # Not a triple quote, leave in result as-is.
                # Adding back the characters we already consumed
                value += close
                continue

            if len(close) == 3:
                # We are at the end of the string
                return String(delim, value, original, Trivia())

            if len(close) >= 6:
                raise self.parse_error(InvalidCharInStringError, self._current)

            # Four or five delimiters: the extra ones are string content.
            value += close[:-3]
            original += close[:-3]

            return String(delim, value, original, Trivia())

        if delim.is_basic() and escaped:
            # attempt to parse the current char as an escaped value, an exception
            # is raised if this fails
            value += self._parse_escaped_char(delim.is_multiline())

            # no longer escaped
            escaped = False
        elif delim.is_basic() and self._current == "\\":
            # the next char is being escaped
            escaped = True

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
        else:
            # this is either a literal string where we keep everything as is,
            # or this is not a special escaped char in a basic string
            value += self._current

            # consume this char, EOF here is an issue (middle of string)
            self.inc(exception=UnexpectedEofError)
def _parse_table(
    self, parent_name: Optional[Key] = None, parent: Optional[Table] = None
) -> Tuple[Key, Union[Table, AoT]]:
    """
    Parse a table (``[name]``) or array-of-tables element (``[[name]]``)
    together with its body and any child tables that follow it.

    ``parent_name`` is the key of the enclosing table when called
    recursively; its parts are stripped from the parsed name. ``parent``
    is accepted but not used in this method.

    Returns the key under which the result should be stored and the
    parsed ``Table`` or ``AoT``.

    Raises ``InternalParserError`` when not positioned on ``[``,
    ``UnexpectedEofError`` when the header is cut short,
    ``EmptyTableNameError`` for an empty name, ``UnexpectedCharError``
    for a malformed header (including a missing second ``]`` on an
    array-of-tables header) and ``ParseError`` for a bare name part
    containing a space.
    """
    if self._current != "[":
        raise self.parse_error(
            InternalParserError, "_parse_table() called on non-bracket character."
        )

    indent = self.extract()
    self.inc()  # Skip opening bracket

    if self.end():
        raise self.parse_error(UnexpectedEofError)

    is_aot = False
    if self._current == "[":
        if not self.inc():
            raise self.parse_error(UnexpectedEofError)

        is_aot = True
    try:
        key = self._parse_key()
    except EmptyKeyError:
        raise self.parse_error(EmptyTableNameError) from None
    if self.end():
        raise self.parse_error(UnexpectedEofError)
    elif self._current != "]":
        raise self.parse_error(UnexpectedCharError, self._current)

    key.sep = ""
    full_key = key
    name_parts = tuple(key)
    if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
        raise self.parse_error(
            ParseError, f'Invalid table name "{full_key.as_string()}"'
        )

    missing_table = False
    if parent_name:
        parent_name_parts = tuple(parent_name)
    else:
        parent_name_parts = ()

    # More than one part beyond the parent means intermediate tables
    # were never declared.
    if len(name_parts) > len(parent_name_parts) + 1:
        missing_table = True

    name_parts = name_parts[len(parent_name_parts) :]

    values = Container(True)

    self.inc()  # Skip closing bracket
    if is_aot:
        # An array-of-tables header needs a second closing bracket;
        # verify it instead of blindly skipping whatever comes next.
        if self.end():
            raise self.parse_error(UnexpectedEofError)
        if self._current != "]":
            raise self.parse_error(UnexpectedCharError, self._current)
        self.inc()

    cws, comment, trail = self._parse_comment_trail()

    result = Null()
    table = Table(
        values,
        Trivia(indent, cws, comment, trail),
        is_aot,
        name=name_parts[0].key if name_parts else key.key,
        display_name=full_key.as_string(),
        is_super_table=False,
    )

    if len(name_parts) > 1:
        if missing_table:
            # Missing super table
            # i.e. a table initialized like this: [foo.bar]
            # without initializing [foo]
            #
            # So we have to create the parent tables
            table = Table(
                Container(True),
                Trivia(indent, cws, comment, trail),
                is_aot and name_parts[0] in self._aot_stack,
                is_super_table=True,
                name=name_parts[0].key,
            )

            result = table
            key = name_parts[0]

            for i, _name in enumerate(name_parts[1:]):
                # ``i == len(name_parts) - 2`` identifies the last name part.
                child = table.get(
                    _name,
                    Table(
                        Container(True),
                        Trivia(indent, cws, comment, trail),
                        is_aot and i == len(name_parts) - 2,
                        is_super_table=i < len(name_parts) - 2,
                        name=_name.key,
                        display_name=full_key.as_string()
                        if i == len(name_parts) - 2
                        else None,
                    ),
                )

                if is_aot and i == len(name_parts) - 2:
                    table.raw_append(_name, AoT([child], name=table.name, parsed=True))
                else:
                    table.raw_append(_name, child)

                table = child
                values = table.value
    else:
        if name_parts:
            key = name_parts[0]

    while not self.end():
        item = self._parse_item()
        if item:
            _key, item = item
            if not self._merge_ws(item, values):
                table.raw_append(_key, item)
        else:
            if self._current == "[":
                _, key_next = self._peek_table()

                if self._is_child(full_key, key_next):
                    key_next, table_next = self._parse_table(full_key, table)

                    table.raw_append(key_next, table_next)

                    # Picking up any sibling
                    while not self.end():
                        _, key_next = self._peek_table()

                        if not self._is_child(full_key, key_next):
                            break

                        key_next, table_next = self._parse_table(full_key, table)

                        table.raw_append(key_next, table_next)

                # The next header is not a child: this table's body is done.
                break
            else:
                raise self.parse_error(
                    InternalParserError,
                    "_parse_item() returned None on a non-bracket character.",
                )

    if isinstance(result, Null):
        result = table

        # Start collecting siblings unless this array of tables is already
        # the one being collected (top of the AoT stack).
        if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
            result = self._parse_aot(result, full_key)

    return key, result
def _peek_table(self) -> Tuple[bool, Key]:
    """
    Look at the upcoming table header without consuming it; the parser
    state is restored when the scope is left.

    Returns ``(is_aot, key)``: whether the header opens with a double
    bracket, and the table name about to be parsed. Raises
    ``InternalParserError`` when not positioned on ``[`` and
    ``EmptyTableNameError`` for an empty name.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current != "[":
            raise self.parse_error(
                InternalParserError,
                "_peek_table() entered on non-bracket character",
            )

        self.inc()

        # AoT headers open with a second bracket.
        is_aot = False
        if self._current == "[":
            self.inc()
            is_aot = True

        try:
            name = self._parse_key()
        except EmptyKeyError:
            raise self.parse_error(EmptyTableNameError) from None

        return is_aot, name
def _parse_aot(self, first: Table, name_first: Key) -> AoT:
    """
    Collect ``first`` and every directly following array-of-tables
    element with the same name into one ``AoT``.

    ``name_first`` is kept on the AoT stack while the siblings are parsed.
    """
    tables = [first]
    self._aot_stack.append(name_first)

    while not self.end():
        is_aot_next, name_next = self._peek_table()
        same_array = is_aot_next and name_next == name_first
        if not same_array:
            break

        _, sibling = self._parse_table(name_first)
        tables.append(sibling)

    self._aot_stack.pop()

    return AoT(tables, parsed=True)
def _peek(self, n: int) -> str:
    """
    Return up to ``n`` upcoming characters without consuming them.

    Peeking stops early at whitespace, ``#``, ``,``, ``]``, ``}`` or the
    source's EOF marker. The parser state is restored afterwards.
    """
    # we always want to restore after exiting this scope
    with self._state(restore=True):
        stoppers = " \t\n\r#,]}" + self._src.EOF
        seen = []
        for _ in range(n):
            if self._current in stoppers:
                break
            seen.append(self._current)
            self.inc()
        return "".join(seen)
def _peek_unicode(self, is_long: bool) -> Tuple[Optional[str], Optional[str]]:
    """
    Look at a ``u``/``U`` unicode escape without consuming it; the parser
    state is restored when the scope is left.

    ``is_long`` selects the eight-digit form, otherwise four hex digits
    are read.

    Returns ``(char, hex_digits)``. ``char`` is ``None`` when the escape
    is invalid: too few characters, characters that are not hexadecimal
    digits, a surrogate code point (D800-DFFF) or a value ``chr()``
    rejects. Raises ``InternalParserError`` when not positioned on
    ``u``/``U``.
    """
    # we always want to restore after exiting this scope
    with self._state(save_marker=True, restore=True):
        if self._current not in {"u", "U"}:
            raise self.parse_error(
                InternalParserError, "_peek_unicode() entered on non-unicode value"
            )

        self.inc()  # Dropping prefix
        self.mark()

        chars = 8 if is_long else 4

        if not self.inc_n(chars):
            return None, None

        extracted = self.extract()

        # int(x, 16) tolerates signs, underscores, whitespace and a "0x"
        # prefix, none of which belong in an escape: require hex digits only.
        if any(ch not in string.hexdigits for ch in extracted):
            return None, extracted

        code = int(extracted, 16)

        # Surrogates are not scalar values. Check the numeric value so the
        # eight-digit form (e.g. 0000D800) is rejected too.
        if 0xD800 <= code <= 0xDFFF:
            return None, None

        try:
            value = chr(code)
        except (ValueError, OverflowError):
            value = None

        return value, extracted