Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/tomlkit/parser.py: 97%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from __future__ import annotations
3import datetime
4import re
5import string
7from tomlkit._compat import decode
8from tomlkit._utils import RFC_3339_LOOSE
9from tomlkit._utils import _escaped
10from tomlkit._utils import parse_rfc3339
11from tomlkit.container import Container
12from tomlkit.exceptions import EmptyKeyError
13from tomlkit.exceptions import EmptyTableNameError
14from tomlkit.exceptions import InternalParserError
15from tomlkit.exceptions import InvalidCharInStringError
16from tomlkit.exceptions import InvalidControlChar
17from tomlkit.exceptions import InvalidDateError
18from tomlkit.exceptions import InvalidDateTimeError
19from tomlkit.exceptions import InvalidNumberError
20from tomlkit.exceptions import InvalidTimeError
21from tomlkit.exceptions import InvalidUnicodeValueError
22from tomlkit.exceptions import ParseError
23from tomlkit.exceptions import UnexpectedCharError
24from tomlkit.exceptions import UnexpectedEofError
25from tomlkit.items import AoT
26from tomlkit.items import Array
27from tomlkit.items import Bool
28from tomlkit.items import BoolType
29from tomlkit.items import Comment
30from tomlkit.items import Date
31from tomlkit.items import DateTime
32from tomlkit.items import Float
33from tomlkit.items import InlineTable
34from tomlkit.items import Integer
35from tomlkit.items import Item
36from tomlkit.items import Key
37from tomlkit.items import KeyType
38from tomlkit.items import Null
39from tomlkit.items import SingleKey
40from tomlkit.items import String
41from tomlkit.items import StringType
42from tomlkit.items import Table
43from tomlkit.items import Time
44from tomlkit.items import Trivia
45from tomlkit.items import Whitespace
46from tomlkit.source import Source
47from tomlkit.toml_char import TOMLChar
48from tomlkit.toml_document import TOMLDocument
# Code points used by the parser's control-character checks.
CTRL_I = ord("\t")  # Tab
CTRL_J = ord("\n")  # Line feed
CTRL_M = ord("\r")  # Carriage return
CTRL_CHAR_LIMIT = 31  # 0x1F; compared with `code <= CTRL_CHAR_LIMIT`
CHR_DEL = 127  # 0x7F
58class Parser:
59 """
60 Parser for TOML documents.
61 """
63 def __init__(self, string: str | bytes) -> None:
64 # Input to parse
65 self._src = Source(decode(string))
67 self._aot_stack: list[Key] = []
69 @property
70 def _state(self):
71 return self._src.state
73 @property
74 def _idx(self):
75 return self._src.idx
77 @property
78 def _current(self):
79 return self._src.current
81 @property
82 def _marker(self):
83 return self._src.marker
85 def extract(self) -> str:
86 """
87 Extracts the value between marker and index
88 """
89 return self._src.extract()
91 def inc(self, exception: type[ParseError] | None = None) -> bool:
92 """
93 Increments the parser if the end of the input has not been reached.
94 Returns whether or not it was able to advance.
95 """
96 return self._src.inc(exception=exception)
98 def inc_n(self, n: int, exception: type[ParseError] | None = None) -> bool:
99 """
100 Increments the parser by n characters
101 if the end of the input has not been reached.
102 """
103 return self._src.inc_n(n=n, exception=exception)
105 def consume(self, chars, min=0, max=-1):
106 """
107 Consume chars until min/max is satisfied is valid.
108 """
109 return self._src.consume(chars=chars, min=min, max=max)
111 def end(self) -> bool:
112 """
113 Returns True if the parser has reached the end of the input.
114 """
115 return self._src.end()
117 def mark(self) -> None:
118 """
119 Sets the marker to the index's current position
120 """
121 self._src.mark()
123 def parse_error(self, exception=ParseError, *args, **kwargs):
124 """
125 Creates a generic "parse error" at the current position.
126 """
127 return self._src.parse_error(exception, *args, **kwargs)
129 def parse(self) -> TOMLDocument:
130 body = TOMLDocument(True)
132 # Take all keyvals outside of tables/AoT's.
133 while not self.end():
134 # Break out if a table is found
135 if self._current == "[":
136 break
138 # Otherwise, take and append one KV
139 item = self._parse_item()
140 if not item:
141 break
143 key, value = item
144 if (key is not None and key.is_multi()) or not self._merge_ws(value, body):
145 # We actually have a table
146 try:
147 body.append(key, value)
148 except Exception as e:
149 raise self.parse_error(ParseError, str(e)) from e
151 self.mark()
153 while not self.end():
154 key, value = self._parse_table()
155 if isinstance(value, Table) and value.is_aot_element():
156 # This is just the first table in an AoT. Parse the rest of the array
157 # along with it.
158 value = self._parse_aot(value, key)
160 try:
161 body.append(key, value)
162 except Exception as e:
163 raise self.parse_error(ParseError, str(e)) from e
165 body.parsing(False)
167 return body
169 def _merge_ws(self, item: Item, container: Container) -> bool:
170 """
171 Merges the given Item with the last one currently in the given Container if
172 both are whitespace items.
174 Returns True if the items were merged.
175 """
176 last = container.last_item()
177 if not last:
178 return False
180 if not isinstance(item, Whitespace) or not isinstance(last, Whitespace):
181 return False
183 start = self._idx - (len(last.s) + len(item.s))
184 container.body[-1] = (
185 container.body[-1][0],
186 Whitespace(self._src[start : self._idx]),
187 )
189 return True
191 def _is_child(self, parent: Key, child: Key) -> bool:
192 """
193 Returns whether a key is strictly a child of another key.
194 AoT siblings are not considered children of one another.
195 """
196 parent_parts = tuple(parent)
197 child_parts = tuple(child)
199 if parent_parts == child_parts:
200 return False
202 return parent_parts == child_parts[: len(parent_parts)]
    def _parse_item(self) -> tuple[Key | None, Item] | None:
        """
        Attempts to parse the next item and returns it, along with its key
        if the item is value-like.

        Returns ``(None, Whitespace)`` for a run of whitespace, ``(None,
        Comment)`` for a comment line, the result of ``_parse_key_value(True)``
        for a key/value pair, and ``None`` when a ``[`` is found so the caller
        can handle the table header.

        Raises an InvalidControlChar parse error when a carriage return is
        not followed by a line feed.
        """
        self.mark()
        with self._state as state:
            while True:
                c = self._current
                if c == "\n":
                    # Found a newline; Return all whitespace found up to this point.
                    self.inc()

                    return None, Whitespace(self.extract())
                elif c in " \t\r":
                    if c == "\r":
                        # Look ahead one character; the position is restored
                        # when the block exits.
                        with self._state(restore=True):
                            if not self.inc() or self._current != "\n":
                                raise self.parse_error(
                                    InvalidControlChar, CTRL_M, "documents"
                                )
                    # Skip whitespace.
                    if not self.inc():
                        # Input ended inside the whitespace run.
                        return None, Whitespace(self.extract())
                elif c == "#":
                    # Found a comment, parse it
                    indent = self.extract()
                    cws, comment, trail = self._parse_comment_trail()

                    return None, Comment(Trivia(indent, cws, comment, trail))
                elif c == "[":
                    # Found a table, delegate to the calling function.
                    return
                else:
                    # Beginning of a KV pair.
                    # Return to beginning of whitespace so it gets included
                    # as indentation for the KV about to be parsed.
                    state.restore = True
                    break

        return self._parse_key_value(True)
    def _parse_comment_trail(self, parse_trail: bool = True) -> tuple[str, str, str]:
        """
        Returns (comment_ws, comment, trail)
        If there is no comment, comment_ws and comment will
        simply be empty.

        When ``parse_trail`` is False the trailing whitespace/newline is left
        unconsumed and ``trail`` is returned empty. All three values are
        empty when the input has already ended.

        Raises an InvalidControlChar parse error for control characters inside
        the comment (tab excepted) and for a carriage return not followed by
        a line feed; raises UnexpectedCharError for any other character found
        before the comment or the end of the line.
        """
        if self.end():
            return "", "", ""

        comment = ""
        comment_ws = ""
        self.mark()

        while True:
            c = self._current

            if c == "\n":
                break
            elif c == "#":
                # Everything since the mark is the whitespace before the comment.
                comment_ws = self.extract()

                self.mark()
                self.inc()  # Skip #

                # The comment itself
                while not self.end() and not self._current.is_nl():
                    code = ord(self._current)
                    if code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I):
                        raise self.parse_error(InvalidControlChar, code, "comments")

                    if not self.inc():
                        break

                comment = self.extract()
                # Re-mark so the trail extracted below starts after the comment.
                self.mark()

                break
            elif c in " \t\r":
                if c == "\r":
                    # Look ahead one character; the position is restored
                    # when the block exits.
                    with self._state(restore=True):
                        if not self.inc() or self._current != "\n":
                            raise self.parse_error(
                                InvalidControlChar, CTRL_M, "comments"
                            )
                self.inc()
            else:
                raise self.parse_error(UnexpectedCharError, c)

            if self.end():
                break

        trail = ""
        if parse_trail:
            while self._current.is_spaces() and self.inc():
                pass

            if self._current == "\r":
                with self._state(restore=True):
                    if not self.inc() or self._current != "\n":
                        raise self.parse_error(InvalidControlChar, CTRL_M, "documents")
                # Step over the "\r"; the "\n" is consumed just below.
                self.inc()

            if self._current == "\n":
                self.inc()

            if self._idx != self._marker or self._current.is_ws():
                trail = self.extract()

        return comment_ws, comment, trail
316 def _parse_key_value(self, parse_comment: bool = False) -> tuple[Key, Item]:
317 # Leading indent
318 self.mark()
320 while self._current.is_spaces() and self.inc():
321 pass
323 indent = self.extract()
325 # Key
326 key = self._parse_key()
328 self.mark()
330 found_equals = self._current == "="
331 while self._current.is_kv_sep() and self.inc():
332 if self._current == "=":
333 if found_equals:
334 raise self.parse_error(UnexpectedCharError, "=")
335 else:
336 found_equals = True
337 if not found_equals:
338 raise self.parse_error(UnexpectedCharError, self._current)
340 if not key.sep:
341 key.sep = self.extract()
342 else:
343 key.sep += self.extract()
345 # Value
346 val = self._parse_value()
347 # Comment
348 if parse_comment:
349 cws, comment, trail = self._parse_comment_trail()
350 meta = val.trivia
351 if not meta.comment_ws:
352 meta.comment_ws = cws
354 meta.comment = comment
355 meta.trail = trail
356 else:
357 val.trivia.trail = ""
359 val.trivia.indent = indent
361 return key, val
363 def _parse_key(self) -> Key:
364 """
365 Parses a Key at the current position;
366 WS before the key must be exhausted first at the callsite.
367 """
368 self.mark()
369 while self._current.is_spaces() and self.inc():
370 # Skip any leading whitespace
371 pass
372 if self._current in "\"'":
373 return self._parse_quoted_key()
374 else:
375 return self._parse_bare_key()
377 def _parse_quoted_key(self) -> Key:
378 """
379 Parses a key enclosed in either single or double quotes.
380 """
381 # Extract the leading whitespace
382 original = self.extract()
383 quote_style = self._current
384 key_type = next((t for t in KeyType if t.value == quote_style), None)
386 if key_type is None:
387 raise RuntimeError("Should not have entered _parse_quoted_key()")
389 key_str = self._parse_string(
390 StringType.SLB if key_type == KeyType.Basic else StringType.SLL
391 )
392 if key_str._t.is_multiline():
393 raise self.parse_error(UnexpectedCharError, key_str._t.value)
394 original += key_str.as_string()
395 self.mark()
396 while self._current.is_spaces() and self.inc():
397 pass
398 original += self.extract()
399 key = SingleKey(str(key_str), t=key_type, sep="", original=original)
400 if self._current == ".":
401 self.inc()
402 key = key.concat(self._parse_key())
404 return key
406 def _parse_bare_key(self) -> Key:
407 """
408 Parses a bare key.
409 """
410 while (
411 self._current.is_bare_key_char() or self._current.is_spaces()
412 ) and self.inc():
413 pass
415 original = self.extract()
416 key = original.strip()
417 if not key:
418 # Empty key
419 raise self.parse_error(EmptyKeyError)
421 if " " in key:
422 # Bare key with spaces in it
423 raise self.parse_error(ParseError, f'Invalid key "{key}"')
425 key = SingleKey(key, KeyType.Bare, "", original)
427 if self._current == ".":
428 self.inc()
429 key = key.concat(self._parse_key())
431 return key
    def _parse_value(self) -> Item:
        """
        Attempts to parse a value at the current position.

        Dispatches on the first character: strings, booleans, arrays and
        inline tables go to their dedicated parsers; anything starting with a
        sign, ``inf``/``nan`` or a digit is read up to the next delimiter and
        interpreted as a number, date, time or datetime.

        Raises InvalidNumberError, InvalidDateError, InvalidTimeError or
        InvalidDateTimeError when the text cannot be interpreted, and
        UnexpectedCharError when the first character starts no known value.
        """
        self.mark()
        c = self._current
        trivia = Trivia()

        if c == StringType.SLB.value:
            return self._parse_basic_string()
        elif c == StringType.SLL.value:
            return self._parse_literal_string()
        elif c == BoolType.TRUE.value[0]:
            return self._parse_true()
        elif c == BoolType.FALSE.value[0]:
            return self._parse_false()
        elif c == "[":
            return self._parse_array()
        elif c == "{":
            return self._parse_inline_table()
        elif c in "+-" or self._peek(4) in {
            "+inf",
            "-inf",
            "inf",
            "+nan",
            "-nan",
            "nan",
        }:
            # Number
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        elif c in string.digits:
            # Integer, Float, Date, Time or DateTime
            while self._current not in " \t\n\r#,]}" and self.inc():
                pass

            raw = self.extract()

            m = RFC_3339_LOOSE.match(raw)
            if m:
                if m.group("date") and m.group("time"):
                    # datetime
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateTimeError) from None

                if m.group("date"):
                    try:
                        dt = parse_rfc3339(raw)
                        assert isinstance(dt, datetime.date)
                        date = Date(dt.year, dt.month, dt.day, trivia, raw)
                        # Keep scanning: the delimiter set below omits the
                        # space, so a time part separated from the date by a
                        # space is picked up here.
                        self.mark()
                        while self._current not in "\t\n\r#,]}" and self.inc():
                            pass

                        time_raw = self.extract()
                        time_part = time_raw.rstrip()
                        # Whitespace after the time part is kept as the
                        # whitespace preceding a comment.
                        trivia.comment_ws = time_raw[len(time_part) :]
                        if not time_part:
                            return date

                        dt = parse_rfc3339(raw + time_part)
                        assert isinstance(dt, datetime.datetime)
                        return DateTime(
                            dt.year,
                            dt.month,
                            dt.day,
                            dt.hour,
                            dt.minute,
                            dt.second,
                            dt.microsecond,
                            dt.tzinfo,
                            trivia,
                            raw + time_part,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidDateError) from None

                if m.group("time"):
                    try:
                        t = parse_rfc3339(raw)
                        assert isinstance(t, datetime.time)
                        return Time(
                            t.hour,
                            t.minute,
                            t.second,
                            t.microsecond,
                            t.tzinfo,
                            trivia,
                            raw,
                        )
                    except ValueError:
                        raise self.parse_error(InvalidTimeError) from None

            # Not a date/time: fall back to a plain number.
            item = self._parse_number(raw, trivia)
            if item is not None:
                return item

            raise self.parse_error(InvalidNumberError)
        else:
            raise self.parse_error(UnexpectedCharError, c)
557 def _parse_true(self):
558 return self._parse_bool(BoolType.TRUE)
560 def _parse_false(self):
561 return self._parse_bool(BoolType.FALSE)
563 def _parse_bool(self, style: BoolType) -> Bool:
564 with self._state:
565 style = BoolType(style)
567 # only keep parsing for bool if the characters match the style
568 # try consuming rest of chars in style
569 for c in style:
570 self.consume(c, min=1, max=1)
572 return Bool(style, Trivia())
574 def _parse_array(self) -> Array:
575 # Consume opening bracket, EOF here is an issue (middle of array)
576 self.inc(exception=UnexpectedEofError)
578 elems: list[Item] = []
579 prev_value = None
580 while True:
581 # consume whitespace
582 mark = self._idx
583 self.consume(TOMLChar.SPACES + TOMLChar.NL)
584 indent = self._src[mark : self._idx]
585 newline = set(TOMLChar.NL) & set(indent)
586 if newline:
587 elems.append(Whitespace(indent))
588 continue
590 # consume comment
591 if self._current == "#":
592 cws, comment, trail = self._parse_comment_trail(parse_trail=False)
593 elems.append(Comment(Trivia(indent, cws, comment, trail)))
594 continue
596 # consume indent
597 if indent:
598 elems.append(Whitespace(indent))
599 continue
601 # consume value
602 if not prev_value:
603 try:
604 elems.append(self._parse_value())
605 prev_value = True
606 continue
607 except UnexpectedCharError:
608 pass
610 # consume comma
611 if prev_value and self._current == ",":
612 self.inc(exception=UnexpectedEofError)
613 # If the previous item is Whitespace, add to it
614 if isinstance(elems[-1], Whitespace):
615 elems[-1]._s = elems[-1].s + ","
616 else:
617 elems.append(Whitespace(","))
618 prev_value = False
619 continue
621 # consume closing bracket
622 if self._current == "]":
623 # consume closing bracket, EOF here doesn't matter
624 self.inc()
625 break
627 raise self.parse_error(UnexpectedCharError, self._current)
629 try:
630 res = Array(elems, Trivia())
631 except ValueError:
632 pass
633 else:
634 return res
    def _parse_inline_table(self) -> InlineTable:
        """
        Parses an inline table starting at its opening brace.

        Whitespace (including newlines), comments, key/value pairs and commas
        are added to a Container in source order, which is then wrapped in an
        InlineTable.

        Raises UnexpectedEofError if the input ends right after the opening
        brace or a comma, and UnexpectedCharError when a comma appears where
        a key is expected or a pair is not followed by a comma or ``}``.
        """
        # consume opening bracket, EOF here is an issue (middle of inline table)
        self.inc(exception=UnexpectedEofError)

        elems = Container(True)
        # True while a key/value pair may come next, False right after one.
        expect_key = True
        while True:
            while True:
                # consume whitespace and newlines
                mark = self._idx
                self.consume(TOMLChar.SPACES + TOMLChar.NL)
                raw = self._src[mark : self._idx]
                if raw:
                    elems.add(Whitespace(raw))

                if self._current != "#":
                    break

                # A comment: record it, then look for more whitespace/comments.
                cws, comment, trail = self._parse_comment_trail(parse_trail=False)
                elems.add(Comment(Trivia("", cws, comment, trail)))

            if self._current == "}":
                # consume closing bracket, EOF here doesn't matter
                self.inc()
                break

            if expect_key:
                if self._current == ",":
                    raise self.parse_error(UnexpectedCharError, self._current)
                key, val = self._parse_key_value(False)
                elems.add(key, val)
                expect_key = False
                continue

            if self._current != ",":
                raise self.parse_error(UnexpectedCharError, self._current)

            elems.add(Whitespace(","))
            # consume comma, EOF here is an issue (middle of inline table)
            self.inc(exception=UnexpectedEofError)
            expect_key = True

        return InlineTable(elems, Trivia())
680 def _parse_number(self, raw: str, trivia: Trivia) -> Item | None:
681 # Leading zeros are not allowed
682 sign = ""
683 if raw.startswith(("+", "-")):
684 sign = raw[0]
685 raw = raw[1:]
687 if len(raw) > 1 and (
688 (raw.startswith("0") and not raw.startswith(("0.", "0o", "0x", "0b", "0e")))
689 or (sign and raw.startswith("."))
690 ):
691 return None
693 if raw.startswith(("0o", "0x", "0b")) and sign:
694 return None
696 digits = "[0-9]"
697 base = 10
698 if raw.startswith("0b"):
699 digits = "[01]"
700 base = 2
701 elif raw.startswith("0o"):
702 digits = "[0-7]"
703 base = 8
704 elif raw.startswith("0x"):
705 digits = "[0-9a-f]"
706 base = 16
708 # Underscores should be surrounded by digits
709 clean = re.sub(f"(?i)(?<={digits})_(?={digits})", "", raw).lower()
711 if "_" in clean:
712 return None
714 if clean.endswith(".") or (
715 not clean.startswith("0x") and clean.split("e", 1)[0].endswith(".")
716 ):
717 return None
719 try:
720 return Integer(int(sign + clean, base), trivia, sign + raw)
721 except ValueError:
722 try:
723 return Float(float(sign + clean), trivia, sign + raw)
724 except ValueError:
725 return None
727 def _parse_literal_string(self) -> String:
728 with self._state:
729 return self._parse_string(StringType.SLL)
731 def _parse_basic_string(self) -> String:
732 with self._state:
733 return self._parse_string(StringType.SLB)
735 def _parse_escaped_char(self, multiline):
736 if multiline and self._current.is_ws():
737 # When the last non-whitespace character on a line is
738 # a \, it will be trimmed along with all whitespace
739 # (including newlines) up to the next non-whitespace
740 # character or closing delimiter.
741 # """\
742 # hello \
743 # world"""
744 tmp = ""
745 while self._current.is_ws():
746 tmp += self._current
747 # consume the whitespace, EOF here is an issue
748 # (middle of string)
749 self.inc(exception=UnexpectedEofError)
750 continue
752 # the escape followed by whitespace must have a newline
753 # before any other chars
754 if "\n" not in tmp:
755 raise self.parse_error(InvalidCharInStringError, self._current)
757 return ""
759 if self._current in _escaped:
760 c = _escaped[self._current]
762 # consume this char, EOF here is an issue (middle of string)
763 self.inc(exception=UnexpectedEofError)
765 return c
767 if self._current in {"u", "U"}:
768 # this needs to be a unicode
769 u, ue = self._peek_unicode(self._current == "U")
770 if u is not None:
771 # consume the U char and the unicode value
772 self.inc_n(len(ue) + 1)
774 return u
776 raise self.parse_error(InvalidUnicodeValueError)
778 if self._current == "x":
779 h, he = self._peek_hex()
780 if h is not None:
781 # consume the x char and the hex value
782 self.inc_n(len(he) + 1)
783 return h
785 raise self.parse_error(InvalidUnicodeValueError)
787 raise self.parse_error(InvalidCharInStringError, self._current)
789 def _parse_string(self, delim: StringType) -> String:
790 # only keep parsing for string if the current character matches the delim
791 if self._current != delim.unit:
792 raise self.parse_error(
793 InternalParserError,
794 f"Invalid character for string type {delim}",
795 )
797 # consume the opening/first delim, EOF here is an issue
798 # (middle of string or middle of delim)
799 self.inc(exception=UnexpectedEofError)
801 if self._current == delim.unit:
802 # consume the closing/second delim, we do not care if EOF occurs as
803 # that would simply imply an empty single line string
804 if not self.inc() or self._current != delim.unit:
805 # Empty string
806 return String(delim, "", "", Trivia())
808 # consume the third delim, EOF here is an issue (middle of string)
809 self.inc(exception=UnexpectedEofError)
811 delim = delim.toggle() # convert delim to multi delim
813 self.mark() # to extract the original string with whitespace and all
814 value = ""
816 # A newline immediately following the opening delimiter will be trimmed.
817 if delim.is_multiline():
818 if self._current == "\n":
819 # consume the newline, EOF here is an issue (middle of string)
820 self.inc(exception=UnexpectedEofError)
821 else:
822 cur = self._current
823 with self._state(restore=True):
824 if self.inc():
825 cur += self._current
826 if cur == "\r\n":
827 self.inc_n(2, exception=UnexpectedEofError)
829 escaped = False # whether the previous key was ESCAPE
830 while True:
831 code = ord(self._current)
832 if (
833 delim.is_singleline()
834 and not escaped
835 and (code == CHR_DEL or (code <= CTRL_CHAR_LIMIT and code != CTRL_I))
836 ) or (
837 delim.is_multiline()
838 and not escaped
839 and (
840 code == CHR_DEL
841 or (
842 code <= CTRL_CHAR_LIMIT and code not in [CTRL_I, CTRL_J, CTRL_M]
843 )
844 )
845 ):
846 raise self.parse_error(InvalidControlChar, code, "strings")
847 elif delim.is_multiline() and not escaped and self._current == "\r":
848 with self._state(restore=True):
849 if not self.inc() or self._current != "\n":
850 raise self.parse_error(InvalidControlChar, CTRL_M, "strings")
851 elif not escaped and self._current == delim.unit:
852 # try to process current as a closing delim
853 original = self.extract()
855 close = ""
856 if delim.is_multiline():
857 # Consume the delimiters to see if we are at the end of the string
858 close = ""
859 while self._current == delim.unit:
860 close += self._current
861 self.inc()
863 if len(close) < 3:
864 # Not a triple quote, leave in result as-is.
865 # Adding back the characters we already consumed
866 value += close
867 continue
869 if len(close) == 3:
870 # We are at the end of the string
871 return String(delim, value, original, Trivia())
873 if len(close) >= 6:
874 raise self.parse_error(InvalidCharInStringError, self._current)
876 value += close[:-3]
877 original += close[:-3]
879 return String(delim, value, original, Trivia())
880 else:
881 # consume the closing delim, we do not care if EOF occurs as
882 # that would simply imply the end of self._src
883 self.inc()
885 return String(delim, value, original, Trivia())
886 elif delim.is_basic() and escaped:
887 # attempt to parse the current char as an escaped value, an exception
888 # is raised if this fails
889 value += self._parse_escaped_char(delim.is_multiline())
891 # no longer escaped
892 escaped = False
893 elif delim.is_basic() and self._current == "\\":
894 # the next char is being escaped
895 escaped = True
897 # consume this char, EOF here is an issue (middle of string)
898 self.inc(exception=UnexpectedEofError)
899 else:
900 # this is either a literal string where we keep everything as is,
901 # or this is not a special escaped char in a basic string
902 value += self._current
904 # consume this char, EOF here is an issue (middle of string)
905 self.inc(exception=UnexpectedEofError)
907 def _parse_table(
908 self, parent_name: Key | None = None, parent: Table | None = None
909 ) -> tuple[Key, Table | AoT]:
910 """
911 Parses a table element.
912 """
913 if self._current != "[":
914 raise self.parse_error(
915 InternalParserError, "_parse_table() called on non-bracket character."
916 )
918 indent = self.extract()
919 self.inc() # Skip opening bracket
921 if self.end():
922 raise self.parse_error(UnexpectedEofError)
924 is_aot = False
925 if self._current == "[":
926 if not self.inc():
927 raise self.parse_error(UnexpectedEofError)
929 is_aot = True
930 try:
931 key = self._parse_key()
932 except EmptyKeyError:
933 raise self.parse_error(EmptyTableNameError) from None
934 if self.end():
935 raise self.parse_error(UnexpectedEofError)
936 elif self._current != "]":
937 raise self.parse_error(UnexpectedCharError, self._current)
939 key.sep = ""
940 full_key = key
941 name_parts = tuple(key)
942 if any(" " in part.key.strip() and part.is_bare() for part in name_parts):
943 raise self.parse_error(
944 ParseError, f'Invalid table name "{full_key.as_string()}"'
945 )
947 missing_table = False
948 if parent_name:
949 parent_name_parts = tuple(parent_name)
950 else:
951 parent_name_parts = ()
953 if len(name_parts) > len(parent_name_parts) + 1:
954 missing_table = True
956 name_parts = name_parts[len(parent_name_parts) :]
958 values = Container(True)
960 self.inc() # Skip closing bracket
961 if is_aot:
962 # TODO: Verify close bracket
963 self.inc()
965 cws, comment, trail = self._parse_comment_trail()
967 result = Null()
968 table = Table(
969 values,
970 Trivia(indent, cws, comment, trail),
971 is_aot,
972 name=name_parts[0].key if name_parts else key.key,
973 display_name=full_key.as_string(),
974 is_super_table=False,
975 )
977 if len(name_parts) > 1:
978 if missing_table:
979 # Missing super table
980 # i.e. a table initialized like this: [foo.bar]
981 # without initializing [foo]
982 #
983 # So we have to create the parent tables
984 table = Table(
985 Container(True),
986 Trivia("", cws, comment, trail),
987 is_aot and name_parts[0] in self._aot_stack,
988 is_super_table=True,
989 name=name_parts[0].key,
990 )
992 result = table
993 key = name_parts[0]
995 for i, _name in enumerate(name_parts[1:]):
996 child = table.get(
997 _name,
998 Table(
999 Container(True),
1000 Trivia(indent, cws, comment, trail),
1001 is_aot and i == len(name_parts) - 2,
1002 is_super_table=i < len(name_parts) - 2,
1003 name=_name.key,
1004 display_name=(
1005 full_key.as_string() if i == len(name_parts) - 2 else None
1006 ),
1007 ),
1008 )
1010 if is_aot and i == len(name_parts) - 2:
1011 table.raw_append(_name, AoT([child], name=table.name, parsed=True))
1012 else:
1013 table.raw_append(_name, child)
1015 table = child
1016 values = table.value
1017 else:
1018 if name_parts:
1019 key = name_parts[0]
1021 while not self.end():
1022 item = self._parse_item()
1023 if item:
1024 _key, item = item
1025 if not self._merge_ws(item, values):
1026 table.raw_append(_key, item)
1027 else:
1028 if self._current == "[":
1029 _, key_next = self._peek_table()
1031 if self._is_child(full_key, key_next):
1032 key_next, table_next = self._parse_table(full_key, table)
1034 table.raw_append(key_next, table_next)
1036 # Picking up any sibling
1037 while not self.end():
1038 _, key_next = self._peek_table()
1040 if not self._is_child(full_key, key_next):
1041 break
1043 key_next, table_next = self._parse_table(full_key, table)
1045 table.raw_append(key_next, table_next)
1047 break
1048 else:
1049 raise self.parse_error(
1050 InternalParserError,
1051 "_parse_item() returned None on a non-bracket character.",
1052 )
1053 table.value._validate_out_of_order_table()
1054 if isinstance(result, Null):
1055 result = table
1057 if is_aot and (not self._aot_stack or full_key != self._aot_stack[-1]):
1058 result = self._parse_aot(result, full_key)
1060 return key, result
1062 def _peek_table(self) -> tuple[bool, Key]:
1063 """
1064 Peeks ahead non-intrusively by cloning then restoring the
1065 initial state of the parser.
1067 Returns the name of the table about to be parsed,
1068 as well as whether it is part of an AoT.
1069 """
1070 # we always want to restore after exiting this scope
1071 with self._state(save_marker=True, restore=True):
1072 if self._current != "[":
1073 raise self.parse_error(
1074 InternalParserError,
1075 "_peek_table() entered on non-bracket character",
1076 )
1078 # AoT
1079 self.inc()
1080 is_aot = False
1081 if self._current == "[":
1082 self.inc()
1083 is_aot = True
1084 try:
1085 return is_aot, self._parse_key()
1086 except EmptyKeyError:
1087 raise self.parse_error(EmptyTableNameError) from None
1089 def _parse_aot(self, first: Table, name_first: Key) -> AoT:
1090 """
1091 Parses all siblings of the provided table first and bundles them into
1092 an AoT.
1093 """
1094 payload = [first]
1095 self._aot_stack.append(name_first)
1096 while not self.end():
1097 is_aot_next, name_next = self._peek_table()
1098 if is_aot_next and name_next == name_first:
1099 _, table = self._parse_table(name_first)
1100 payload.append(table)
1101 else:
1102 break
1104 self._aot_stack.pop()
1106 return AoT(payload, parsed=True)
1108 def _peek(self, n: int) -> str:
1109 """
1110 Peeks ahead n characters.
1112 n is the max number of characters that will be peeked.
1113 """
1114 # we always want to restore after exiting this scope
1115 with self._state(restore=True):
1116 buf = ""
1117 for _ in range(n):
1118 if self._current not in " \t\n\r#,]}" + self._src.EOF:
1119 buf += self._current
1120 self.inc()
1121 continue
1123 break
1124 return buf
1126 def _peek_unicode(self, is_long: bool) -> tuple[str | None, str | None]:
1127 """
1128 Peeks ahead non-intrusively by cloning then restoring the
1129 initial state of the parser.
1131 Returns the unicode value is it's a valid one else None.
1132 """
1133 # we always want to restore after exiting this scope
1134 with self._state(save_marker=True, restore=True):
1135 if self._current not in {"u", "U"}:
1136 raise self.parse_error(
1137 InternalParserError, "_peek_unicode() entered on non-unicode value"
1138 )
1140 self.inc() # Dropping prefix
1141 self.mark()
1143 if is_long:
1144 chars = 8
1145 else:
1146 chars = 4
1148 if not self.inc_n(chars):
1149 value, extracted = None, None
1150 else:
1151 extracted = self.extract()
1153 if extracted[0].lower() == "d" and extracted[1].strip("01234567"):
1154 return None, None
1156 try:
1157 value = chr(int(extracted, 16))
1158 except (ValueError, OverflowError):
1159 value = None
1161 return value, extracted
1163 def _peek_hex(self) -> tuple[str | None, str | None]:
1164 with self._state(save_marker=True, restore=True):
1165 if self._current != "x":
1166 raise self.parse_error(
1167 InternalParserError, "_peek_hex() entered on non-hex value"
1168 )
1170 self.inc() # Dropping prefix
1171 self.mark()
1173 if not self.inc_n(2):
1174 return None, None
1176 extracted = self.extract()
1177 if extracted.strip("0123456789abcdefABCDEF"):
1178 return None, None
1180 try:
1181 value = chr(int(extracted, 16))
1182 except (ValueError, OverflowError):
1183 value = None
1185 return value, extracted