Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-License-Identifier: MIT
2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
3# Licensed to PSF under a Contributor Agreement.
5from __future__ import annotations
7import sys
8from types import MappingProxyType
10from ._re import (
11 RE_DATETIME,
12 RE_LOCALTIME,
13 RE_NUMBER,
14 match_to_datetime,
15 match_to_localtime,
16 match_to_number,
17)
19TYPE_CHECKING = False
20if TYPE_CHECKING:
21 from collections.abc import Iterable
22 from typing import IO, Any, Final
24 from ._types import Key, ParseFloat, Pos
# Arrays and inline tables are parsed recursively. With pathologically
# nested input, pure Python raises RecursionError (acceptable), whereas
# mypyc binary wheels crash unrecoverably (not acceptable). The mypyc docs
# say this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until that fix lands, this library has to cap the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# seems far lower than the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

ASCII_CTRL: Final = frozenset(map(chr, range(32))) | frozenset("\x7f")

# Quotation mark and backslash are deliberately absent from these sets.
# The parser functions currently treat them as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - frozenset("\t\n")

ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset(" \t")
TOML_WS_AND_NEWLINE: Final = TOML_WS | frozenset("\n")
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | frozenset("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdefABCDEF0123456789")

# Maps each two-character escape sequence to the character it stands for.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        r"\b": "\x08",  # backspace
        r"\t": "\t",  # tab
        r"\n": "\n",  # linefeed
        r"\f": "\x0c",  # form feed
        r"\r": "\r",  # carriage return
        r'\"': '"',  # quote
        r"\\": "\\",  # backslash
    }
)
class DEPRECATED_DEFAULT:
    """Placeholder default for TOMLDecodeError's arguments.

    Used as the default argument value while the exception's free-form
    arguments go through their deprecation period.
    """
class TOMLDecodeError(ValueError):
    """Raised when a document is not valid TOML.

    When constructed with `msg` (str), `doc` (str) and `pos` (int) only,
    the instance carries these attributes on top of ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    Any other call shape is the deprecated free-form style: a
    DeprecationWarning is emitted, the supplied arguments are passed to
    ValueError unchanged, and none of the attributes above are set.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            args
            or not isinstance(msg, str)
            or not isinstance(doc, str)
            or not isinstance(pos, int)
        ):
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Rebuild the positional arguments exactly as the caller gave
            # them, leaving out any named parameter still at its sentinel.
            supplied = tuple(
                arg for arg in (msg, doc, pos) if arg is not DEPRECATED_DEFAULT
            )
            ValueError.__init__(self, *supplied, *args)
            return

        lineno = doc.count("\n", 0, pos) + 1
        # On the first line the column is the offset itself; on later lines
        # it is the distance from the closest preceding newline.
        colno = pos + 1 if lineno == 1 else pos - doc.rindex("\n", 0, pos)

        if pos < len(doc):
            coord_repr = f"line {lineno}, column {colno}"
        else:
            coord_repr = "end of document"
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno
def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Read a binary file object and parse its content as TOML.

    Raises TypeError when the object returned by `__fp.read()` has no
    `decode` method, which is what happens for a file opened in text mode.
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:  # noqa: C901
    """Parse TOML from a string.

    The document is processed one statement at a time. A statement is an
    empty line, a comment, a key/value pair, a "[table]" header or a
    "[[array of tables]]" header, and it must be followed by a newline or
    by the end of the document.

    `parse_float` is wrapped with `make_safe_parse_float` before use.

    Returns the dict held by the `Output` object that the statements
    populate.

    Raises TypeError if `__s.replace(...)` fails with AttributeError or
    TypeError, and TOMLDecodeError if a statement is invalid or is
    followed by anything other than a newline or the end of the document.
    """

    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None
    pos = 0
    out = Output()
    # Key of the table that key/value pairs currently belong to; the empty
    # tuple stands for the document root.
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)

    # Parse one statement at a time
    # (typically means one line in TOML source)
    while True:
        # 1. Skip line leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Parse rules. Expect one of the following:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        # Skip trailing whitespace when applicable.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            try:
                second_char: str | None = src[pos + 1]
            except IndexError:
                second_char = None
            # Flags queued by earlier key/value pairs are applied before the
            # new header is processed.
            out.flags.finalize_pending()
            if second_char == "[":
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Skip comment
        pos = skip_comment(src, pos)

        # 4. Expect end of line or end of file
        try:
            char = src[pos]
        except IndexError:
            break
        if char != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags live in a tree that mirrors key paths. Every node holds its own
    "flags", the "recursive_flags" that also apply to every key below it,
    and a "nested" dict of child nodes.
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    @staticmethod
    def _blank_node() -> dict[str, Any]:
        """Return a fresh tree node with no flags and no children."""
        return {"flags": set(), "recursive_flags": set(), "nested": {}}

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue `flag` for `key`; it takes effect on `finalize_pending`."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively, then empty the queue."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the node for `key`, and so every flag at or below it.

        Does nothing when part of the parent path does not exist.
        """
        level = self._flags
        for part in key[:-1]:
            node = level.get(part)
            if node is None:
                return
            level = node["nested"]
        level.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Attach `flag` to `key`, creating missing nodes along the path.

        With `recursive=True` the flag also covers all keys below `key`.
        """
        parents, stem = key[:-1], key[-1]
        level = self._flags
        for part in parents:
            if part not in level:
                level[part] = self._blank_node()
            level = level[part]["nested"]
        if stem not in level:
            level[stem] = self._blank_node()
        slot = "recursive_flags" if recursive else "flags"
        level[stem][slot].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Tell whether `flag` applies to `key`.

        A flag applies when it is set on the key itself, either way, or
        set recursively on any ancestor of the key.
        """
        if not key:
            return False  # document root has no flags
        level = self._flags
        for part in key[:-1]:
            node = level.get(part)
            if node is None:
                return False
            if flag in node["recursive_flags"]:
                return True
            level = node["nested"]
        leaf = level.get(key[-1])
        if leaf is None:
            return False
        return flag in leaf["flags"] or flag in leaf["recursive_flags"]
class NestedDict:
    """Nested dict container that creates missing tables on access."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Walk `key` from the root, creating empty dicts where needed.

        With `access_lists` enabled, a list met on the way is entered
        through its last item. Raises KeyError if a step ends on anything
        other than a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list.

        Raises KeyError if `key` already holds something other than a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        stem = key[-1]
        if stem not in parent:
            parent[stem] = [{}]
            return
        existing = parent[stem]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})
class Output:
    """Pairs the parsed data with the flags recorded for its keys."""

    def __init__(self) -> None:
        self.flags = Flags()
        self.data = NestedDict()
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after `pos` whose character is not in `chars`.

    Running off the end of `src` stops the scan and returns the index
    reached.
    """
    try:
        while True:
            if src[pos] not in chars:
                return pos
            pos += 1
    except IndexError:
        return pos
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next `expect` at or after `pos`.

    If `expect` is not found, the end of `src` is returned, unless
    `error_on_eof` is set, in which case TOMLDecodeError is raised.
    TOMLDecodeError is also raised, pointing at the first offender, when
    the skipped span contains a character from `error_on`.
    """
    try:
        stop = src.index(expect, pos)
    except ValueError:
        stop = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, stop) from None

    if error_on.isdisjoint(src[pos:stop]):
        return stop

    # The span holds an illegal character: find the first one to report it.
    bad_pos = pos
    while src[bad_pos] not in error_on:
        bad_pos += 1
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)
def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at `pos`, if there is one.

    Returns `pos` unchanged unless `src[pos]` is "#". Otherwise returns
    what `skip_until` yields for the next newline, with no error on
    reaching the end of `src`.
    """
    try:
        current = src[pos]
    except IndexError:
        return pos
    if current != "#":
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments from `pos`.

    Repeats a whitespace/newline skip followed by a comment skip until a
    round makes no progress.
    """
    while True:
        advanced = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
        if advanced == pos:
            return pos
        pos = advanced
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header that starts at `pos`.

    Marks the key as explicitly declared and makes sure a nest exists for
    it in `out`. Returns the position just past "]" together with the key.
    Raises TOMLDecodeError if the key was already declared or is frozen,
    if it would overwrite a value, or if the closing "]" is missing.
    """
    # Step over "[" and any whitespace in front of the key
    pos = skip_chars(src, pos + 1, TOML_WS)
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array of tables]]" header that starts at `pos`.

    Appends a new empty table to the list at the parsed key. Returns the
    position just past "]]" together with the key. Raises TOMLDecodeError
    if the key is frozen, if it would overwrite a value, or if the closing
    "]]" is missing.
    """
    # Step over "[[" and any whitespace in front of the key
    pos = skip_chars(src, pos + 2, TOML_WS)
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The namespace now refers to a new, empty list item, so release it...
    flags.unset_all(key)
    # ...while this exact key still may not be used in a table declaration
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse a key/value pair at `pos` and store it under `header`.

    Returns the position after the value. Raises TOMLDecodeError if the
    dotted key redefines an explicitly declared namespace, if the target
    namespace is frozen, or if a value already exists at the key.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    stem = key[-1]
    parent_path = header + key[:-1]
    flags = out.flags

    for depth in range(1, len(key)):
        container = header + key[:depth]
        # Check that dotted key syntax does not redefine an existing table
        if flags.is_(container, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {container}", src, pos)
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        flags.add_pending(container, Flags.EXPLICIT_NEST)

    if flags.is_(parent_path, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {parent_path}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(parent_path)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)
    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse `key = value` at `pos`.

    Returns the position after the value, the key and the value. Raises
    TOMLDecodeError if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    try:
        has_equals = src[pos] == "="
    except IndexError:
        has_equals = False
    if not has_equals:
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    # Step over "=" and the whitespace that follows it
    pos = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, pos, parse_float, nest_lvl)
    return pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key at `pos`.

    Returns the position after the key, with trailing whitespace skipped,
    and the key as a tuple of its parts.
    """
    pos, part = parse_key_part(src, pos)
    key: Key = (part,)
    pos = skip_chars(src, pos, TOML_WS)
    while True:
        try:
            dotted = src[pos] == "."
        except IndexError:
            dotted = False
        if not dotted:
            return pos, key
        # Step over "." and surrounding whitespace, then read the next part
        pos, part = parse_key_part(src, skip_chars(src, pos + 1, TOML_WS))
        key = (*key, part)
        pos = skip_chars(src, pos, TOML_WS)
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one key part at `pos`: bare, literal-quoted or basic-quoted.

    Returns the position after the part and its text. Raises
    TOMLDecodeError if `pos` is at the end of `src` or the character
    there cannot start a key part.
    """
    try:
        first: str | None = src[pos]
    except IndexError:
        first = None
    if first == "'":
        return parse_literal_str(src, pos)
    if first == '"':
        return parse_one_line_basic_str(src, pos)
    if first in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at `pos`."""
    # Step over the opening quotation mark
    return parse_basic_str(src, pos + 1, multiline=False)
def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose opening "[" is at `pos`.

    Whitespace, newlines and comments may surround the items, and a
    trailing comma is accepted. Returns the position after "]" and the
    parsed list. Raises TOMLDecodeError("Unclosed array") when an item is
    followed by neither "," nor "]".
    """
    items: list[Any] = []

    # Step over "[" and whatever filler precedes the first item
    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        delimiter = src[pos : pos + 1]
        if delimiter == "]":
            break
        if delimiter != ",":
            raise TOMLDecodeError("Unclosed array", src, pos)

        # Step over "," and the filler that follows it
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, items
def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose opening "{" is at `pos`.

    Returns the position after "}" and the parsed dict. Raises
    TOMLDecodeError if a key targets a frozen namespace, overwrites a
    value, is a duplicate, or if a pair is followed by neither "," nor
    "}".
    """
    table = NestedDict()
    frozen = Flags()

    # Step over "{" and leading whitespace
    pos = skip_chars(src, pos + 1, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, table.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        parent_path, stem = key[:-1], key[-1]
        if frozen.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(parent_path, access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {stem!r}", src, pos)
        nest[stem] = value

        pos = skip_chars(src, pos, TOML_WS)
        delimiter = src[pos : pos + 1]
        if delimiter == "}":
            return pos + 1, table.dict
        if delimiter != ",":
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        # More pairs follow: an inline table or array just stored must not
        # be extended by them.
        if isinstance(value, (dict, list)):
            frozen.set(key, Flags.FROZEN, recursive=True)
        # Step over "," and the whitespace that follows it
        pos = skip_chars(src, pos + 1, TOML_WS)
def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the position after the sequence and the text it produces. In
    multiline mode a backslash followed by a space, tab or newline
    produces the empty string and consumes the whitespace and newlines
    that follow. Raises TOMLDecodeError for an unknown escape.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in ("\\ ", "\\\t", "\\\n"):
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            try:
                following = src[pos]
            except IndexError:
                return pos, ""
            if following != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # \uXXXX takes 4 hex digits, \UXXXXXXXX takes 8
    if escape_id == "\\u":
        hex_len = 4
    elif escape_id == "\\U":
        hex_len = 8
    else:
        hex_len = 0
    if hex_len:
        return parse_hex_char(src, pos, hex_len)

    try:
        replacement = BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
    return pos, replacement
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Call `parse_basic_str_escape` with multiline mode switched on."""
    result = parse_basic_str_escape(src, pos, multiline=True)
    return result
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse `hex_len` hex digits at `pos` into a single character.

    Returns the position after the digits and the character. Raises
    TOMLDecodeError if there are too few digits, if any is not a hex
    digit, or if the value is not a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening "'" is at `pos`.

    Returns the position after the closing apostrophe and the text
    between the two apostrophes.
    """
    body_start = pos + 1  # Skip starting apostrophe
    closing = skip_until(
        src, body_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    return closing + 1, src[body_start:closing]  # Skip ending apostrophe
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opener is at `pos`.

    `literal` selects the ''' form, otherwise the triple-double-quote
    form is parsed. A newline directly after the opener is dropped.
    Returns the position after the closing delimiter and the string.
    """
    pos += 3
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        body_end = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:body_end]
        pos = body_end + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # Add at maximum two extra apostrophes/quotes if the end sequence
    # is 4 or 5 chars long instead of just 3.
    for _ in range(2):
        if not src.startswith(delim, pos):
            break
        pos += 1
        result += delim
    return pos, result
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; `pos` is just past the opener.

    Returns the position after the closing delimiter and the string with
    its escapes resolved. Raises TOMLDecodeError if `src` ends before the
    string is closed or if an illegal character is found.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape

    result = ""
    # Start of the run of plain characters not yet copied into `result`
    chunk_start = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None

        if char == '"':
            if not multiline:
                return pos + 1, result + src[chunk_start:pos]
            if src.startswith('"""', pos):
                return pos + 3, result + src[chunk_start:pos]
            # A lone quote inside a multiline string is ordinary content
            pos += 1
        elif char == "\\":
            result += src[chunk_start:pos]
            pos, unescaped = parse_escapes(src, pos)
            result += unescaped
            chunk_start = pos
        elif char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1
def parse_value(  # noqa: C901
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value that starts at `src[pos]`.

    Handles basic and literal strings (single-line and multiline),
    booleans, arrays, inline tables, dates and times, numbers and the
    special floats "inf" and "nan" with an optional sign.

    `nest_lvl` is checked against MAX_INLINE_NESTING and is passed on,
    incremented by one, when an array or inline table is entered.

    Returns the position after the value and the parsed value.

    Raises RecursionError when `nest_lvl` exceeds MAX_INLINE_NESTING, and
    TOMLDecodeError when no value form matches or a matched date/datetime
    cannot be converted.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    try:
        char: str | None = src[pos]
    except IndexError:
        char = None

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    # (a "t"/"f" that does not start "true"/"false" falls through to the
    # checks below)
    if char == "t":
        if src.startswith("true", pos):
            return pos + 4, True
    if char == "f":
        if src.startswith("false", pos):
            return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats
    first_three = src[pos : pos + 3]
    if first_three in {"inf", "nan"}:
        return pos + 3, parse_float(first_three)
    first_four = src[pos : pos + 4]
    if first_four in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(first_four)

    raise TOMLDecodeError("Invalid value", src, pos)
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Tell whether `codepoint` lies in 0..0xD7FF or 0xE000..0x10FFFF."""
    in_low_range = 0 <= codepoint <= 0xD7FF
    return in_low_range or (0xE000 <= codepoint <= 0x10FFFF)
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """Wrap `parse_float` so that it cannot hand back a dict or a list.

    Those two types would be mixed up with parsed TOML tables and arrays
    and confuse the parser. The wrapper raises `ValueError` when
    `parse_float` returns one of them and otherwise passes the result
    through unchanged.
    """
    if parse_float is not float:

        def safe_parse_float(float_str: str) -> Any:
            parsed = parse_float(float_str)
            if isinstance(parsed, (dict, list)):
                raise ValueError("parse_float must not return dicts or lists")
            return parsed

        return safe_parse_float

    # The default `float` callable never returns illegal types, so it is
    # handed back without a wrapper.
    return float