Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-License-Identifier: MIT
2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
3# Licensed to PSF under a Contributor Agreement.
5from __future__ import annotations
7import sys
8from types import MappingProxyType
10from ._re import (
11 RE_DATETIME,
12 RE_LOCALTIME,
13 RE_NUMBER,
14 match_to_datetime,
15 match_to_localtime,
16 match_to_number,
17)
19TYPE_CHECKING = False
20if TYPE_CHECKING:
21 from collections.abc import Iterable
22 from typing import IO, Any, Final
24 from ._types import Key, ParseFloat, Pos
# Arrays and inline tables are parsed recursively. A pathologically nested
# document makes pure Python raise RecursionError (acceptable), whereas
# mypyc binary wheels crash unrecoverably (not acceptable). The mypyc docs
# say this will be fixed eventually:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until then this library caps the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# appears to be far below the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# Code points 0-31 together with code point 127.
ASCII_CTRL: Final = frozenset(map(chr, (*range(32), 127)))

# Quotation mark and backslash are absent from both sets below; the parser
# functions currently deal with them as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t"}
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t", "\n"}

ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset({" ", "\t"})
TOML_WS_AND_NEWLINE: Final = TOML_WS | {"\n"}
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | {'"', "'"}
HEXDIGIT_CHARS: Final = frozenset("0123456789abcdefABCDEF")

# Two-character escape sequences (backslash included) mapped to the single
# character each one stands for. Exposed read-only.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000a",  # linefeed
        "\\f": "\u000c",  # form feed
        "\\r": "\u000d",  # carriage return
        "\\e": "\u001b",  # escape
        '\\"': "\u0022",  # quote
        "\\\\": "\u005c",  # backslash
    }
)
class DEPRECATED_DEFAULT:
    """Sentinel default for TOMLDecodeError's arguments.

    Serves as the default argument value for as long as the free-form
    arguments of TOMLDecodeError are in their deprecation period.
    """
class TOMLDecodeError(ValueError):
    """Raised when a document is not valid TOML.

    Extends ValueError with the following attributes:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    These attributes are only set when the error is built from a str
    ``msg``, a str ``doc`` and an int ``pos`` with no extra positional
    arguments. Any other call shape is the deprecated free-form style.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            args
            or not isinstance(msg, str)
            or not isinstance(doc, str)
            or not isinstance(pos, int)
        ):
            # Deprecated free-form call: warn, then initialise as a plain
            # ValueError from whichever arguments were actually supplied.
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Prepend in reverse so the final order is msg, doc, pos, *args.
            for supplied in (pos, doc, msg):
                if supplied is not DEPRECATED_DEFAULT:
                    args = supplied, *args
            ValueError.__init__(self, *args)
            return

        # 1-based line number: newlines before ``pos``, plus one.
        lineno = 1 + doc.count("\n", 0, pos)
        # 1-based column: offset from the start of the line holding ``pos``.
        colno = pos + 1 if lineno == 1 else pos - doc.rindex("\n", 0, pos)

        past_end = pos >= len(doc)
        coord_repr = (
            "end of document" if past_end else f"line {lineno}, column {colno}"
        )
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno
def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Read a binary file object to its end and parse the content as TOML.

    Raises TypeError if what ``__fp.read()`` returns has no ``decode``
    method, which is what happens for a file opened in text mode.
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse a TOML document given as a string and return it as a dict.

    Raises TypeError if ``__s`` does not behave like a str and
    TOMLDecodeError if the document is invalid.
    """
    # Converting "\r\n" to "\n" is allowed by the spec, even inside string
    # literals, and it keeps the rest of the parser simpler.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    out = Output()
    parse_float = make_safe_parse_float(parse_float)
    header: Key = ()
    pos = 0

    # Each iteration handles one statement, which in TOML source usually
    # means one line.
    while True:
        # Step 1: drop whitespace at the start of the line.
        pos = skip_chars(src, pos, TOML_WS)

        # Step 2: dispatch on the first significant character. It is one of
        #   - end of file
        #   - end of line
        #   - comment
        #   - key/value pair
        #   - "[[...]]": append dict to list (and move to its namespace)
        #   - "[...]": create dict (and move to its namespace)
        # Trailing whitespace is skipped where a statement was parsed.
        try:
            char = src[pos]
        except IndexError:
            break
        if char == "\n":
            pos += 1
            continue
        if char == "[":
            header_rule = (
                create_list_rule if src.startswith("[[", pos) else create_dict_rule
            )
            # Pending flags are applied before the new header is processed.
            out.flags.finalize_pending()
            pos, header = header_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # Step 3: drop a comment, if there is one.
        pos = skip_comment(src, pos)

        # Step 4: only a newline or the end of the document may follow.
        terminator = src[pos : pos + 1]
        if not terminator:
            break
        if terminator != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags attached to parsed keys/namespaces.

    Flags live in a tree that mirrors key paths. Every node is a dict with
    three entries: "flags" (apply to that exact key), "recursive_flags"
    (apply to that key and everything below it) and "nested" (child nodes).
    """

    # An immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # A nest that was created explicitly and therefore cannot be opened
    # again with the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue ``flag`` for ``key``; it is applied by finalize_pending()."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively, then empty the queue."""
        for queued_key, queued_flag in self._pending_flags:
            self.set(queued_key, queued_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the node for ``key``, including everything nested under it.

        Does nothing when part of the path to ``key`` does not exist.
        """
        stem = key[-1]
        level = self._flags
        for part in key[:-1]:
            if part not in level:
                return
            level = level[part]["nested"]
        level.pop(stem, None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Add ``flag`` to ``key``, creating missing nodes along the path.

        With ``recursive`` true the flag also covers every key below ``key``.
        """
        parents, stem = key[:-1], key[-1]
        level = self._flags
        for part in parents:
            node = level.get(part)
            if node is None:
                node = level[part] = {
                    "flags": set(),
                    "recursive_flags": set(),
                    "nested": {},
                }
            level = node["nested"]
        node = level.get(stem)
        if node is None:
            node = level[stem] = {
                "flags": set(),
                "recursive_flags": set(),
                "nested": {},
            }
        bucket = "recursive_flags" if recursive else "flags"
        node[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return whether ``flag`` applies to ``key``.

        True when the flag is set on ``key`` itself, or set recursively on
        ``key`` or on one of its ancestors.
        """
        if not key:
            return False  # document root has no flags
        level = self._flags
        for part in key[:-1]:
            node = level.get(part)
            if node is None:
                return False
            if flag in node["recursive_flags"]:
                return True
            level = node["nested"]
        node = level.get(key[-1])
        if node is None:
            return False
        return flag in node["flags"] or flag in node["recursive_flags"]
class NestedDict:
    """A dict of nested dicts/lists that is addressed by key tuples."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Walk ``key`` from the root and return the dict found at its end.

        Missing levels are created as empty dicts. When ``access_lists`` is
        true, a list met on the way is entered through its last item.
        Raises KeyError if a level turns out not to be a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at ``key``, creating the list.

        Raises KeyError if ``key`` already holds something that is not a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        stem = key[-1]
        if stem not in parent:
            parent[stem] = [{}]
            return
        existing = parent[stem]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})
class Output:
    """Holds the parsed data together with the flags tracked for its keys."""

    def __init__(self) -> None:
        self.flags = Flags()
        self.data = NestedDict()
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after ``pos`` not holding one of ``chars``.

    Running off the end of ``src`` stops the scan; the index reached is
    returned in that case.
    """
    while True:
        try:
            current = src[pos]
        except IndexError:
            return pos
        if current not in chars:
            return pos
        pos += 1
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of ``expect`` from ``pos`` on.

    If ``expect`` never occurs, the result is ``len(src)``, unless
    ``error_on_eof`` is set, in which case TOMLDecodeError is raised.
    TOMLDecodeError is also raised, pointing at the first offender, when
    any character in the skipped span belongs to ``error_on``.
    """
    try:
        new_pos = src.index(expect, pos)
    except ValueError:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    skipped = src[pos:new_pos]
    if not error_on.isdisjoint(skipped):
        # At least one offender exists, so next() always finds one.
        bad_pos = next(i for i in range(pos, new_pos) if src[i] in error_on)
        raise TOMLDecodeError(
            f"Found invalid character {src[bad_pos]!r}", src, bad_pos
        )
    return new_pos
def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at ``pos``, if there is one.

    Returns ``pos`` unchanged when the character there is not "#".
    Otherwise returns the index of the newline ending the comment, or
    ``len(src)`` if the comment runs to the end of the document.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments starting at ``pos``.

    Repeats "skip whitespace/newlines, then skip one comment" until a
    round makes no progress.
    """
    while True:
        after_ws = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
        advanced = skip_comment(src, after_ws)
        if advanced == pos:
            return pos
        pos = advanced
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header located at ``pos``.

    Returns the position just past the closing "]" and the table's key.
    Raises TOMLDecodeError if the table is declared twice or is frozen, if
    it would overwrite a value, or if the closing "]" is missing.
    """
    # pos + 1 steps over the opening "[".
    pos = skip_chars(src, pos + 1, TOML_WS)
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array]]" header located at ``pos``.

    Appends a fresh dict to the list at the parsed key. Returns the
    position just past the closing "]]" and the key. Raises TOMLDecodeError
    if the key is frozen, would overwrite a non-list value, or if the
    closing "]]" is missing.
    """
    # pos + 2 steps over the opening "[[".
    pos = skip_chars(src, pos + 2, TOML_WS)
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The namespace now refers to another, empty list item, so release
    # everything recorded under it...
    flags.unset_all(key)
    # ...while this exact key remains barred from table declaration.
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse a top-level key/value pair and store it under ``header``.

    Returns the position just past the value. Raises TOMLDecodeError if
    the pair redefines an explicitly created namespace, touches a frozen
    one, or overwrites an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    key_stem = key[-1]
    abs_key_parent = header + key[:-1]
    flags = out.flags

    # Visit every container on the dotted path, excluding the final key.
    for depth in range(1, len(key)):
        cont_key = header + key[:depth]
        # Dotted key syntax must not redefine an existing table.
        if flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
        # In following table sections, containers on the relative path may
        # not be opened with the table syntax or dotted key/value syntax.
        flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if flags.is_(abs_key_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if key_stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)
    # Inline tables and arrays become recursively immutable namespaces.
    if isinstance(value, (dict, list)):
        flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` at ``pos``.

    Returns the position just past the value, the key and the value.
    Raises TOMLDecodeError if "=" does not follow the key.
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    # pos + 1 steps over "="; whitespace may precede the value.
    value_start = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, value_start, parse_float, nest_lvl)
    return pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key at ``pos``.

    Returns the position after the key (and any whitespace following it)
    and the key as a tuple with one item per dot-separated part.
    """
    pos, part = parse_key_part(src, pos)
    key: Key = (part,)
    pos = skip_chars(src, pos, TOML_WS)
    # Every "." introduces one more part; whitespace around it is allowed.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, part = parse_key_part(src, pos)
        key = (*key, part)
        pos = skip_chars(src, pos, TOML_WS)
    return pos, key
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one part of a key: a bare key, literal string or basic string.

    Returns the position just past the part and the part's text. Raises
    TOMLDecodeError if the character at ``pos`` cannot start a key part.
    """
    # An empty string at the end of the document matches none of the cases.
    first = src[pos : pos + 1]
    if first in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    if first == '"':
        return parse_one_line_basic_str(src, pos)
    if first == "'":
        return parse_literal_str(src, pos)
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at ``pos``."""
    # pos + 1 steps over the opening quotation mark.
    return parse_basic_str(src, pos + 1, multiline=False)
def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose opening "[" is at ``pos``.

    Whitespace, newlines and comments are accepted between items, and a
    comma may precede the closing "]". Returns the position just past the
    "]" and the list of parsed values. Raises TOMLDecodeError ("Unclosed
    array") if a value is followed by neither "," nor "]".
    """
    items: list[Any] = []

    # pos + 1 steps over the opening "[".
    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        # Step over the comma; the loop condition then catches a "]" that
        # comes straight after it.
        pos = skip_comments_and_array_ws(src, pos + 1)
    # Both ways out of the loop leave pos on the closing "]".
    return pos + 1, items
def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose opening "{" is at ``pos``.

    Returns the position just past the closing "}" and the resulting dict.
    Raises TOMLDecodeError for duplicate keys, for keys reaching into a
    value that is itself an inline table or array, for keys that would
    overwrite a value, and when a pair is followed by neither "," nor "}".
    """
    table = NestedDict()
    frozen = Flags()

    # pos + 1 steps over the opening "{".
    pos = skip_chars(src, pos + 1, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, table.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        key_parent, key_stem = key[:-1], key[-1]
        if frozen.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(key_parent, access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if key_stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos)
        nest[key_stem] = value

        pos = skip_chars(src, pos, TOML_WS)
        if src.startswith("}", pos):
            return pos + 1, table.dict
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        # More pairs follow: stop them from reaching into a nested value.
        if isinstance(value, (dict, list)):
            frozen.set(key, Flags.FROZEN, recursive=True)
        # pos + 1 steps over the comma.
        pos = skip_chars(src, pos + 1, TOML_WS)
def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at ``pos``.

    Returns the position just past the sequence and the text it stands
    for. With ``multiline`` set, a backslash followed by optional
    whitespace and a newline yields "" and also consumes the whitespace
    and newlines after it. Raises TOMLDecodeError for an unknown escape.
    """
    escape_id = src[pos : pos + 2]
    pos += 2
    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace up to the next non-whitespace character or the
        # end of the doc. Non-whitespace before a newline is an error.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            following = src[pos : pos + 1]
            if not following:
                return pos, ""
            if following != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""
    if escape_id == "\\u":
        return parse_hex_char(src, pos, 4)
    if escape_id == "\\U":
        return parse_hex_char(src, pos, 8)
    try:
        replacement = BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
    return pos, replacement
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Call parse_basic_str_escape with ``multiline`` switched on."""
    return parse_basic_str_escape(src, pos, multiline=True)
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse ``hex_len`` hex digits at ``pos`` into the character they encode.

    Returns the position just past the digits and the character. Raises
    TOMLDecodeError if there are too few digits, if any is not a hex
    digit, or if the value is not a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening "'" is at ``pos``.

    Returns the position just past the closing apostrophe and the text
    between the apostrophes, taken verbatim.
    """
    content_start = pos + 1  # just past the opening apostrophe
    closing = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # closing + 1 steps over the ending apostrophe.
    return closing + 1, src[content_start:closing]
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opener is at ``pos``.

    ``literal`` selects the ''' form over the triple-quote form. A newline
    directly after the opener is dropped. Returns the position just past
    the string and its content.
    """
    pos += 3
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        end_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:end_pos]
        pos = end_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # The end sequence may be 4 or 5 chars long instead of 3; in that case
    # up to two extra apostrophes/quotes belong to the content.
    for _ in range(2):
        if not src.startswith(delim, pos):
            break
        pos += 1
        result += delim
    return pos, result
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; ``pos`` is just past the opener.

    Returns the position just past the closing delimiter (one quotation
    mark, or three when ``multiline``) and the content with escapes
    resolved. Raises TOMLDecodeError if the string is unterminated or
    holds an illegal character.
    """
    if multiline:
        forbidden = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        read_escape = parse_basic_str_escape_multiline
    else:
        forbidden = ILLEGAL_BASIC_STR_CHARS
        read_escape = parse_basic_str_escape
    parsed = ""
    # Start of the run of plain characters not yet copied into ``parsed``.
    chunk_start = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None
        if char == "\\":
            parsed += src[chunk_start:pos]
            pos, replacement = read_escape(src, pos)
            parsed += replacement
            chunk_start = pos
        elif char == '"':
            if not multiline:
                return pos + 1, parsed + src[chunk_start:pos]
            if src.startswith('"""', pos):
                return pos + 3, parsed + src[chunk_start:pos]
            # A quotation mark alone inside a multiline string is content.
            pos += 1
        elif char in forbidden:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1
def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value that starts at ``pos``.

    Returns the position just past the value and the parsed object.
    ``nest_lvl`` is the current inline array/table depth; arrays and
    inline tables are parsed with it increased by one. Raises
    TOMLDecodeError if nothing valid starts at ``pos``, and RecursionError
    once ``nest_lvl`` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    try:
        char: str | None = src[pos]
    except IndexError:
        char = None

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    as_datetime = RE_DATETIME.match(src, pos)
    if as_datetime:
        try:
            parsed_datetime = match_to_datetime(as_datetime)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return as_datetime.end(), parsed_datetime
    as_localtime = RE_LOCALTIME.match(src, pos)
    if as_localtime:
        return as_localtime.end(), match_to_localtime(as_localtime)

    # Integers and "normal" floats.
    # The regex greedily matches any type starting with a decimal char,
    # so it has to come after the handling of dates and times.
    as_number = RE_NUMBER.match(src, pos)
    if as_number:
        return as_number.end(), match_to_number(as_number, parse_float)

    # Special floats
    unsigned_special = src[pos : pos + 3]
    if unsigned_special in {"inf", "nan"}:
        return pos + 3, parse_float(unsigned_special)
    signed_special = src[pos : pos + 4]
    if signed_special in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(signed_special)

    raise TOMLDecodeError("Invalid value", src, pos)
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return whether ``codepoint`` lies in 0..0xD7FF or 0xE000..0x10FFFF."""
    if 0 <= codepoint <= 0xD7FF:
        return True
    return 0xE000 <= codepoint <= 0x10FFFF
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """Wrap `parse_float` so that it cannot return a dict or a list.

    Values of those types would be mixed up with parsed TOML tables and
    arrays and confuse the parser. The callable returned here raises
    `ValueError` rather than handing back such a value.
    """
    # The builtin `float` never returns an illegal type, so skip the wrapper.
    if parse_float is float:
        return float

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    return safe_parse_float