Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-License-Identifier: MIT
2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
3# Licensed to PSF under a Contributor Agreement.
5from __future__ import annotations
7import sys
8from types import MappingProxyType
10from ._re import (
11 RE_DATETIME,
12 RE_LOCALTIME,
13 RE_NUMBER,
14 match_to_datetime,
15 match_to_localtime,
16 match_to_number,
17)
# Hard-coded to False so the imports below never execute at runtime; the
# imported names are referenced only in annotations.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import Iterable
    from typing import IO, Any, Final

    from ._types import Key, ParseFloat, Pos

# Inline tables and arrays are parsed recursively. With pure Python a
# pathologically nested document ends in RecursionError (acceptable), but
# mypyc binary wheels crash unrecoverably instead (not acceptable).
# The mypyc docs say this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until then this library has to cap the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# seems far below the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# A key with a pathologically large number of parts triggers quadratic
# behavior (e.g. in Flags.is_).
# Keys are not currently parsed recursively, but they do name a recursive
# structure, so limiting them with getrecursionlimit() and RecursionError
# is a sensible fit.
MAX_KEY_PARTS: Final = sys.getrecursionlimit()
# Code points 0-31 plus DEL (127).
ASCII_CTRL: Final = frozenset(map(chr, range(32))) | {"\x7f"}

# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t"}
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t", "\n"}

ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset({" ", "\t"})
TOML_WS_AND_NEWLINE: Final = TOML_WS | {"\n"}
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | {'"', "'"}
HEXDIGIT_CHARS: Final = frozenset("0123456789abcdefABCDEF")

# Maps each two-character escape sequence (backslash + identifier) to the
# single character it stands for.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        r"\b": "\b",  # backspace
        r"\t": "\t",  # tab
        r"\n": "\n",  # linefeed
        r"\f": "\f",  # form feed
        r"\r": "\r",  # carriage return
        r"\e": "\x1b",  # escape
        r"\"": '"',  # quote
        r"\\": "\\",  # backslash
    }
)
class DEPRECATED_DEFAULT:
    """Sentinel used as the default argument value while the free-form
    arguments of TOMLDecodeError are in their deprecation period."""
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML.

    Adds the following attributes to ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    The attributes are only set when the error is constructed with exactly
    ``msg`` (str), ``doc`` (str) and ``pos`` (int). Any other argument
    combination is deprecated: it emits a DeprecationWarning and forwards
    the supplied arguments to ValueError unchanged.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        uses_supported_signature = (
            not args
            and isinstance(msg, str)
            and isinstance(doc, str)
            and isinstance(pos, int)
        )
        if not uses_supported_signature:
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Rebuild the positional arguments exactly as the caller gave
            # them, leaving out parameters still at the sentinel default.
            supplied = tuple(
                arg for arg in (msg, doc, pos) if arg is not DEPRECATED_DEFAULT
            )
            ValueError.__init__(self, *supplied, *args)
            return

        # 1-based line/column of `pos` within `doc`.
        newlines_before = doc.count("\n", 0, pos)
        lineno = newlines_before + 1
        if newlines_before:
            colno = pos - doc.rindex("\n", 0, pos)
        else:
            colno = pos + 1

        if pos < len(doc):
            coord_repr = f"line {lineno}, column {colno}"
        else:
            coord_repr = "end of document"
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno
def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    Reads everything from ``__fp``, decodes it with ``bytes.decode()``
    defaults and hands the text to ``loads``. Raises TypeError when the
    object returned by ``read()`` has no ``decode`` attribute (e.g. the
    file was opened in text mode).
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    else:
        return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a string.

    ``parse_float`` is called with the source text of every float; it is
    wrapped by ``make_safe_parse_float`` before use. Raises TypeError if
    ``__s`` does not behave like a str, and TOMLDecodeError on a malformed
    statement.
    """
    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Let's do so to simplify parsing.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    out = Output()
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)
    src_len = len(src)
    pos = 0

    # Each iteration consumes one statement (typically one source line).
    while True:
        # 1. Drop the line's leading whitespace
        pos = skip_chars(src, pos, TOML_WS)
        if pos >= src_len:
            break

        # 2. Dispatch on the first character. It starts one of:
        #      - end of line
        #      - comment
        #      - key/value pair
        #      - append dict to list (and move to its namespace)
        #      - create dict (and move to its namespace)
        #    Trailing whitespace is skipped when applicable.
        char = src[pos]
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            out.flags.finalize_pending()
            if src.startswith("[[", pos):
                header_rule = create_list_rule
            else:
                header_rule = create_dict_rule
            pos, header = header_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Drop a trailing comment, if any
        pos = skip_comment(src, pos)

        # 4. Only a newline or the end of the document may follow
        if pos >= src_len:
            break
        if src[pos] != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags live in a tree that mirrors the key hierarchy. Every node holds
    its own "flags", "recursive_flags" that also apply to every key below
    it, and the "nested" child nodes.
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    @staticmethod
    def _new_entry() -> dict[Any, Any]:
        """Return an empty flag-tree node."""
        return {"flags": set(), "recursive_flags": set(), "nested": {}}

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue ``flag`` for ``key``; it is applied by finalize_pending()."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively, then empty the queue."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the node for ``key`` (and everything nested under it).

        Does nothing when ``key`` or one of its parents has no node.
        """
        stem = key[-1]
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return
            node = entry["nested"]
        node.pop(stem, None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Set ``flag`` on ``key``, creating missing nodes along the path.

        With ``recursive=True`` the flag also covers every key below ``key``.
        """
        stem = key[-1]
        node = self._flags
        for part in key[:-1]:
            node = node.setdefault(part, self._new_entry())["nested"]
        entry = node.setdefault(stem, self._new_entry())
        bucket = "recursive_flags" if recursive else "flags"
        entry[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return whether ``flag`` applies to ``key``.

        True when the flag is set on ``key`` itself (recursively or not) or
        recursively on any of its parents.
        """
        if not key:
            return False  # document root has no flags
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return False
            if flag in entry["recursive_flags"]:
                return True
            node = entry["nested"]
        leaf = node.get(key[-1])
        if leaf is None:
            return False
        return flag in leaf["flags"] or flag in leaf["recursive_flags"]
class NestedDict:
    """Nested dict container that is addressed with tuple keys."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Return the dict found at ``key``, creating missing dicts on the way.

        With ``access_lists`` enabled, a list met on the path is entered
        through its last element. Raises KeyError when a path component
        resolves to something that is not a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at ``key``, creating the list
        if the key is not present yet.

        Raises KeyError when ``key`` already holds a non-list value.
        """
        parent = self.get_or_create_nest(key[:-1])
        target = parent.setdefault(key[-1], [])
        if not isinstance(target, list):
            raise KeyError("An object other than list found behind this key")
        target.append({})
class Output:
    """Pairs the parsed data (``data``) with the flags tracked for its keys
    (``flags``)."""

    def __init__(self) -> None:
        self.flags = Flags()
        self.data = NestedDict()
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after ``pos`` whose character is not in
    ``chars``; running off the end of ``src`` stops the scan as well."""
    while True:
        try:
            current = src[pos]
        except IndexError:
            return pos
        if current not in chars:
            return pos
        pos += 1
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of ``expect`` at or after ``pos``.

    If ``expect`` never occurs, ``len(src)`` is returned, or TOMLDecodeError
    is raised when ``error_on_eof`` is set. TOMLDecodeError is also raised,
    pointing at the first offender, if any skipped character is in
    ``error_on``.
    """
    try:
        new_pos = src.index(expect, pos)
    except ValueError:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    skipped = src[pos:new_pos]
    if error_on.isdisjoint(skipped):
        return new_pos

    # At least one skipped character is illegal; find the first one.
    offset = next(i for i, char in enumerate(skipped) if char in error_on)
    bad_pos = pos + offset
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)
def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at ``pos``, if there is one.

    When ``src[pos]`` is "#", returns the index of the newline ending the
    comment (or ``len(src)`` if none follows); otherwise returns ``pos``
    unchanged. Illegal comment characters are rejected by ``skip_until``.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments starting at ``pos``.

    Repeats the whitespace skip followed by the comment skip until a full
    pass no longer moves the position.
    """
    previous_pos = None
    while pos != previous_pos:
        previous_pos = pos
        pos = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
        pos = skip_comment(src, pos)
    return pos
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header whose "[" is at ``pos``.

    Marks the key as an explicit nest, makes sure the table exists in
    ``out.data`` and returns the position after the closing "]" together
    with the table's key. Raises TOMLDecodeError if the table is already
    declared or frozen, if the key is occupied by a non-table value, or if
    the closing "]" is missing.
    """
    pos = skip_chars(src, pos + 1, TOML_WS)  # "+ 1" skips "["
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array]]" header whose first "[" is at ``pos``.

    Appends a new empty table to the list at the parsed key and returns the
    position after the closing "]]" together with that key. Raises
    TOMLDecodeError if the namespace is frozen, if the key holds a non-list
    value, or if the closing "]]" is missing.
    """
    pos = skip_chars(src, pos + 2, TOML_WS)  # "+ 2" skips "[["
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # Free the namespace now that it points to another empty list item...
    flags.unset_all(key)
    # ...but this key precisely is still prohibited from table declaration
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse a ``key = value`` statement at ``pos`` and store it in ``out``.

    The key is interpreted relative to ``header`` (the current table).
    Returns the position right after the value. Raises TOMLDecodeError if
    the dotted key reopens an explicitly created table, targets a frozen
    namespace, or would overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    key_stem = key[-1]
    abs_key_parent = header + key[:-1]
    flags = out.flags

    for depth in range(1, len(key)):
        cont_key = header + key[:depth]
        # Check that dotted key syntax does not redefine an existing table
        if flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if flags.is_(abs_key_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if key_stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)

    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` at ``pos``.

    Returns the position after the value, the key and the parsed value.
    Raises TOMLDecodeError if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    value_pos = skip_chars(src, pos + 1, TOML_WS)
    end_pos, value = parse_value(src, value_pos, parse_float, nest_lvl)
    return end_pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a (possibly dotted) key starting at ``pos``.

    Returns the position after the key, with trailing whitespace already
    skipped, and the key parts as a tuple. Raises RecursionError when the
    key has more than MAX_KEY_PARTS parts.
    """
    pos, first_part = parse_key_part(src, pos)
    parts = [first_part]
    pos = skip_chars(src, pos, TOML_WS)
    # Each "." introduces one more key part.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, next_part = parse_key_part(src, pos)
        parts.append(next_part)
        if len(parts) > MAX_KEY_PARTS:
            raise RecursionError(
                f"TOML key has more than the allowed {MAX_KEY_PARTS} parts"
            )
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one key part at ``pos``: a literal string, a one-line basic
    string, or a bare key.

    Returns the position after the part and the part's text. Raises
    TOMLDecodeError if ``pos`` does not start any of the three forms.
    """
    if src.startswith("'", pos):
        return parse_literal_str(src, pos)
    if src.startswith('"', pos):
        return parse_one_line_basic_str(src, pos)
    # A bare key is a non-empty run of bare-key characters.
    end_pos = skip_chars(src, pos, BARE_KEY_CHARS)
    if end_pos > pos:
        return end_pos, src[pos:end_pos]
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at ``pos``."""
    # "+ 1" skips the opening quotation mark.
    return parse_basic_str(src, pos + 1, multiline=False)
def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose "[" is at ``pos``.

    Whitespace, newlines and comments may surround the items, and a
    trailing comma is accepted. Returns the position after the closing "]"
    and the list of parsed values. Raises TOMLDecodeError if an item is
    followed by neither "," nor "]".
    """
    items: list[Any] = []
    pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips "["
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips ","
    return pos + 1, items
def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose "{" is at ``pos``.

    Whitespace, newlines and comments may surround the pairs, and a
    trailing comma is accepted. Returns the position after the closing "}"
    and the resulting dict. Raises TOMLDecodeError on duplicate keys, on
    extending a nested inline table/array through a dotted key, on
    overwriting a value, or when a pair is followed by neither "," nor "}".
    """
    table = NestedDict()
    # Flags local to this inline table.
    flags = Flags()

    pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips "{"
    while not src.startswith("}", pos):
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        key_stem = key[-1]
        if flags.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(key[:-1], access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if key_stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos)
        nest[key_stem] = value

        pos = skip_comments_and_array_ws(src, pos)
        if src.startswith("}", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips ","
        if src.startswith("}", pos):
            break
        # Another pair follows: later dotted keys must not reach into this
        # nested table/array.
        if isinstance(value, (dict, list)):
            flags.set(key, Flags.FROZEN, recursive=True)
    return pos + 1, table.dict
def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at ``pos``.

    Returns the position after the sequence and the text it stands for.
    With ``multiline`` set, a backslash followed by space, tab or newline
    is a line continuation: it and the whitespace/newlines after it
    produce an empty string. Raises TOMLDecodeError for unknown escapes.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in ("\\ ", "\\\t", "\\\n"):
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            if pos >= len(src):
                return pos, ""
            if src[pos] != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # Hex escapes, mapped to the number of hex digits they take.
    hex_len = {"\\x": 2, "\\u": 4, "\\U": 8}.get(escape_id)
    if hex_len is not None:
        return parse_hex_char(src, pos, hex_len)

    try:
        return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse an escape sequence at ``pos`` using the multiline-string rules."""
    return parse_basic_str_escape(src, pos, multiline=True)
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Decode the ``hex_len`` hex digits starting at ``pos`` into a character.

    Returns the position after the digits and the decoded character.
    Raises TOMLDecodeError if fewer than ``hex_len`` hex digits are
    available or the value is not a Unicode scalar value.
    """
    end_pos = pos + hex_len
    digits = src[pos:end_pos]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end_pos
        )
    return end_pos, chr(codepoint)
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening apostrophe is at
    ``pos``.

    Returns the position after the closing apostrophe and the string's
    content.
    """
    content_start = pos + 1  # Skip starting apostrophe
    content_end = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # "+ 1" skips the ending apostrophe
    return content_end + 1, src[content_start:content_end]
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opening delimiter is
    at ``pos``.

    ``literal`` selects the literal form over the basic form. A newline
    directly after the opening delimiter is dropped. Returns the position
    after the closing delimiter and the string's content.
    """
    pos += 3
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        end_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:end_pos]
        pos = end_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # Add at maximum two extra apostrophes/quotes if the end sequence
    # is 4 or 5 chars long instead of just 3.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; ``pos`` is just past the opening
    delimiter.

    Returns the position after the closing delimiter (one quote, or three
    when ``multiline``) and the string with its escapes resolved. Raises
    TOMLDecodeError on an illegal character or if the string never closes.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape

    # Finished pieces of the result; src[start_pos:pos] is the plain-text
    # run that has been scanned but not collected yet.
    chunks: list[str] = []
    start_pos = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None

        if char == '"':
            if not multiline:
                closing_len = 1
            elif src.startswith('"""', pos):
                closing_len = 3
            else:
                # A lone quote inside a multiline string is plain content.
                closing_len = 0
            if closing_len:
                chunks.append(src[start_pos:pos])
                return pos + closing_len, "".join(chunks)
            pos += 1
        elif char == "\\":
            chunks.append(src[start_pos:pos])
            pos, parsed_escape = parse_escapes(src, pos)
            chunks.append(parsed_escape)
            start_pos = pos
        elif char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1
def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value starting at ``pos``.

    Returns the position after the value and the parsed value. Raises
    TOMLDecodeError if nothing at ``pos`` forms a valid value, and
    RecursionError once ``nest_lvl`` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    # Empty string at the end of the document; matches no branch below.
    char = src[pos : pos + 1]

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats
    for special in ("inf", "nan", "-inf", "+inf", "-nan", "+nan"):
        if src.startswith(special, pos):
            return pos + len(special), parse_float(special)

    raise TOMLDecodeError("Invalid value", src, pos)
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return whether ``codepoint`` lies in 0..0xD7FF or 0xE000..0x10FFFF."""
    below_gap = 0 <= codepoint <= 0xD7FF
    above_gap = 0xE000 <= codepoint <= 0x10FFFF
    return below_gap or above_gap
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    A dict or list returned by `parse_float` would be mixed up with parsed
    TOML tables and arrays and confuse the parser. The callable returned
    here raises `ValueError` instead of passing such a value on.
    """
    # The default `float` callable never returns illegal types. Optimize it.
    if parse_float is float:
        return float

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    return safe_parse_float