Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-License-Identifier: MIT
2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
3# Licensed to PSF under a Contributor Agreement.
5from __future__ import annotations
7# Defer loading regular expressions until we actually need them in
8# parse_value().
9__lazy_modules__ = ["tomli._re"]
11import sys
13from ._re import (
14 RE_DATETIME,
15 RE_LOCALTIME,
16 RE_NUMBER,
17 match_to_datetime,
18 match_to_localtime,
19 match_to_number,
20)
22if sys.version_info < (3, 15): # pragma: no cover
23 from types import MappingProxyType as frozendict
25TYPE_CHECKING = False
26if TYPE_CHECKING:
27 from collections.abc import Iterable
28 from typing import IO, Any, Final
30 from ._types import Key, ParseFloat, Pos
# Inline tables and arrays are parsed recursively. A pathologically nested
# document makes pure Python raise RecursionError (acceptable), whereas
# mypyc binary wheels crash unrecoverably (not acceptable). The mypyc docs
# state that this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until that fix lands, this library has to cap the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# seems far below the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# A pathologically large number of parts in a key runs into quadratic
# behavior (e.g. in Flags.is_). Keys are not currently parsed recursively,
# but they name a recursive structure, so capping them with
# getrecursionlimit() and reporting RecursionError is a sensible fit.
MAX_KEY_PARTS: Final = sys.getrecursionlimit()
# Code points 0-31 plus DEL (127).
ASCII_CTRL: Final = frozenset(map(chr, range(32))) | frozenset(chr(127))

# Neither of the two sets below contains the quotation mark or the
# backslash; the parser functions currently treat those as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t\n")

# Literal strings and comments reuse the basic string sets.
ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset(" \t")
TOML_WS_AND_NEWLINE: Final = TOML_WS.union("\n")
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS.union("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdefABCDEF0123456789")

# Maps a two-character escape sequence (backslash included) to the
# character it stands for.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = frozendict(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000a",  # linefeed
        "\\f": "\u000c",  # form feed
        "\\r": "\u000d",  # carriage return
        "\\e": "\u001b",  # escape
        '\\"': "\u0022",  # quote
        "\\\\": "\u005c",  # backslash
    }
)
class DEPRECATED_DEFAULT:
    """Sentinel class used as the default argument value while
    TOMLDecodeError's free-form arguments are being deprecated.

    The class object itself (not an instance) serves as the marker.
    """
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML.

    Adds the following attributes to ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    The attributes are only set when the error is constructed with the
    supported `(msg: str, doc: str, pos: int)` arguments.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            args
            or not isinstance(msg, str)
            or not isinstance(doc, str)
            or not isinstance(pos, int)
        ):
            # Deprecated free-form construction: warn, then behave like a
            # plain ValueError that receives every explicitly given argument.
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            given = tuple(
                value for value in (msg, doc, pos) if value is not DEPRECATED_DEFAULT
            )
            ValueError.__init__(self, *given, *args)
            return

        # Line number is 1-based: count the newlines that precede `pos`.
        lineno = doc.count("\n", 0, pos) + 1
        # rfind() yields -1 when no newline precedes `pos` (first line),
        # which turns the subtraction into `pos + 1`.
        colno = pos - doc.rfind("\n", 0, pos)

        if pos < len(doc):
            coord_repr = f"line {lineno}, column {colno}"
        else:
            coord_repr = "end of document"
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno
def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    The whole file is read and decoded with `bytes.decode()` defaults, then
    handed to `loads`. Raises TypeError when the object returned by
    `read()` has no `decode` method (e.g. the file was opened in text mode).
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    else:
        return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a string.

    Raises TypeError if `__s` cannot be treated as a str, and
    TOMLDecodeError if a statement is invalid.
    """
    # The spec allows converting "\r\n" to "\n", even inside string
    # literals. Doing so up front simplifies the rest of the parser.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    out = Output()
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)
    pos = 0

    # Each iteration handles one statement
    # (which typically means one line of TOML source).
    while True:
        # 1. Skip the line's leading whitespace
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Apply a parse rule. The statement must be one of:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        #    Trailing whitespace is skipped when applicable.
        first = src[pos : pos + 1]
        if not first:
            break  # end of file
        if first == "\n":
            pos += 1
            continue
        if first in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif first == "[":
            is_array_header = src.startswith("[[", pos)
            out.flags.finalize_pending()
            if is_array_header:
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif first != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Skip a comment, if there is one
        pos = skip_comment(src, pos)

        # 4. The statement must end with a newline or the end of the file
        terminator = src[pos : pos + 1]
        if not terminator:
            break
        if terminator != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags are kept in a tree that mirrors key nesting. Every node is a dict
    with three entries: "flags" (apply to that exact key), "recursive_flags"
    (apply to that key and everything below it) and "nested" (child nodes).
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue `flag` for `key`; it takes effect on finalize_pending()."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively, then empty the queue."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the node of `key`, including everything nested below it.

        Does nothing when a parent of `key` has no node.
        """
        level = self._flags
        for part in key[:-1]:
            if part not in level:
                return
            level = level[part]["nested"]
        level.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Add `flag` to `key`, creating missing nodes along the path.

        With `recursive=True` the flag also covers every key below `key`.
        """
        parents, stem = key[:-1], key[-1]
        level = self._flags
        for part in parents:
            node = level.setdefault(
                part, {"flags": set(), "recursive_flags": set(), "nested": {}}
            )
            level = node["nested"]
        target = level.setdefault(
            stem, {"flags": set(), "recursive_flags": set(), "nested": {}}
        )
        bucket = "recursive_flags" if recursive else "flags"
        target[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return True if `flag` applies to `key`, either set on the key
        itself or set recursively on the key or one of its parents."""
        if not key:
            return False  # document root has no flags
        level = self._flags
        for part in key[:-1]:
            node = level.get(part)
            if node is None:
                return False
            if flag in node["recursive_flags"]:
                return True
            level = node["nested"]
        node = level.get(key[-1])
        if node is None:
            return False
        return flag in node["flags"] or flag in node["recursive_flags"]
class NestedDict:
    """Holder of the nested dict that is built up while parsing."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Walk `key` from the root, creating empty dicts for missing parts,
        and return the dict found at the end.

        When `access_lists` is true, a list found along the way is entered
        through its last item. Raises KeyError if a part of the path is
        neither a dict nor (when allowed) a list ending in a dict.
        """
        current: Any = self.dict
        for part in key:
            current = current.setdefault(part, {})
            if access_lists and isinstance(current, list):
                current = current[-1]
            if not isinstance(current, dict):
                raise KeyError("There is no nest behind this key")
        return current  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list
        if the final key part does not exist yet.

        Raises KeyError if something other than a list is stored there.
        """
        parent = self.get_or_create_nest(key[:-1])
        stem = key[-1]
        if stem not in parent:
            parent[stem] = [{}]
            return
        existing = parent[stem]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})
class Output:
    """Parser state: the data parsed so far plus the flags of its keys."""

    def __init__(self) -> None:
        self.flags = Flags()
        self.data = NestedDict()
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after `pos` whose character is not in
    `chars`. An index past the end of `src` is returned unchanged."""
    while True:
        try:
            current = src[pos]
        except IndexError:
            return pos
        if current not in chars:
            return pos
        pos += 1
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of `expect` at or after `pos`.

    If `expect` is not found, the length of `src` is returned, unless
    `error_on_eof` is true, in which case TOMLDecodeError is raised.
    TOMLDecodeError is also raised if any character in the skipped range
    is in `error_on`; the error points at the first such character.
    """
    new_pos = src.find(expect, pos)
    if new_pos == -1:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    if error_on.isdisjoint(src[pos:new_pos]):
        return new_pos

    # Locate the first offending character for the error position.
    bad_pos = pos
    while src[bad_pos] not in error_on:
        bad_pos += 1
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)
def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment that starts at `pos`, if there is one.

    Returns the index of the newline ending the comment (or the length of
    `src` when the comment runs to the end). Returns `pos` unchanged when
    the character at `pos` is not "#".
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any run of whitespace, newlines and comments starting at `pos`.

    Repeats "skip whitespace/newlines, then skip one comment" until a
    round makes no progress, and returns the resulting index.
    """
    while True:
        advanced = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
        if advanced == pos:
            return pos
        pos = advanced
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header whose "[" is at `pos`.

    Marks the key as explicitly created and makes sure a dict exists for
    it in `out.data`. Returns the index after the closing "]" and the
    parsed key. Raises TOMLDecodeError if the table was already declared,
    is frozen, collides with a non-table value, or the "]" is missing.
    """
    pos = skip_chars(src, pos + 1, TOML_WS)  # "+ 1" skips "["
    pos, key = parse_key(src, pos)

    flags = out.flags
    if any(flags.is_(key, flag) for flag in (Flags.EXPLICIT_NEST, Flags.FROZEN)):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array of tables]]" header whose "[[" starts at `pos`.

    Appends a new empty dict to the list stored at the key. Returns the
    index after the closing "]]" and the parsed key. Raises
    TOMLDecodeError if the key is frozen, holds something other than a
    list, or the "]]" is missing.
    """
    pos = skip_chars(src, pos + 2, TOML_WS)  # "+ 2" skips "[["
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The namespace now points to a fresh, empty list item, so release
    # whatever flags were recorded below it...
    flags.unset_all(key)
    # ...while this exact key stays prohibited from table declaration
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse a top-level `key = value` statement and store it in `out`.

    The key is relative to `header`, the currently open table. Returns
    the index after the value. Raises TOMLDecodeError if the statement
    would redefine an explicit table, touch a frozen namespace or
    overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    key_stem = key[-1]

    # Walk the containers named by the dotted key, from the outermost one
    # down to the direct parent of the value.
    abs_key_parent = header
    for part in key[:-1]:
        abs_key_parent = abs_key_parent + (part,)
        # Dotted key syntax must not redefine an existing table
        if out.flags.is_(abs_key_parent, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(
                f"Cannot redefine namespace {abs_key_parent}", src, pos
            )
        # Containers in the relative path can't be opened with the table
        # syntax or dotted key/value syntax in following table sections.
        out.flags.add_pending(abs_key_parent, Flags.EXPLICIT_NEST)

    if out.flags.is_(abs_key_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if key_stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)
    # Inline table and array namespaces become recursively immutable
    if isinstance(value, (dict, list)):
        out.flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse `key = value` starting at `pos`.

    Returns the index after the value, the key and the value. Raises
    TOMLDecodeError if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    pos = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, pos, parse_float, nest_lvl)
    return pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key starting at `pos`.

    Returns the index after the key (trailing whitespace skipped) and the
    key as a tuple of parts. Raises RecursionError if the key has more
    than MAX_KEY_PARTS parts.
    """
    pos, part = parse_key_part(src, pos)
    key: Key = (part,)
    pos = skip_chars(src, pos, TOML_WS)
    # Every "." announces one more key part.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, part = parse_key_part(src, pos)
        key = key + (part,)
        if len(key) > MAX_KEY_PARTS:
            raise RecursionError(
                f"TOML key has more than the allowed {MAX_KEY_PARTS} parts"
            )
        pos = skip_chars(src, pos, TOML_WS)
    return pos, key
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one part of a key: a bare key, a literal string or a
    one-line basic string.

    Returns the index after the part and the part's text. Raises
    TOMLDecodeError if `pos` does not start any of the three forms.
    """
    lead = src[pos : pos + 1]  # empty string at end of document
    if lead == '"':
        return parse_one_line_basic_str(src, pos)
    if lead == "'":
        return parse_literal_str(src, pos)
    if lead in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a one-line basic string whose opening quote is at `pos`."""
    # "+ 1" steps over the opening quotation mark
    return parse_basic_str(src, pos + 1, multiline=False)
def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose "[" is at `pos`.

    Whitespace, newlines and comments are allowed between items, and a
    trailing comma is accepted. Returns the index after the closing "]"
    and the list of values. Raises TOMLDecodeError ("Unclosed array") if
    a value is followed by neither "," nor "]".
    """
    items: list[Any] = []
    pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips "["
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips ","
    return pos + 1, items
def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose "{" is at `pos`.

    Whitespace, newlines and comments are allowed between pairs, and a
    trailing comma is accepted. Returns the index after the closing "}"
    and the resulting dict. Raises TOMLDecodeError on a duplicate key, on
    an attempt to extend an already assigned inline table/array or other
    value, and when a pair is followed by neither "," nor "}".
    """
    table = NestedDict()
    frozen = Flags()

    pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips "{"
    if src.startswith("}", pos):
        return pos + 1, table.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        stem = key[-1]
        if frozen.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(key[:-1], access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {stem!r}", src, pos)
        nest[stem] = value

        pos = skip_comments_and_array_ws(src, pos)
        if src.startswith("}", pos):
            return pos + 1, table.dict
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        pos = skip_comments_and_array_ws(src, pos + 1)  # "+ 1" skips ","
        if src.startswith("}", pos):
            return pos + 1, table.dict
        # More pairs follow: a nested table/array value must not be
        # extended by any of them.
        if isinstance(value, (dict, list)):
            frozen.set(key, Flags.FROZEN, recursive=True)
def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the index after the sequence and the text it stands for. In
    multiline mode a backslash followed by whitespace/newline consumes
    the whitespace up to the next non-whitespace character and yields an
    empty string. Raises TOMLDecodeError for an unknown escape.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in ("\\ ", "\\\t", "\\\n"):
        # Skip whitespace until the next non-whitespace character or the
        # end of the doc. Non-whitespace before the newline is an error.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            following = src[pos : pos + 1]
            if not following:
                return pos, ""
            if following != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # Escapes followed by a fixed number of hex digits
    hex_len = {"\\x": 2, "\\u": 4, "\\U": 8}.get(escape_id)
    if hex_len is not None:
        return parse_hex_char(src, pos, hex_len)

    if escape_id in BASIC_STR_ESCAPE_REPLACEMENTS:
        return pos, BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse an escape sequence using the multiline basic string rules."""
    result = parse_basic_str_escape(src, pos, multiline=True)
    return result
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse `hex_len` hex digits starting at `pos` into one character.

    Returns the index after the digits and the character. Raises
    TOMLDecodeError if there are too few digits, a digit is not
    hexadecimal, or the value is not a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if is_unicode_scalar_value(codepoint):
        return end, chr(codepoint)
    # This error is reported at the index after the digits.
    raise TOMLDecodeError(
        "Escaped character is not a Unicode scalar value", src, end
    )
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a one-line literal string whose opening apostrophe is at `pos`.

    Returns the index after the closing apostrophe and the raw content.
    """
    content_start = pos + 1  # step over the opening apostrophe
    content_end = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # "+ 1" steps over the closing apostrophe
    return content_end + 1, src[content_start:content_end]
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose opening delimiter starts at `pos`.

    `literal` selects between the ''' (literal) and the triple quotation
    mark (basic) flavor. A newline directly after the opening delimiter
    is dropped. Returns the index after the string and its content.
    """
    pos += 3  # step over the opening delimiter
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        content_end = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:content_end]
        pos = content_end + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # The end sequence may be 4 or 5 chars long instead of just 3; the
    # (at most two) extra apostrophes/quotes belong to the content.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the content of a basic string, starting right after its
    opening delimiter.

    Returns the index after the closing delimiter and the content with
    escape sequences resolved. Raises TOMLDecodeError if the document
    ends before the string is closed or an illegal character is found.
    """
    error_on = (
        ILLEGAL_MULTILINE_BASIC_STR_CHARS if multiline else ILLEGAL_BASIC_STR_CHARS
    )
    parse_escapes = (
        parse_basic_str_escape_multiline if multiline else parse_basic_str_escape
    )
    closing_len = 3 if multiline else 1

    # Finished pieces of the result; `chunk_start` marks where the raw,
    # not yet collected text begins.
    chunks: list[str] = []
    chunk_start = pos
    while True:
        char = src[pos : pos + 1]
        if not char:
            raise TOMLDecodeError("Unterminated string", src, pos) from None
        if char == '"':
            if not multiline or src.startswith('"""', pos):
                chunks.append(src[chunk_start:pos])
                return pos + closing_len, "".join(chunks)
            # A lone quotation mark inside a multiline string is content.
            pos += 1
            continue
        if char == "\\":
            chunks.append(src[chunk_start:pos])
            pos, unescaped = parse_escapes(src, pos)
            chunks.append(unescaped)
            chunk_start = pos
            continue
        if char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        pos += 1
def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value that starts at `pos`.

    Returns the index after the value and the parsed value. Raises
    TOMLDecodeError if no value can be parsed, and RecursionError if
    `nest_lvl` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    lead = src[pos : pos + 1]  # empty string at end of document

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if lead == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if lead == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if lead == "t" and src.startswith("true", pos):
        return pos + 4, True
    if lead == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if lead == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if lead == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex greedily matches any type starting with a decimal char,
    # which is why it has to come after the handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats, unsigned first and then signed
    if src.startswith(("inf", "nan"), pos):
        return pos + 3, parse_float(src[pos : pos + 3])
    if src.startswith(("-inf", "+inf", "-nan", "+nan"), pos):
        return pos + 4, parse_float(src[pos : pos + 4])

    raise TOMLDecodeError("Invalid value", src, pos)
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return True if `codepoint` lies in 0..0xD7FF or 0xE000..0x10FFFF,
    i.e. in the code point range but outside the gap 0xD800..0xDFFF."""
    below_gap = 0 <= codepoint <= 0xD7FF
    above_gap = 0xE000 <= codepoint <= 0x10FFFF
    return below_gap or above_gap
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    `parse_float` must not return dicts or lists: those types would get
    mixed up with parsed TOML tables and arrays and confuse the parser.
    The returned callable raises `ValueError` instead of handing back
    such a value.
    """

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    # The default `float` callable never returns illegal types, so it is
    # returned unwrapped as an optimization.
    return float if parse_float is float else safe_parse_float