Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 12%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# SPDX-License-Identifier: MIT
2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen
3# Licensed to PSF under a Contributor Agreement.
5from __future__ import annotations
7from collections.abc import Iterable
8import string
9import sys
10from types import MappingProxyType
11from typing import IO, Any, Final, NamedTuple
12import warnings
14from ._re import (
15 RE_DATETIME,
16 RE_LOCALTIME,
17 RE_NUMBER,
18 match_to_datetime,
19 match_to_localtime,
20 match_to_number,
21)
22from ._types import Key, ParseFloat, Pos
# Inline tables and arrays are parsed recursively. In pure Python a
# pathologically nested document ends in RecursionError, which is
# acceptable, whereas mypyc binary wheels crash unrecoverably, which is
# not. According to the mypyc docs this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until that fix lands, this library has to cap the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# seems far lower than the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()
# Code points 0-31 plus 127.
ASCII_CTRL: Final = frozenset(map(chr, range(32))) | frozenset(chr(127))

# Quotation mark and backslash are deliberately absent from these sets;
# the parser functions currently handle them as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t\n")

ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset(" \t")
TOML_WS_AND_NEWLINE: Final = TOML_WS.union("\n")
BARE_KEY_CHARS: Final = frozenset(string.ascii_letters + string.digits + "-_")
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS.union("\"'")
HEXDIGIT_CHARS: Final = frozenset(string.hexdigits)
# Read-only lookup from a two-character escape sequence (backslash plus
# one character) to the single character it is replaced with.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000A",  # linefeed
        "\\f": "\u000C",  # form feed
        "\\r": "\u000D",  # carriage return
        '\\"': "\u0022",  # quote
        "\\\\": "\u005C",  # backslash
    }
)
class DEPRECATED_DEFAULT:
    """Sentinel default for TOMLDecodeError's arguments.

    Used only while the free-form argument style of TOMLDecodeError is
    going through its deprecation period. The class object itself is the
    marker; it is never instantiated.
    """
class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML.

    When constructed with the supported ``(msg, doc, pos)`` arguments, the
    following attributes are added to ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    Any other argument combination is deprecated: it emits a
    DeprecationWarning, forwards the given arguments to ValueError
    unchanged and sets none of the attributes above.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            not args
            and isinstance(msg, str)
            and isinstance(doc, str)
            and isinstance(pos, int)
        ):
            # Lines are 1-based: count the newlines that precede `pos`.
            lineno = doc.count("\n", 0, pos) + 1
            # Columns are 1-based, measured from the newline before `pos`
            # (or from the start of the document on the first line).
            colno = pos + 1 if lineno == 1 else pos - doc.rindex("\n", 0, pos)

            if pos >= len(doc):
                coord_repr = "end of document"
            else:
                coord_repr = f"line {lineno}, column {colno}"
            ValueError.__init__(self, f"{msg} (at {coord_repr})")

            self.msg = msg
            self.doc = doc
            self.pos = pos
            self.lineno = lineno
            self.colno = colno
            return

        # Deprecated free-form call.
        warnings.warn(
            "Free-form arguments for TOMLDecodeError are deprecated. "
            "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
            DeprecationWarning,
            stacklevel=2,
        )
        # Pass on exactly what the caller supplied, in the original order,
        # dropping parameters that still hold the sentinel default.
        supplied = [
            value for value in (msg, doc, pos) if value is not DEPRECATED_DEFAULT
        ]
        ValueError.__init__(self, *supplied, *args)
def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    The whole file is read, decoded with ``bytes.decode()`` defaults and
    handed to `loads`. A TypeError is raised when the object returned by
    ``read()`` has no ``decode`` attribute.
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(text, parse_float=parse_float)
def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:  # noqa: C901
    """Parse TOML from a string.

    Raises TypeError if the argument cannot be treated as a str, and
    TOMLDecodeError if a statement is invalid or is not followed by a
    newline or the end of the document.
    """

    # The spec allows "\r\n" to be converted to "\n", even inside string
    # literals; doing so up front simplifies the rest of the parser.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None
    pos = 0
    out = Output(NestedDict(), Flags())
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)

    # One statement per iteration (typically one line of TOML source).
    while True:
        # 1. Skip whitespace at the start of the line.
        pos = skip_chars(src, pos, TOML_WS)

        # 2. Dispatch on the first character. It must start one of:
        #      - end of file
        #      - end of line
        #      - comment
        #      - key/value pair
        #      - "[[...]]": append dict to list (and move to its namespace)
        #      - "[...]": create dict (and move to its namespace)
        #    Trailing whitespace is skipped where applicable.
        first = src[pos : pos + 1]
        if not first:
            break
        if first == "\n":
            pos += 1
            continue
        if first in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif first == "[":
            is_array_header = src[pos + 1 : pos + 2] == "["
            out.flags.finalize_pending()
            rule = create_list_rule if is_array_header else create_dict_rule
            pos, header = rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif first != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Skip a comment, if there is one.
        pos = skip_comment(src, pos)

        # 4. Only end of line or end of file may follow.
        terminator = src[pos : pos + 1]
        if not terminator:
            break
        if terminator != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict
class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags live in a tree that mirrors the key hierarchy. Each node is a
    dict with three entries: "flags" (apply to that key only),
    "recursive_flags" (apply to that key and everything below it) and
    "nested" (the child nodes).
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue `flag` for `key`; it takes effect on `finalize_pending`."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively, then empty the queue."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Remove the node for `key`, and with it everything nested below.

        Does nothing when a parent of `key` has no node.
        """
        level = self._flags
        for part in key[:-1]:
            if part not in level:
                return
            level = level[part]["nested"]
        level.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Add `flag` to `key`, creating missing nodes along the path."""
        key_parent, key_stem = key[:-1], key[-1]
        level = self._flags
        for part in key_parent:
            node = level.setdefault(
                part, {"flags": set(), "recursive_flags": set(), "nested": {}}
            )
            level = node["nested"]
        target = level.setdefault(
            key_stem, {"flags": set(), "recursive_flags": set(), "nested": {}}
        )
        bucket = "recursive_flags" if recursive else "flags"
        target[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return True if `flag` applies to `key`.

        That is the case when the flag is set on `key` itself (recursively
        or not) or recursively on any of its ancestors.
        """
        if not key:
            return False  # document root has no flags
        level = self._flags
        for part in key[:-1]:
            node = level.get(part)
            if node is None:
                return False
            if flag in node["recursive_flags"]:
                return True
            level = node["nested"]
        leaf = level.get(key[-1])
        if leaf is None:
            return False
        return flag in leaf["flags"] or flag in leaf["recursive_flags"]
class NestedDict:
    """Holder for the nested dict that a parsed TOML document builds up."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict:
        """Return the dict found at `key`, creating empty dicts on the way.

        With `access_lists` enabled, a list met on the path is entered
        through its last item. Raises KeyError as soon as a step leads to
        something that is not a dict.
        """
        current: Any = self.dict
        for part in key:
            current = current.setdefault(part, {})
            if access_lists and isinstance(current, list):
                current = current[-1]
            if not isinstance(current, dict):
                raise KeyError("There is no nest behind this key")
        return current

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list if absent.

        Raises KeyError if `key` already holds something other than a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        stem = key[-1]
        if stem not in parent:
            parent[stem] = [{}]
            return
        existing = parent[stem]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})
class Output(NamedTuple):
    """Pairs the document data being built with the flags recorded for it."""

    # Nested dict holding the parsed document content.
    data: NestedDict
    # Flags recorded for the keys/namespaces of `data`.
    flags: Flags
def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after `pos` whose character is not in `chars`.

    Running off the end of `src` is not an error; the index reached is
    returned.
    """
    cursor = pos
    while True:
        try:
            current = src[cursor]
        except IndexError:
            return cursor
        if current not in chars:
            return cursor
        cursor += 1
def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of `expect` at or after `pos`.

    If `expect` does not occur, the length of `src` is returned, unless
    `error_on_eof` is set, in which case TOMLDecodeError is raised.
    TOMLDecodeError is also raised, pointing at the first offender, when
    any character of `error_on` lies between `pos` and the returned index.
    """
    end = src.find(expect, pos)
    if end == -1:
        end = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, end) from None

    if error_on.isdisjoint(src[pos:end]):
        return end

    # Something illegal was skipped over: walk forward to pinpoint it.
    bad_pos = pos
    while src[bad_pos] not in error_on:
        bad_pos += 1
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)
def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at `pos`, if there is one.

    When the character at `pos` is "#", return the index of the next
    newline (or the end of `src`), delegating to `skip_until`, which
    rejects characters listed in ILLEGAL_COMMENT_CHARS. Otherwise return
    `pos` unchanged.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )
def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any run of whitespace, newlines and comments starting at `pos`.

    Whitespace/newline skipping and comment skipping alternate until a
    full round makes no progress.
    """
    while True:
        advanced = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
        if advanced == pos:
            return pos
        pos = advanced
def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header that starts at `pos`.

    Marks the key as explicitly created, makes sure a dict exists for it
    in `out.data`, and returns the position after the closing "]" together
    with the key. Raises TOMLDecodeError if the key is already explicitly
    created or frozen, if a non-dict value is in the way, or if the
    closing "]" is missing.
    """
    # `pos + 1` steps over the opening "[".
    pos, key = parse_key(src, skip_chars(src, pos + 1, TOML_WS))

    flags = out.flags
    if any(flags.is_(key, flag) for flag in (Flags.EXPLICIT_NEST, Flags.FROZEN)):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )
def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array of tables]]" header that starts at `pos`.

    Appends a fresh dict to the list stored at the key and returns the
    position after the closing "]]" together with the key. Raises
    TOMLDecodeError if the key is frozen, if a non-list value is in the
    way, or if the closing "]]" is missing.
    """
    # `pos + 2` steps over the opening "[[".
    pos, key = parse_key(src, skip_chars(src, pos + 2, TOML_WS))

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The namespace now points at a new, empty list item, so free it...
    flags.unset_all(key)
    # ...while this exact key stays prohibited from table declaration.
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )
def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one top-level key/value pair and store it below `header`.

    Returns the position after the value. Raises TOMLDecodeError if the
    dotted key redefines an explicitly created namespace, targets a frozen
    namespace, or would overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    flags = out.flags
    stem = key[-1]
    abs_parent = header + key[:-1]

    # Every proper prefix of the dotted key, made absolute with `header`.
    for depth in range(1, len(key)):
        container = header + key[:depth]
        # Dotted key syntax must not redefine an existing table
        if flags.is_(container, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {container}", src, pos)
        # Containers in the relative path can't be opened with the table
        # syntax or dotted key/value syntax in following table sections.
        flags.add_pending(container, Flags.EXPLICIT_NEST)

    if flags.is_(abs_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)
    # Inline tables and arrays become recursively immutable namespaces
    if isinstance(value, (dict, list)):
        flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[stem] = value
    return pos
def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` starting at `pos`.

    Returns the position after the value, the key and the value. Raises
    TOMLDecodeError if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    # `pos + 1` steps over the "=".
    value_start = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, value_start, parse_float, nest_lvl)
    return pos, key, value
def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key starting at `pos`.

    Returns the position after the key (and after any whitespace that
    follows it) together with the key parts as a tuple.
    """
    pos, first_part = parse_key_part(src, pos)
    parts = [first_part]
    pos = skip_chars(src, pos, TOML_WS)
    # Each "." announces one more key part; whitespace may surround it.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, next_part = parse_key_part(src, pos)
        parts.append(next_part)
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)
def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one part of a key: bare, literal-quoted or basic-quoted.

    Returns the position after the part together with its text. Raises
    TOMLDecodeError if `pos` does not start a key part.
    """
    # Empty string at end of input; it matches none of the cases below.
    first = src[pos : pos + 1]
    if first in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    if first == "'":
        return parse_literal_str(src, pos)
    if first == '"':
        return parse_one_line_basic_str(src, pos)
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)
def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at `pos`."""
    # `pos + 1` steps over the opening quotation mark.
    return parse_basic_str(src, pos + 1, multiline=False)
def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list]:
    """Parse an array whose opening "[" is at `pos`.

    Returns the position after the closing "]" together with the list of
    parsed values. A trailing comma before "]" is accepted. Raises
    TOMLDecodeError if a value is followed by neither "," nor "]".
    """
    items: list = []

    # `pos + 1` steps over the opening "[".
    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        # `pos + 1` steps over the ","; the loop condition then deals with
        # a "]" that directly follows it.
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, items
def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict]:
    """Parse an inline table whose opening "{" is at `pos`.

    Returns the position after the closing "}" together with the resulting
    dict. Raises TOMLDecodeError on a duplicate key, on a key that would
    modify a nested inline table/array or overwrite a value, and when a
    pair is followed by neither "," nor "}".
    """
    table = NestedDict()
    frozen = Flags()

    # `pos + 1` steps over the opening "{".
    pos = skip_chars(src, pos + 1, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, table.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        stem = key[-1]
        if frozen.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(key[:-1], access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {stem!r}", src, pos)
        nest[stem] = value

        pos = skip_chars(src, pos, TOML_WS)
        if src.startswith("}", pos):
            return pos + 1, table.dict
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        # More pairs follow: nested tables/arrays must not be extended by them.
        if isinstance(value, (dict, list)):
            frozen.set(key, Flags.FROZEN, recursive=True)
        # `pos + 1` steps over the ",".
        pos = skip_chars(src, pos + 1, TOML_WS)
def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the position after the sequence together with its replacement
    text. In multiline mode a backslash followed by space, tab or newline
    swallows the whitespace that follows and yields an empty string.
    Raises TOMLDecodeError for an unknown escape.
    """
    escape_id = src[pos : pos + 2]
    pos += 2
    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace until the next non-whitespace character or the
        # end of the doc. Non-whitespace before the newline is an error.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            following = src[pos : pos + 1]
            if not following:
                return pos, ""
            if following != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # Unicode escapes carry a fixed number of hex digits.
    if escape_id == "\\u":
        return parse_hex_char(src, pos, 4)
    if escape_id == "\\U":
        return parse_hex_char(src, pos, 8)

    replacement = BASIC_STR_ESCAPE_REPLACEMENTS.get(escape_id)
    if replacement is None:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
    return pos, replacement
def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Call `parse_basic_str_escape` with multiline handling switched on."""
    return parse_basic_str_escape(src, pos, multiline=True)
def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse `hex_len` hex digits at `pos` into the character they encode.

    Returns the position after the digits together with the character.
    Raises TOMLDecodeError if there are too few digits, if any of them is
    not a hex digit, or if the value is not a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        # Reported at the position just past the digits.
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)
def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening apostrophe is at `pos`.

    Returns the position after the closing apostrophe together with the
    text between the two apostrophes, taken verbatim.
    """
    content_start = pos + 1  # Skip starting apostrophe
    content_end = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # `content_end + 1` skips the ending apostrophe
    return content_end + 1, src[content_start:content_end]
def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opening delimiter is at `pos`.

    `literal` selects between the apostrophe-delimited literal form and
    the quote-delimited basic form. A newline directly after the opening
    delimiter is dropped. Returns the position after the closing
    delimiter together with the string content.
    """
    pos += 3
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        end_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:end_pos]
        pos = end_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # The end sequence may be 4 or 5 chars long instead of just 3; at most
    # two extra apostrophes/quotes then belong to the string content.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra
def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; `pos` is just past the opening delimiter.

    Returns the position after the closing delimiter (one quote, or three
    in multiline mode) together with the string with escapes resolved.
    Raises TOMLDecodeError on an illegal character or if the input ends
    before the string is closed.
    """
    if multiline:
        illegal = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        illegal = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
    closing_len = 3 if multiline else 1

    # Verbatim runs of `src` and resolved escapes, in document order.
    pieces: list[str] = []
    run_start = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None
        if char == '"':
            if not multiline or src.startswith('"""', pos):
                pieces.append(src[run_start:pos])
                return pos + closing_len, "".join(pieces)
            # A quote that does not start the closing delimiter of a
            # multiline string is ordinary content.
            pos += 1
        elif char == "\\":
            pieces.append(src[run_start:pos])
            pos, parsed_escape = parse_escapes(src, pos)
            pieces.append(parsed_escape)
            run_start = pos
        elif char in illegal:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1
def parse_value(  # noqa: C901
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the value that starts at `pos`.

    Returns the position after the value together with the parsed value.
    Raises TOMLDecodeError if nothing valid starts at `pos`, and
    RecursionError if `nest_lvl` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    # Empty string at end of input; it matches none of the cases below.
    char = src[pos : pos + 1]

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex greedily matches any type starting with a decimal char,
    # so it has to come after the handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats, unsigned (3 chars) before signed (4 chars)
    unsigned = src[pos : pos + 3]
    if unsigned in {"inf", "nan"}:
        return pos + 3, parse_float(unsigned)
    signed = src[pos : pos + 4]
    if signed in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(signed)

    raise TOMLDecodeError("Invalid value", src, pos)
def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return True if `codepoint` lies in 0..0xD7FF or in 0xE000..0x10FFFF."""
    in_range = 0 <= codepoint <= 0x10FFFF
    # 0xD800..0xDFFF is the gap between the two accepted ranges.
    return in_range and not 0xD800 <= codepoint <= 0xDFFF
def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    `parse_float` must not return dicts or lists: those types would be
    mixed up with parsed TOML tables and arrays and confuse the parser.
    The returned callable raises `ValueError` instead of returning such
    a value.
    """

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if isinstance(parsed, (dict, list)):
            raise ValueError("parse_float must not return dicts or lists")
        return parsed

    # The default `float` callable never returns illegal types, so it is
    # handed back unwrapped as an optimization.
    return float if parse_float is float else safe_parse_float