Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

497 statements  

1# SPDX-License-Identifier: MIT 

2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen 

3# Licensed to PSF under a Contributor Agreement. 

4 

5from __future__ import annotations 

6 

7# Defer loading regular expressions until we actually need them in 

8# parse_value(). 

9__lazy_modules__ = ["tomli._re"] 

10 

11import sys 

12 

13from ._re import ( 

14 RE_DATETIME, 

15 RE_LOCALTIME, 

16 RE_NUMBER, 

17 match_to_datetime, 

18 match_to_localtime, 

19 match_to_number, 

20) 

21 

22if sys.version_info < (3, 15): # pragma: no cover 

23 from types import MappingProxyType as frozendict 

24 

25TYPE_CHECKING = False 

26if TYPE_CHECKING: 

27 from collections.abc import Iterable 

28 from typing import IO, Any, Final 

29 

30 from ._types import Key, ParseFloat, Pos 

31 

# Inline tables and arrays are parsed recursively. A pathologically nested
# document makes pure Python raise RecursionError (acceptable), whereas
# mypyc binary wheels crash unrecoverably (not acceptable). The mypyc docs
# say this will be fixed in the future:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until that fix lands, this library has to cap the recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet seems
# to be far below the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# A key with a pathologically large number of parts runs into quadratic
# behavior (e.g. in Flags.is_). Keys are not parsed recursively today, but
# they name a recursive structure, so limiting them with
# getrecursionlimit() and RecursionError is a sensible fit.
MAX_KEY_PARTS: Final = sys.getrecursionlimit()

# Code points 0..31 plus DEL (127).
ASCII_CTRL: Final = frozenset(map(chr, (*range(32), 127)))

# Quotation mark and backslash are in neither of these sets. The parser
# functions currently handle them as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t\n")

ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset((" ", "\t"))
TOML_WS_AND_NEWLINE: Final = TOML_WS.union("\n")
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS.union("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdefABCDEF0123456789")

# Maps a two-character escape sequence (backslash + identifier) to the
# single character it stands for.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = frozendict(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000a",  # linefeed
        "\\f": "\u000c",  # form feed
        "\\r": "\u000d",  # carriage return
        "\\e": "\u001b",  # escape
        '\\"': "\u0022",  # quote
        "\\\\": "\u005c",  # backslash
    }
)

82 

83 

class DEPRECATED_DEFAULT:
    """Placeholder default for TOMLDecodeError's arguments.

    Used as the "argument not supplied" sentinel during the deprecation
    period of TOMLDecodeError's free-form arguments.
    """

87 

88 

class TOMLDecodeError(ValueError):
    """Raised when a document is not valid TOML.

    On top of ValueError, instances created with the supported
    ``(msg, doc, pos)`` arguments carry these attributes:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if not (
            not args
            and isinstance(msg, str)
            and isinstance(doc, str)
            and isinstance(pos, int)
        ):
            # Deprecated free-form call: warn, then behave like a plain
            # ValueError built from whichever arguments were supplied.
            # None of the extra attributes are set on this path.
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            supplied = [
                arg for arg in (msg, doc, pos) if arg is not DEPRECATED_DEFAULT
            ]
            ValueError.__init__(self, *supplied, *args)
            return

        # Lines are 1-based; the column is the 1-based offset from the
        # newline preceding ``pos`` (or from the document start on line 1).
        newlines_before = doc.count("\n", 0, pos)
        lineno = newlines_before + 1
        colno = pos + 1 if newlines_before == 0 else pos - doc.rindex("\n", 0, pos)

        coord_repr = (
            "end of document"
            if pos >= len(doc)
            else f"line {lineno}, column {colno}"
        )
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

148 

149 

def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Read a binary file object to the end and parse its content as TOML.

    Raises TypeError when what ``__fp.read()`` returns has no usable
    ``decode`` method, which is what happens for a file opened in text
    mode. Parsing itself is delegated to ``loads``.
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    else:
        return loads(text, parse_float=parse_float)

160 

161 

def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse a TOML document given as a string and return it as a dict.

    ``parse_float`` is wrapped with ``make_safe_parse_float`` and handed
    on to the value parser. Raises TypeError when ``__s`` does not behave
    like a str, and TOMLDecodeError when a statement is invalid.
    """
    # The spec permits turning "\r\n" into "\n", string literals included.
    # Doing it up front keeps the rest of the parser simpler.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    parse_float = make_safe_parse_float(parse_float)
    out = Output()
    header: Key = ()
    pos = 0
    src_len = len(src)

    # One statement per iteration (usually one line of TOML source).
    while True:
        # 1. Leading whitespace of the line.
        pos = skip_chars(src, pos, TOML_WS)

        # 2. The statement itself. Exactly one of:
        #    - end of file
        #    - end of line
        #    - comment
        #    - key/value pair
        #    - append dict to list (and move to its namespace)
        #    - create dict (and move to its namespace)
        #    Trailing whitespace is skipped where applicable.
        if pos >= src_len:
            break
        char = src[pos]
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            out.flags.finalize_pending()
            # "[[" opens an array-of-tables item, a single "[" a table.
            rule = create_list_rule if src.startswith("[[", pos) else create_dict_rule
            pos, header = rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Optional trailing comment.
        pos = skip_comment(src, pos)

        # 4. The statement must be followed by a newline or the end of file.
        if pos >= src_len:
            break
        if src[pos] != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict

231 

232 

class Flags:
    """Flags attached to parsed keys/namespaces.

    Flags live in a tree that mirrors the key path. Each node is a dict
    with a "flags" set (applies to that key only), a "recursive_flags" set
    (applies to that key and everything below it) and a "nested" dict of
    child nodes.
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue ``flag`` for ``key``; it takes effect in finalize_pending()."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag non-recursively and empty the queue."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the node for ``key`` (its flags and all nested nodes), if any."""
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return
            node = entry["nested"]
        node.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Add ``flag`` to ``key``, creating missing nodes along the path."""
        key_parent, key_stem = key[:-1], key[-1]
        node = self._flags
        for part in (*key_parent, key_stem):
            entry = node.get(part)
            if entry is None:
                entry = node[part] = {
                    "flags": set(),
                    "recursive_flags": set(),
                    "nested": {},
                }
            node = entry["nested"]
        # ``entry`` is now the node of the final key part.
        bucket = "recursive_flags" if recursive else "flags"
        entry[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return whether ``flag`` applies to ``key``.

        True when the key itself carries the flag (recursively or not) or
        when any ancestor carries it recursively.
        """
        if not key:
            return False  # document root has no flags
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return False
            if flag in entry["recursive_flags"]:
                return True
            node = entry["nested"]
        leaf = node.get(key[-1])
        if leaf is None:
            return False
        return flag in leaf["flags"] or flag in leaf["recursive_flags"]

289 

290 

class NestedDict:
    """Holds the parsed document and creates nested containers on demand."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Return the dict at ``key``, creating empty dicts where missing.

        With ``access_lists`` enabled, a list found along the path is
        entered through its last item. Raises KeyError when a path
        component resolves to something other than a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append a new empty dict to the list at ``key``.

        The list is created when the key does not exist yet. Raises
        KeyError when the key already holds something that is not a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        items = parent.setdefault(key[-1], [])
        if not isinstance(items, list):
            raise KeyError("An object other than list found behind this key")
        items.append({})

323 

324 

class Output:
    """Bundles the parse result with the flags recorded for its keys."""

    def __init__(self) -> None:
        # Flags tracked for the keys of the document under construction.
        self.flags = Flags()
        # The document under construction.
        self.data = NestedDict()

329 

330 

def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after ``pos`` whose char is not in ``chars``.

    Stops at the end of ``src``; a ``pos`` already past the end is
    returned unchanged.
    """
    end = len(src)
    while pos < end and src[pos] in chars:
        pos += 1
    return pos

338 

339 

def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of ``expect`` at or after ``pos``.

    When ``expect`` is absent, the end of ``src`` is returned, unless
    ``error_on_eof`` is set, in which case TOMLDecodeError is raised.
    TOMLDecodeError is also raised, positioned at the first offender, if
    any character in the skipped span belongs to ``error_on``.
    """
    new_pos = src.find(expect, pos)
    if new_pos == -1:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    if error_on.isdisjoint(src[pos:new_pos]):
        return new_pos

    # At least one illegal character is present: report the first one.
    bad_pos = next(i for i in range(pos, new_pos) if src[i] in error_on)
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)

360 

361 

def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at ``pos``, if there is one.

    Returns ``pos`` unchanged when the character there is not "#".
    Otherwise returns the index of the newline ending the comment, or the
    end of ``src`` when the comment is the last thing in the document.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )

372 

373 

def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments starting at ``pos``.

    Repeats "skip whitespace/newlines, then skip one comment" until a
    round makes no progress, and returns the resulting position.
    """
    while True:
        after_ws = skip_chars(src, pos, TOML_WS_AND_NEWLINE)
        advanced = skip_comment(src, after_ws)
        if advanced == pos:
            return pos
        pos = advanced

381 

382 

def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header that starts at ``pos``.

    Marks the key as explicitly created, makes sure the table exists in
    the output and returns the position after the closing "]" together
    with the key, which becomes the new header. Raises TOMLDecodeError
    when the key is already declared or frozen, when it collides with an
    existing value, or when the closing "]" is missing.
    """
    key_start = skip_chars(src, pos + 1, TOML_WS)  # step over "[" and padding
    pos, key = parse_key(src, key_start)

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )

401 

402 

def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array]]" header that starts at ``pos``.

    Appends a fresh empty table to the list at the key and returns the
    position after the closing "]]" together with the key, which becomes
    the new header. Raises TOMLDecodeError when the key is frozen, when
    it holds something other than a list, or when "]]" is missing.
    """
    key_start = skip_chars(src, pos + 2, TOML_WS)  # step over "[[" and padding
    pos, key = parse_key(src, key_start)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The key now refers to a new, empty list item, so everything that was
    # recorded below it is released...
    flags.unset_all(key)
    # ...while the key itself stays barred from table declaration.
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )

424 

425 

def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one top-level ``key = value`` statement and store the value.

    The key is interpreted relative to ``header``. Returns the position
    after the value. Raises TOMLDecodeError when the dotted key reopens an
    explicitly created table, targets a frozen namespace, or would
    overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    key_stem = key[-1]
    flags = out.flags

    # Visit header + key[:1], header + key[:2], ... up to header + key[:-1].
    cont_key = header
    for part in key[:-1]:
        cont_key += (part,)
        # Check that dotted key syntax does not redefine an existing table
        if flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
        # Containers in the relative path can't be opened with the table
        # syntax or dotted key/value syntax in following table sections.
        flags.add_pending(cont_key, Flags.EXPLICIT_NEST)
    # After the walk this is exactly header + key[:-1].
    abs_key_parent = cont_key

    if flags.is_(abs_key_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if key_stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)
    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        flags.set(header + key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos

458 

459 

def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` at ``pos``.

    Returns the position after the value, the key and the parsed value.
    Raises TOMLDecodeError when the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    value_start = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, value_start, parse_float, nest_lvl)
    return pos, key, value

474 

475 

def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key at ``pos`` into a tuple of its parts.

    Whitespace around the dots and after the last part is skipped.
    Returns the position after the key and the key. Raises RecursionError
    when the key has more than MAX_KEY_PARTS parts.
    """
    pos, first_part = parse_key_part(src, pos)
    parts = [first_part]
    pos = skip_chars(src, pos, TOML_WS)
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, next_part = parse_key_part(src, pos)
        parts.append(next_part)
        if len(parts) > MAX_KEY_PARTS:
            raise RecursionError(
                f"TOML key has more than the allowed {MAX_KEY_PARTS} parts"
            )
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)

496 

497 

def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single key part: a bare key, a literal string or a basic string.

    Returns the position after the part and its text. Raises
    TOMLDecodeError when ``pos`` is at the end of the document or at a
    character that cannot start a key part.
    """
    first = src[pos : pos + 1]  # empty string at end of document
    if first in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    if first == "'":
        return parse_literal_str(src, pos)
    if first == '"':
        return parse_one_line_basic_str(src, pos)
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)

512 

513 

def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at ``pos``."""
    body_start = pos + 1  # step over the opening quotation mark
    return parse_basic_str(src, body_start, multiline=False)

517 

518 

def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose opening "[" is at ``pos``.

    Whitespace, newlines and comments may appear between items, and a
    trailing comma before "]" is accepted. Returns the position after the
    closing "]" and the list of parsed values. Raises TOMLDecodeError
    ("Unclosed array") when a value is followed by neither "," nor "]".
    """
    items: list[Any] = []
    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        # Step over the comma; the loop condition then handles a "]" that
        # directly follows it (trailing comma).
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, items

543 

544 

def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose opening "{" is at ``pos``.

    Whitespace, newlines and comments may appear between pairs, and a
    trailing comma before "}" is accepted. Returns the position after the
    closing "}" and the resulting dict. Raises TOMLDecodeError for a
    duplicate key, for a key that reaches into a nested inline
    table/array defined earlier in the same table, for a key that
    collides with a non-table value, and for a missing "," or "}".
    """
    table = NestedDict()
    # Local to this inline table: tracks which of its keys hold inline
    # tables/arrays and must not be extended by later dotted keys.
    flags = Flags()

    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("}", pos):
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        key_stem = key[-1]
        if flags.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(key[:-1], access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if key_stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {key_stem!r}", src, pos)
        nest[key_stem] = value
        # Only consulted when parsing later pairs of this same table.
        if isinstance(value, (dict, list)):
            flags.set(key, Flags.FROZEN, recursive=True)

        pos = skip_comments_and_array_ws(src, pos)
        if src.startswith("}", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        # Step over the comma; the loop condition then handles a "}" that
        # directly follows it (trailing comma).
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, table.dict

579 

580 

def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at ``pos``.

    Returns the position after the sequence and the text it stands for.
    In multiline mode a backslash followed by a space, tab or newline is a
    line continuation: it yields an empty string and swallows the
    whitespace and newlines that follow. Raises TOMLDecodeError for an
    unknown escape or for a continuation backslash followed by
    non-whitespace before the newline.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in ("\\ ", "\\\t", "\\\n"):
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            if pos >= len(src):
                return pos, ""
            if src[pos] != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # Escapes followed by a fixed number of hex digits.
    if escape_id == "\\x":
        hex_len = 2
    elif escape_id == "\\u":
        hex_len = 4
    elif escape_id == "\\U":
        hex_len = 8
    else:
        # Everything else must be one of the fixed two-character escapes.
        replacement = BASIC_STR_ESCAPE_REPLACEMENTS.get(escape_id)
        if replacement is None:
            raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
        return pos, replacement
    return parse_hex_char(src, pos, hex_len)

610 

611 

def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse an escape sequence at ``pos`` using the multiline-string rules."""
    new_pos, text = parse_basic_str_escape(src, pos, multiline=True)
    return new_pos, text

614 

615 

def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Decode ``hex_len`` hex digits at ``pos`` into a single character.

    Returns the position after the digits and the character. Raises
    TOMLDecodeError when fewer than ``hex_len`` characters remain, when a
    non-hex character is found, or when the value is not a Unicode scalar
    value.
    """
    end = pos + hex_len
    hex_str = src[pos:end]
    well_formed = len(hex_str) == hex_len and HEXDIGIT_CHARS.issuperset(hex_str)
    if not well_formed:
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(hex_str, 16)
    if not is_unicode_scalar_value(codepoint):
        # Reported at the position just past the digits.
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)

627 

628 

def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening apostrophe is at ``pos``.

    Returns the position after the closing apostrophe and the text between
    the two apostrophes, taken verbatim.
    """
    body_start = pos + 1  # step over the opening apostrophe
    body_end = skip_until(
        src, body_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # +1 steps over the closing apostrophe.
    return body_end + 1, src[body_start:body_end]

636 

637 

def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opening delimiter is at ``pos``.

    ``literal`` selects the apostrophe-delimited flavour over the
    quote-delimited one. A newline directly after the opening delimiter is
    dropped. Returns the position after the string and its content.
    """
    pos += 3
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        body_end = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:body_end]
        pos = body_end + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # Add at maximum two extra apostrophes/quotes if the end sequence
    # is 4 or 5 chars long instead of just 3.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra

667 

668 

def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; ``pos`` is just past the opening delimiter.

    Returns the position after the closing delimiter (one quote, or three
    in multiline mode) and the string with escapes resolved. Raises
    TOMLDecodeError when the document ends before the closing delimiter or
    when an illegal character is found.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
        closing_len = 3
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape
        closing_len = 1

    # Pieces of the result: verbatim slices of ``src`` interleaved with
    # resolved escapes. ``start_pos`` marks where the pending slice begins.
    chunks: list[str] = []
    start_pos = pos
    src_len = len(src)
    while True:
        if pos >= src_len:
            raise TOMLDecodeError("Unterminated string", src, pos) from None
        char = src[pos]
        if char == '"':
            if not multiline or src.startswith('"""', pos):
                chunks.append(src[start_pos:pos])
                return pos + closing_len, "".join(chunks)
            # A lone quote inside a multiline string is ordinary content.
            pos += 1
        elif char == "\\":
            chunks.append(src[start_pos:pos])
            pos, parsed_escape = parse_escapes(src, pos)
            chunks.append(parsed_escape)
            start_pos = pos
        elif char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1

699 

700 

def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value that starts at ``pos``.

    Returns the position after the value and the parsed value. Raises
    TOMLDecodeError when nothing valid starts at ``pos`` and
    RecursionError when ``nest_lvl`` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    char = src[pos : pos + 1]  # empty string at end of document

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    dt_match = RE_DATETIME.match(src, pos)
    if dt_match:
        try:
            dt_value = match_to_datetime(dt_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return dt_match.end(), dt_value
    time_match = RE_LOCALTIME.match(src, pos)
    if time_match:
        return time_match.end(), match_to_localtime(time_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    num_match = RE_NUMBER.match(src, pos)
    if num_match:
        return num_match.end(), match_to_number(num_match, parse_float)

    # Special floats: unsigned forms are checked before the signed ones.
    for width, literals in (
        (3, ("inf", "nan")),
        (4, ("-inf", "+inf", "-nan", "+nan")),
    ):
        candidate = src[pos : pos + width]
        if candidate in literals:
            return pos + width, parse_float(candidate)

    raise TOMLDecodeError("Invalid value", src, pos)

775 

776 

def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return whether ``codepoint`` lies in 0..0x10FFFF outside 0xD800..0xDFFF."""
    in_unicode_range = 0 <= codepoint <= 0x10FFFF
    in_excluded_block = 0xD800 <= codepoint <= 0xDFFF
    return in_unicode_range and not in_excluded_block

779 

780 

def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """Wrap `parse_float` so that it cannot hand back a dict or a list.

    Values of those types would be indistinguishable from parsed TOML
    tables and arrays and would confuse the parser. The wrapper returned
    here raises `ValueError` rather than returning such a value.
    """
    # The builtin `float` never returns an illegal type, so it needs no
    # wrapper. Return it as is.
    if parse_float is float:
        return float

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    return safe_parse_float