Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

497 statements  

1# SPDX-License-Identifier: MIT 

2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen 

3# Licensed to PSF under a Contributor Agreement. 

4 

5from __future__ import annotations 

6 

7import sys 

8from types import MappingProxyType 

9 

10from ._re import ( 

11 RE_DATETIME, 

12 RE_LOCALTIME, 

13 RE_NUMBER, 

14 match_to_datetime, 

15 match_to_localtime, 

16 match_to_number, 

17) 

18 

19TYPE_CHECKING = False 

20if TYPE_CHECKING: 

21 from collections.abc import Iterable 

22 from typing import IO, Any, Final 

23 

24 from ._types import Key, ParseFloat, Pos 

25 

# Arrays and inline tables are parsed recursively. A pathologically deep
# document makes pure Python raise RecursionError (acceptable), whereas
# mypyc binary wheels crash unrecoverably (not acceptable). The mypyc docs
# state that this is going to be fixed eventually:
# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows
# Until that fix lands, this library has to cap recursion itself.
# `sys.getrecursionlimit()` is used as the maximum inline table/array
# nesting level: it permits more nesting than pure Python does, yet still
# appears to be far below the depth at which mypyc binaries crash.
MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# A pathologically large number of parts in a single key leads to quadratic
# behavior (e.g. in Flags.is_).
# Keys are not parsed recursively at the moment, but they do name a
# recursive structure, so capping them with getrecursionlimit() and
# signalling the overflow with RecursionError is a natural fit.
MAX_KEY_PARTS: Final = sys.getrecursionlimit()

43 

# Every ASCII control character: U+0000..U+001F together with DEL (U+007F).
ASCII_CTRL: Final = frozenset(map(chr, range(32))) | frozenset({chr(127)})

# Quotation mark and backslash are deliberately absent from these sets;
# the parser functions currently treat them as separate cases.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t")
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL.difference("\t\n")

# Literal strings reject the same characters as their basic counterparts.
ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

# Comments reject the same characters as single-line basic strings.
ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset({" ", "\t"})
TOML_WS_AND_NEWLINE: Final = TOML_WS.union("\n")
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_"
)
# A key part starts with a bare-key character or an opening quote.
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS.union("\"'")
HEXDIGIT_CHARS: Final = frozenset("abcdefABCDEF0123456789")

# Read-only map from a two-character escape sequence to the character
# it stands for.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000a",  # linefeed
        "\\f": "\u000c",  # form feed
        "\\r": "\u000d",  # carriage return
        "\\e": "\u001b",  # escape
        '\\"': "\u0022",  # quote
        "\\\\": "\u005c",  # backslash
    }
)

76 

77 

class DEPRECATED_DEFAULT:
    """Sentinel class used as the default value of TOMLDecodeError's
    arguments while its free-form arguments are being deprecated."""

81 

82 

class TOMLDecodeError(ValueError):
    """Raised when a document is not valid TOML.

    When constructed with the supported ``(msg, doc, pos)`` arguments,
    the following attributes are added to ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The (1-based) line corresponding to pos
    colno: The (1-based) column corresponding to pos
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            args
            or not isinstance(msg, str)
            or not isinstance(doc, str)
            or not isinstance(pos, int)
        ):
            # Deprecated free-form call: warn, then behave like a plain
            # ValueError built from whichever arguments were supplied.
            # None of the extra attributes are set in this case.
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            supplied = tuple(
                arg for arg in (msg, doc, pos) if arg is not DEPRECATED_DEFAULT
            )
            ValueError.__init__(self, *supplied, *args)
            return

        newlines_before = doc.count("\n", 0, pos)
        lineno = newlines_before + 1
        if newlines_before:
            # Distance from the last newline that precedes pos.
            colno = pos - doc.rindex("\n", 0, pos)
        else:
            colno = pos + 1

        coord_repr = (
            "end of document"
            if pos >= len(doc)
            else f"line {lineno}, column {colno}"
        )
        ValueError.__init__(self, f"{msg} (at {coord_repr})")

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

142 

143 

def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    The whole content of ``__fp`` is read and decoded, then handed to
    `loads`. Raises TypeError when ``read()`` yields an object without a
    ``decode`` method, which is what happens for a file opened in text mode.
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    return loads(text, parse_float=parse_float)

154 

155 

def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a string.

    Returns the parsed document as a dict. Raises TypeError when ``__s``
    does not support ``str.replace`` with string arguments, and
    TOMLDecodeError when a statement is invalid.
    """
    # The spec permits normalizing "\r\n" to "\n", string literals
    # included. Doing it up front keeps the rest of the parser simple.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    out = Output()
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)
    pos = 0

    # One statement per iteration (usually one line of TOML source).
    while True:
        # 1. Drop leading whitespace of the line.
        pos = skip_chars(src, pos, TOML_WS)
        if pos >= len(src):
            break  # end of file

        # 2. Dispatch on the first character. It must begin one of:
        #      - an empty line
        #      - a comment
        #      - a key/value pair
        #      - "[[...]]": append dict to list (and move to its namespace)
        #      - "[...]": create dict (and move to its namespace)
        #    Trailing whitespace is skipped where it applies.
        first = src[pos]
        if first == "\n":
            pos += 1
            continue
        if first in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif first == "[":
            out.flags.finalize_pending()
            if src.startswith("[[", pos):
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif first != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Drop a comment, if there is one.
        pos = skip_comment(src, pos)

        # 4. The statement must be followed by a newline or end of file.
        if pos >= len(src):
            break
        if src[pos] != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict

225 

226 

class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags live in a tree that mirrors key paths. Each entry holds the
    flags of that exact key ("flags"), flags that also apply to every key
    below it ("recursive_flags"), and its child entries ("nested").
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        # (key, flag) pairs that only take effect on finalize_pending().
        self._pending_flags: set[tuple[Key, int]] = set()

    @staticmethod
    def _blank_entry() -> dict[str, Any]:
        """Return a fresh, empty tree entry."""
        return {"flags": set(), "recursive_flags": set(), "nested": {}}

    def add_pending(self, key: Key, flag: int) -> None:
        """Remember `flag` for `key` without applying it yet."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every pending flag non-recursively, then forget them."""
        for pending_key, pending_flag in self._pending_flags:
            self.set(pending_key, pending_flag, recursive=False)
        self._pending_flags.clear()

    def unset_all(self, key: Key) -> None:
        """Drop the entry of `key`, including everything nested below it."""
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return  # nothing was ever recorded along this path
            node = entry["nested"]
        node.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Record `flag` for `key`, creating missing tree entries.

        With ``recursive=True`` the flag also covers all keys below `key`.
        """
        parents, stem = key[:-1], key[-1]
        node = self._flags
        for part in parents:
            node = node.setdefault(part, self._blank_entry())["nested"]
        entry = node.setdefault(stem, self._blank_entry())
        bucket = "recursive_flags" if recursive else "flags"
        entry[bucket].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Tell whether `flag` applies to `key`.

        It applies if an ancestor of `key` carries it recursively, or if
        `key` itself carries it (recursively or not).
        """
        if not key:
            return False  # document root has no flags
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return False
            if flag in entry["recursive_flags"]:
                return True
            node = entry["nested"]
        leaf = node.get(key[-1])
        if leaf is None:
            return False
        return flag in leaf["flags"] or flag in leaf["recursive_flags"]

283 

284 

class NestedDict:
    """Nested dict container addressed by key tuples."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Return the dict found by following `key`, creating missing dicts.

        When `access_lists` is true, a list met on the way is entered
        through its last item. Raises KeyError if a step of the path ends
        up on something that is not a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list
        if the key is absent.

        Raises KeyError if `key` already holds something other than a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        last_key = key[-1]
        if last_key not in parent:
            parent[last_key] = [{}]
            return
        existing = parent[last_key]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})

317 

318 

class Output:
    """Pairs the parsed data with the flags recorded for its keys."""

    def __init__(self) -> None:
        self.data = NestedDict()
        self.flags = Flags()

323 

324 

def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after `pos` whose character is not in
    `chars`. Running past the end of `src` stops the scan."""
    try:
        while True:
            if src[pos] not in chars:
                return pos
            pos += 1
    except IndexError:
        # Ran off the end of the document.
        return pos

332 

333 

def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of `expect` at or after `pos`.

    If `expect` never occurs, the length of `src` is returned, or
    TOMLDecodeError is raised when `error_on_eof` is true. TOMLDecodeError
    is also raised if any skipped character is in `error_on`; the error
    points at the first such character.
    """
    try:
        new_pos = src.index(expect, pos)
    except ValueError:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    skipped = src[pos:new_pos]
    if error_on.isdisjoint(skipped):
        return new_pos

    # Slow path: locate the first offending character for the report.
    for bad_pos, char in enumerate(skipped, pos):
        if char in error_on:
            break
    raise TOMLDecodeError(f"Found invalid character {src[bad_pos]!r}", src, bad_pos)

354 

355 

def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at `pos`, if there is one.

    Returns `pos` unchanged when the character at `pos` is not "#".
    Otherwise returns the index of the newline that ends the comment, or
    the length of `src` if the comment runs to the end of the document.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )

366 

367 

def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments starting at `pos`
    and return the position where the run ends."""
    while True:
        before = pos
        pos = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
        if pos == before:
            # A full pass consumed nothing: the run is over.
            return pos

375 

376 

def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header starting at `pos`.

    Marks the key as explicitly declared, creates the table in `out` and
    returns the position after the closing "]" together with the key.
    Raises TOMLDecodeError if the table was already declared or is
    frozen, if the key collides with an existing non-table value, or if
    the closing "]" is missing.
    """
    pos = skip_chars(src, pos + 1, TOML_WS)  # step over "[" and padding
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src[pos : pos + 1] != "]":
        raise TOMLDecodeError(
            "Expected ']' at the end of a table declaration", src, pos
        )
    return pos + 1, key

395 

396 

def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array of tables]]" header starting at `pos`.

    Appends a new empty table to the list behind the key and returns the
    position after the closing "]]" together with the key. Raises
    TOMLDecodeError if the namespace is frozen, if the key holds
    something other than a list, or if the closing "]]" is missing.
    """
    pos = skip_chars(src, pos + 2, TOML_WS)  # step over "[[" and padding
    pos, key = parse_key(src, pos)

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # The key now points to a new, empty list item, so flags recorded
    # under it no longer apply...
    flags.unset_all(key)
    # ...but this exact key still must not be declared as a table.
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src[pos : pos + 2] != "]]":
        raise TOMLDecodeError(
            "Expected ']]' at the end of an array declaration", src, pos
        )
    return pos + 2, key

418 

419 

def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse a top-level ``key = value`` statement and store it in `out`.

    The key is interpreted relative to `header`. Returns the position
    after the value. Raises TOMLDecodeError if the statement would
    redefine an explicitly created table, mutate a frozen namespace or
    overwrite an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    full_key = header + key
    parent_key = full_key[:-1]
    stem = key[-1]
    flags = out.flags

    # Every proper prefix of a dotted key names an intermediate container.
    for depth in range(1, len(key)):
        cont_key = header + key[:depth]
        # Check that dotted key syntax does not redefine an existing table
        if flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if flags.is_(parent_key, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {parent_key}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(parent_key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)

    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        flags.set(full_key, Flags.FROZEN, recursive=True)
    nest[stem] = value
    return pos

452 

453 

def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse ``key = value`` starting at `pos`.

    Returns the position after the value, the key and the value. Raises
    TOMLDecodeError if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if src[pos : pos + 1] != "=":
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    # Step over "=" and any whitespace between it and the value.
    value_start = skip_chars(src, pos + 1, TOML_WS)
    end_pos, value = parse_value(src, value_start, parse_float, nest_lvl)
    return end_pos, key, value

468 

469 

def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a possibly dotted key starting at `pos`.

    Returns the position after the key (and any whitespace following it)
    together with the key as a tuple of its parts. Raises RecursionError
    if the key has more than MAX_KEY_PARTS parts.
    """
    pos, first_part = parse_key_part(src, pos)
    parts = [first_part]
    pos = skip_chars(src, pos, TOML_WS)
    # Each "." introduces one more key part.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, next_part = parse_key_part(src, pos)
        parts.append(next_part)
        if len(parts) > MAX_KEY_PARTS:
            raise RecursionError(
                f"TOML key has more than the allowed {MAX_KEY_PARTS} parts"
            )
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)

490 

491 

def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one part of a key: a bare key, a literal string or a
    one-line basic string.

    Returns the position after the part and its text. Raises
    TOMLDecodeError if `pos` does not start a key part.
    """
    head = src[pos : pos + 1]  # empty at end of document
    if head in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    if head == "'":
        return parse_literal_str(src, pos)
    if head == '"':
        return parse_one_line_basic_str(src, pos)
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)

506 

507 

def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at `pos`."""
    # Step over the opening quotation mark.
    return parse_basic_str(src, pos + 1, multiline=False)

511 

512 

def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose opening "[" is at `pos`.

    Whitespace, newlines and comments may surround the items, and a
    trailing comma is accepted. Returns the position after the closing
    "]" and the list of values. Raises TOMLDecodeError if an item is
    followed by anything other than "," or "]".
    """
    items: list[Any] = []
    pos = skip_comments_and_array_ws(src, pos + 1)
    # The closing bracket may follow "[" or a comma directly.
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        separator = src[pos : pos + 1]
        if separator == "]":
            break
        if separator != ",":
            raise TOMLDecodeError("Unclosed array", src, pos)
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, items

537 

538 

def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose opening "{" is at `pos`.

    Whitespace, newlines and comments may surround the pairs, and a
    trailing comma is accepted. Returns the position after the closing
    "}" and the parsed dict. Raises TOMLDecodeError on a duplicate key,
    on an attempt to extend a nested inline table or array through a
    dotted key, or if a pair is followed by anything other than "," or
    "}".
    """
    table = NestedDict()
    flags = Flags()

    pos = skip_comments_and_array_ws(src, pos + 1)
    if src[pos : pos + 1] == "}":
        return pos + 1, table.dict
    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        parent_key, stem = key[:-1], key[-1]
        if flags.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            nest = table.get_or_create_nest(parent_key, access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if stem in nest:
            raise TOMLDecodeError(f"Duplicate inline table key {stem!r}", src, pos)
        nest[stem] = value

        pos = skip_comments_and_array_ws(src, pos)
        separator = src[pos : pos + 1]
        if separator == "}":
            return pos + 1, table.dict
        if separator != ",":
            raise TOMLDecodeError("Unclosed inline table", src, pos)

        pos = skip_comments_and_array_ws(src, pos + 1)
        if src[pos : pos + 1] == "}":
            # Closing brace right after a trailing comma.
            return pos + 1, table.dict
        # More pairs follow: freeze nested containers so that later
        # dotted keys cannot reach into them.
        if isinstance(value, (dict, list)):
            flags.set(key, Flags.FROZEN, recursive=True)

573 

574 

def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the position after the sequence and the text it stands for.
    In multiline mode a backslash followed by whitespace up to a newline
    yields an empty string and swallows the whitespace and newlines that
    follow. Raises TOMLDecodeError for an unknown escape.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            if pos >= len(src):
                return pos, ""
            if src[pos] != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    # Escapes followed by a fixed number of hex digits.
    hex_len = {"\\x": 2, "\\u": 4, "\\U": 8}.get(escape_id)
    if hex_len is not None:
        return parse_hex_char(src, pos, hex_len)

    try:
        replacement = BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
    return pos, replacement

604 

605 

def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse an escape sequence with the multiline-string rules enabled."""
    return parse_basic_str_escape(src, pos, multiline=True)

608 

609 

def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse exactly `hex_len` hex digits at `pos` into one character.

    Returns the position after the digits and the character. Raises
    TOMLDecodeError if the digits are missing or malformed, or if they
    do not encode a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    if len(digits) != hex_len or not HEXDIGIT_CHARS.issuperset(digits):
        raise TOMLDecodeError("Invalid hex value", src, pos)
    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        # Reported at the position just past the digits.
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)

621 

622 

def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening apostrophe is at
    `pos`. Returns the position after the closing apostrophe and the
    content between the apostrophes."""
    content_start = pos + 1  # Skip starting apostrophe
    closing = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    return closing + 1, src[content_start:closing]  # Skip ending apostrophe

630 

631 

def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opening delimiter
    starts at `pos`.

    `literal` selects between the literal and the basic flavour.
    Returns the position after the string and its content.
    """
    pos += 3  # opening delimiter
    if src[pos : pos + 1] == "\n":
        pos += 1  # a newline right after the opening delimiter is dropped

    if literal:
        delim = "'"
        close_pos = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:close_pos]
        pos = close_pos + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # The closing sequence may be 4 or 5 delimiter characters long; the
    # surplus (at most two) apostrophes/quotes belong to the content.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra

661 

662 

def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the body of a basic string; `pos` is just past the opening
    delimiter.

    Returns the position after the closing delimiter and the string
    with escapes resolved. Raises TOMLDecodeError if the document ends
    before the string is closed or if an illegal character is found.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape

    pieces: list[str] = []
    chunk_start = pos  # start of the raw text not yet copied to `pieces`
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None

        if char == "\\":
            pieces.append(src[chunk_start:pos])
            pos, unescaped = parse_escapes(src, pos)
            pieces.append(unescaped)
            chunk_start = pos
        elif char == '"':
            if not multiline or src.startswith('"""', pos):
                pieces.append(src[chunk_start:pos])
                return pos + (3 if multiline else 1), "".join(pieces)
            # A lone quote inside a multiline string is ordinary content.
            pos += 1
        elif char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        else:
            pos += 1

693 

694 

def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value starting at `pos`.

    Returns the position after the value and the parsed object. Raises
    TOMLDecodeError if no value starts at `pos` or if a date/datetime is
    invalid, and RecursionError if `nest_lvl` exceeds MAX_INLINE_NESTING.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    char = src[pos : pos + 1]  # empty at end of document

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match is not None:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match is not None:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match is not None:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats: unsigned spellings first, then the signed ones.
    for width, spellings in (
        (3, ("inf", "nan")),
        (4, ("-inf", "+inf", "-nan", "+nan")),
    ):
        candidate = src[pos : pos + width]
        if candidate in spellings:
            return pos + width, parse_float(candidate)

    raise TOMLDecodeError("Invalid value", src, pos)

769 

770 

def is_unicode_scalar_value(codepoint: int) -> bool:
    """Tell whether `codepoint` lies in 0..0xD7FF or 0xE000..0x10FFFF."""
    if codepoint < 0 or codepoint > 0x10FFFF:
        return False
    # Everything between the two accepted ranges is rejected.
    return codepoint <= 0xD7FF or codepoint >= 0xE000

773 

774 

def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """Wrap `parse_float` so that it cannot return a dict or a list.

    Values of those types would be mixed with parsed TOML tables and
    arrays and confuse the parser. The wrapper raises `ValueError`
    instead of returning them.
    """
    # The built-in `float` never returns illegal types, so it is handed
    # back unwrapped as an optimization.
    if parse_float is float:
        return float

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    return safe_parse_float