Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.10/site-packages/tomli/_parser.py: 72%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

490 statements  

1# SPDX-License-Identifier: MIT 

2# SPDX-FileCopyrightText: 2021 Taneli Hukkinen 

3# Licensed to PSF under a Contributor Agreement. 

4 

5from __future__ import annotations 

6 

7import sys 

8from types import MappingProxyType 

9 

10from ._re import ( 

11 RE_DATETIME, 

12 RE_LOCALTIME, 

13 RE_NUMBER, 

14 match_to_datetime, 

15 match_to_localtime, 

16 match_to_number, 

17) 

18 

19TYPE_CHECKING = False 

20if TYPE_CHECKING: 

21 from collections.abc import Iterable 

22 from typing import IO, Any, Final 

23 

24 from ._types import Key, ParseFloat, Pos 

25 

26# Inline tables/arrays are implemented using recursion. Pathologically 

27# nested documents cause pure Python to raise RecursionError (which is OK), 

28# but mypyc binary wheels will crash unrecoverably (not OK). According to 

29# mypyc docs this will be fixed in the future: 

30# https://mypyc.readthedocs.io/en/latest/differences_from_python.html#stack-overflows 

31# Before mypyc's fix is in, recursion needs to be limited by this library. 

32# Choosing `sys.getrecursionlimit()` as maximum inline table/array nesting 

33# level, as it allows more nesting than pure Python, but still seems a far 

34# lower number than where mypyc binaries crash. 

MAX_INLINE_NESTING: Final = sys.getrecursionlimit()

# Every ASCII control character: code points 0-31 plus DEL (127).
ASCII_CTRL: Final = frozenset(map(chr, range(32))) | {chr(127)}

# Neither of these sets include quotation mark or backslash. They are
# currently handled as separate cases in the parser functions.
ILLEGAL_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t"}
ILLEGAL_MULTILINE_BASIC_STR_CHARS: Final = ASCII_CTRL - {"\t", "\n"}

# Literal strings and comments reuse the basic-string character rules.
ILLEGAL_LITERAL_STR_CHARS: Final = ILLEGAL_BASIC_STR_CHARS
ILLEGAL_MULTILINE_LITERAL_STR_CHARS: Final = ILLEGAL_MULTILINE_BASIC_STR_CHARS

ILLEGAL_COMMENT_CHARS: Final = ILLEGAL_BASIC_STR_CHARS

TOML_WS: Final = frozenset(" \t")
TOML_WS_AND_NEWLINE: Final = TOML_WS | {"\n"}
BARE_KEY_CHARS: Final = frozenset(
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "0123456789"
    "-_"
)
# A key part starts with a bare-key character or an opening quote.
KEY_INITIAL_CHARS: Final = BARE_KEY_CHARS | {'"', "'"}
HEXDIGIT_CHARS: Final = frozenset("abcdef" "ABCDEF" "0123456789")

# Two-character escape sequence (backslash included) -> replacement character.
BASIC_STR_ESCAPE_REPLACEMENTS: Final = MappingProxyType(
    {
        "\\b": "\u0008",  # backspace
        "\\t": "\u0009",  # tab
        "\\n": "\u000a",  # linefeed
        "\\f": "\u000c",  # form feed
        "\\r": "\u000d",  # carriage return
        "\\e": "\u001b",  # escape
        '\\"': "\u0022",  # quote
        "\\\\": "\u005c",  # backslash
    }
)

69 

70 

class DEPRECATED_DEFAULT:
    """Sentinel to be used as default arg during deprecation
    period of TOMLDecodeError's free-form arguments."""


class TOMLDecodeError(ValueError):
    """An error raised if a document is not valid TOML.

    Adds the following attributes to ValueError:
    msg: The unformatted error message
    doc: The TOML document being parsed
    pos: The index of doc where parsing failed
    lineno: The line corresponding to pos
    colno: The column corresponding to pos

    The attributes are only set when the error is constructed with a
    ``str`` message, a ``str`` document and an ``int`` position. Any
    other call signature is the deprecated free-form style: it emits a
    ``DeprecationWarning`` and behaves like a plain ``ValueError``.
    """

    def __init__(
        self,
        msg: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        doc: str | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        pos: Pos | type[DEPRECATED_DEFAULT] = DEPRECATED_DEFAULT,
        *args: Any,
    ):
        if (
            args
            or not isinstance(msg, str)
            or not isinstance(doc, str)
            or not isinstance(pos, int)
        ):
            import warnings

            warnings.warn(
                "Free-form arguments for TOMLDecodeError are deprecated. "
                "Please set 'msg' (str), 'doc' (str) and 'pos' (int) arguments only.",
                DeprecationWarning,
                stacklevel=2,
            )
            # Rebuild the positional tuple exactly as the caller supplied it,
            # dropping parameters that were left at the sentinel default.
            if pos is not DEPRECATED_DEFAULT:
                args = pos, *args
            if doc is not DEPRECATED_DEFAULT:
                args = doc, *args
            if msg is not DEPRECATED_DEFAULT:
                args = msg, *args
            ValueError.__init__(self, *args)
            return

        # 1-based line number: count the newlines that precede `pos`.
        lineno = doc.count("\n", 0, pos) + 1
        if lineno == 1:
            colno = pos + 1
        else:
            # Distance from the last newline before `pos`.
            colno = pos - doc.rindex("\n", 0, pos)

        if pos >= len(doc):
            coord_repr = "end of document"
        else:
            coord_repr = f"line {lineno}, column {colno}"
        errmsg = f"{msg} (at {coord_repr})"
        ValueError.__init__(self, errmsg)

        self.msg = msg
        self.doc = doc
        self.pos = pos
        self.lineno = lineno
        self.colno = colno

    def __reduce__(self) -> Any:
        """Support pickling and copying of structured errors.

        The inherited implementation re-creates the exception from
        ``self.args``, which holds only the formatted message. That would
        take the deprecated free-form path in ``__init__`` and emit a
        ``DeprecationWarning``. Rebuilding from ``(msg, doc, pos)`` avoids
        this; the instance ``__dict__`` is passed along as state so any
        extra attributes survive as well.
        """
        try:
            ctor_args = (self.msg, self.doc, self.pos)
        except AttributeError:
            # Created through the deprecated free-form path: the structured
            # attributes do not exist, so keep the inherited behaviour.
            return ValueError.__reduce__(self)
        return type(self), ctor_args, self.__dict__

135 

136 

def load(__fp: IO[bytes], *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a binary file object.

    The whole file is read, decoded with ``bytes.decode()`` defaults and
    handed to ``loads``. A ``TypeError`` is raised when ``read()`` returns
    an object without a ``decode`` method (e.g. a file opened in text mode).
    """
    raw = __fp.read()
    try:
        text = raw.decode()
    except AttributeError:
        # No `.decode` attribute: `read()` did not return bytes.
        raise TypeError(
            "File must be opened in binary mode, e.g. use `open('foo.toml', 'rb')`"
        ) from None
    else:
        return loads(text, parse_float=parse_float)

147 

148 

def loads(__s: str, *, parse_float: ParseFloat = float) -> dict[str, Any]:
    """Parse TOML from a string.

    Raises ``TypeError`` if the argument does not support
    ``str.replace`` with string arguments, and ``TOMLDecodeError`` if the
    document is not valid TOML.
    """
    # The spec allows converting "\r\n" to "\n", even in string
    # literals. Doing so up front keeps the rest of the parser simple.
    try:
        src = __s.replace("\r\n", "\n")
    except (AttributeError, TypeError):
        raise TypeError(
            f"Expected str object, not '{type(__s).__qualname__}'"
        ) from None

    out = Output()
    header: Key = ()
    parse_float = make_safe_parse_float(parse_float)
    pos = 0

    # Each iteration handles one statement
    # (which typically is one line of TOML source).
    while True:
        # 1. Drop whitespace at the start of the line.
        pos = skip_chars(src, pos, TOML_WS)
        if pos >= len(src):
            break  # end of file

        # 2. Dispatch on the first significant character. It must begin one of:
        #      - an empty line
        #      - a key/value pair
        #      - "[[...]]": append dict to list (and move to its namespace)
        #      - "[...]": create dict (and move to its namespace)
        #      - a comment
        #    Trailing whitespace is skipped where applicable.
        char = src[pos]
        if char == "\n":
            pos += 1
            continue
        if char in KEY_INITIAL_CHARS:
            pos = key_value_rule(src, pos, out, header, parse_float)
            pos = skip_chars(src, pos, TOML_WS)
        elif char == "[":
            is_array_of_tables = src.startswith("[[", pos)
            out.flags.finalize_pending()
            if is_array_of_tables:
                pos, header = create_list_rule(src, pos, out)
            else:
                pos, header = create_dict_rule(src, pos, out)
            pos = skip_chars(src, pos, TOML_WS)
        elif char != "#":
            raise TOMLDecodeError("Invalid statement", src, pos)

        # 3. Skip a comment, if there is one.
        pos = skip_comment(src, pos)

        # 4. The statement must be followed by a newline or end of file.
        if pos >= len(src):
            break
        if src[pos] != "\n":
            raise TOMLDecodeError(
                "Expected newline or end of document after a statement", src, pos
            )
        pos += 1

    return out.data.dict

218 

219 

class Flags:
    """Flags that map to parsed keys/namespaces.

    Flags live in a tree keyed by key parts. Every tree entry is a dict
    with three slots: ``"flags"`` (apply to that key only),
    ``"recursive_flags"`` (apply to the key and everything below it) and
    ``"nested"`` (the child entries).
    """

    # Marks an immutable namespace (inline array or inline table).
    FROZEN: Final = 0
    # Marks a nest that has been explicitly created and can no longer
    # be opened using the "[table]" syntax.
    EXPLICIT_NEST: Final = 1

    def __init__(self) -> None:
        self._flags: dict[str, dict[Any, Any]] = {}
        self._pending_flags: set[tuple[Key, int]] = set()

    def add_pending(self, key: Key, flag: int) -> None:
        """Queue a flag; it takes effect on the next `finalize_pending`."""
        self._pending_flags.add((key, flag))

    def finalize_pending(self) -> None:
        """Apply every queued flag (non-recursively) and empty the queue."""
        pending = self._pending_flags
        for pending_key, pending_flag in pending:
            self.set(pending_key, pending_flag, recursive=False)
        pending.clear()

    def unset_all(self, key: Key) -> None:
        """Remove the entry for `key`, including everything nested under it."""
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                # The path does not exist, so there is nothing to remove.
                return
            node = entry["nested"]
        node.pop(key[-1], None)

    def set(self, key: Key, flag: int, *, recursive: bool) -> None:  # noqa: A003
        """Set `flag` on `key`, creating missing tree entries on the way."""
        parents, stem = key[:-1], key[-1]
        node = self._flags
        for part in parents:
            entry = node.get(part)
            if entry is None:
                entry = node[part] = {
                    "flags": set(),
                    "recursive_flags": set(),
                    "nested": {},
                }
            node = entry["nested"]
        target = node.get(stem)
        if target is None:
            target = node[stem] = {
                "flags": set(),
                "recursive_flags": set(),
                "nested": {},
            }
        slot = "recursive_flags" if recursive else "flags"
        target[slot].add(flag)

    def is_(self, key: Key, flag: int) -> bool:
        """Return True if `flag` applies to `key`.

        A flag applies when it is set on `key` itself, or set recursively
        on `key` or on any of its ancestors.
        """
        if not key:
            return False  # document root has no flags
        node = self._flags
        for part in key[:-1]:
            entry = node.get(part)
            if entry is None:
                return False
            if flag in entry["recursive_flags"]:
                return True
            node = entry["nested"]
        entry = node.get(key[-1])
        if entry is None:
            return False
        return flag in entry["flags"] or flag in entry["recursive_flags"]

276 

277 

class NestedDict:
    """The output dict plus helpers to create nested tables inside it."""

    def __init__(self) -> None:
        # The parsed content of the TOML document
        self.dict: dict[str, Any] = {}

    def get_or_create_nest(
        self,
        key: Key,
        *,
        access_lists: bool = True,
    ) -> dict[str, Any]:
        """Walk `key` from the root, creating missing dicts, and return the last one.

        With `access_lists` enabled, a list found on the path is stepped
        into through its last item. Raises ``KeyError`` if any step ends
        on something that is not a dict.
        """
        node: Any = self.dict
        for part in key:
            node = node.setdefault(part, {})
            if access_lists and isinstance(node, list):
                node = node[-1]
            if not isinstance(node, dict):
                raise KeyError("There is no nest behind this key")
        return node  # type: ignore[no-any-return]

    def append_nest_to_list(self, key: Key) -> None:
        """Append an empty dict to the list at `key`, creating the list if absent.

        Raises ``KeyError`` if `key` already holds something other than a list.
        """
        parent = self.get_or_create_nest(key[:-1])
        last_key = key[-1]
        if last_key not in parent:
            parent[last_key] = [{}]
            return
        existing = parent[last_key]
        if not isinstance(existing, list):
            raise KeyError("An object other than list found behind this key")
        existing.append({})

310 

311 

class Output:
    """Parser state: the parsed data and the flags recorded about its keys."""

    def __init__(self) -> None:
        # Parsed document content, filled in as statements are processed.
        self.data = NestedDict()
        # Per-key flags (see `Flags`) that go with `data`.
        self.flags = Flags()

316 

317 

def skip_chars(src: str, pos: Pos, chars: Iterable[str]) -> Pos:
    """Return the first index at or after `pos` whose character is not in `chars`.

    If the end of `src` is reached first, the index at which indexing
    failed is returned instead.
    """
    while True:
        try:
            char = src[pos]
        except IndexError:
            # Ran off the end of the document.
            return pos
        if char not in chars:
            return pos
        pos += 1

325 

326 

def skip_until(
    src: str,
    pos: Pos,
    expect: str,
    *,
    error_on: frozenset[str],
    error_on_eof: bool,
) -> Pos:
    """Return the index of the next occurrence of `expect` at or after `pos`.

    If `expect` is not found, ``len(src)`` is returned, or a
    ``TOMLDecodeError`` is raised when `error_on_eof` is set. A
    ``TOMLDecodeError`` is also raised, pointing at the first offender,
    if any character in the skipped span is in `error_on`.
    """
    try:
        new_pos = src.index(expect, pos)
    except ValueError:
        new_pos = len(src)
        if error_on_eof:
            raise TOMLDecodeError(f"Expected {expect!r}", src, new_pos) from None

    skipped = src[pos:new_pos]
    if error_on.isdisjoint(skipped):
        return new_pos

    # At least one illegal character is present: report the first one.
    for bad_pos, char in enumerate(skipped, pos):
        if char in error_on:
            break
    raise TOMLDecodeError(f"Found invalid character {char!r}", src, bad_pos)

347 

348 

def skip_comment(src: str, pos: Pos) -> Pos:
    """Skip a comment starting at `pos`, if there is one.

    When the character at `pos` is "#", return the index of the newline
    that ends the comment (or ``len(src)`` if there is none); characters
    in ``ILLEGAL_COMMENT_CHARS`` inside the comment raise via
    `skip_until`. Otherwise `pos` is returned unchanged.
    """
    if not src.startswith("#", pos):
        return pos
    return skip_until(
        src, pos + 1, "\n", error_on=ILLEGAL_COMMENT_CHARS, error_on_eof=False
    )

359 

360 

def skip_comments_and_array_ws(src: str, pos: Pos) -> Pos:
    """Skip any mix of whitespace, newlines and comments starting at `pos`.

    Repeats "skip whitespace/newlines, then skip one comment" until a
    round makes no progress, and returns the resulting index.
    """
    advanced = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
    while advanced != pos:
        pos = advanced
        advanced = skip_comment(src, skip_chars(src, pos, TOML_WS_AND_NEWLINE))
    return pos

368 

369 

def create_dict_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[table]" header whose "[" is at `pos`.

    Marks the key as explicitly declared, makes sure the table exists in
    the output and returns the index just past the closing "]" together
    with the parsed key. Raises ``TOMLDecodeError`` if the table was
    already declared or is frozen, if the key collides with a non-table
    value, or if the closing "]" is missing.
    """
    # `pos + 1` skips the opening "[".
    pos, key = parse_key(src, skip_chars(src, pos + 1, TOML_WS))

    flags = out.flags
    if flags.is_(key, Flags.EXPLICIT_NEST) or flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot declare {key} twice", src, pos)
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.get_or_create_nest(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]", pos):
        return pos + 1, key
    raise TOMLDecodeError(
        "Expected ']' at the end of a table declaration", src, pos
    )

388 

389 

def create_list_rule(src: str, pos: Pos, out: Output) -> tuple[Pos, Key]:
    """Handle a "[[array-of-tables]]" header whose "[[" is at `pos`.

    Appends a fresh empty table to the list at the parsed key and returns
    the index just past the closing "]]" together with the key. Raises
    ``TOMLDecodeError`` if the namespace is frozen, if the key holds a
    non-list value, or if the closing "]]" is missing.
    """
    # `pos + 2` skips the opening "[[".
    pos, key = parse_key(src, skip_chars(src, pos + 2, TOML_WS))

    flags = out.flags
    if flags.is_(key, Flags.FROZEN):
        raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
    # Free the namespace now that it points to another empty list item...
    flags.unset_all(key)
    # ...but this key precisely is still prohibited from table declaration
    flags.set(key, Flags.EXPLICIT_NEST, recursive=False)
    try:
        out.data.append_nest_to_list(key)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None

    if src.startswith("]]", pos):
        return pos + 2, key
    raise TOMLDecodeError(
        "Expected ']]' at the end of an array declaration", src, pos
    )

411 

412 

def key_value_rule(
    src: str, pos: Pos, out: Output, header: Key, parse_float: ParseFloat
) -> Pos:
    """Parse one top-level "key = value" statement and store it in `out`.

    The key is interpreted relative to `header`. Returns the index after
    the value. Raises ``TOMLDecodeError`` if the statement would redefine
    an explicitly declared table, change a frozen namespace or overwrite
    an existing value.
    """
    pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl=0)
    abs_key = header + key
    abs_key_parent = abs_key[:-1]
    key_stem = key[-1]
    flags = out.flags

    # Visit every container on the dotted path: header + key[:1], key[:2], ...
    for depth in range(1, len(key)):
        cont_key = header + key[:depth]
        # Check that dotted key syntax does not redefine an existing table
        if flags.is_(cont_key, Flags.EXPLICIT_NEST):
            raise TOMLDecodeError(f"Cannot redefine namespace {cont_key}", src, pos)
        # Containers in the relative path can't be opened with the table syntax or
        # dotted key/value syntax in following table sections.
        flags.add_pending(cont_key, Flags.EXPLICIT_NEST)

    if flags.is_(abs_key_parent, Flags.FROZEN):
        raise TOMLDecodeError(
            f"Cannot mutate immutable namespace {abs_key_parent}", src, pos
        )

    try:
        nest = out.data.get_or_create_nest(abs_key_parent)
    except KeyError:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
    if key_stem in nest:
        raise TOMLDecodeError("Cannot overwrite a value", src, pos)

    # Mark inline table and array namespaces recursively immutable
    if isinstance(value, (dict, list)):
        flags.set(abs_key, Flags.FROZEN, recursive=True)
    nest[key_stem] = value
    return pos

445 

446 

def parse_key_value_pair(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Key, Any]:
    """Parse "key = value" starting at `pos`.

    Returns the index after the value, the parsed key and the parsed
    value. Raises ``TOMLDecodeError`` if the key is not followed by "=".
    """
    pos, key = parse_key(src, pos)
    if not src.startswith("=", pos):
        raise TOMLDecodeError("Expected '=' after a key in a key/value pair", src, pos)
    # `pos + 1` steps over "="; whitespace before the value is ignored.
    value_start = skip_chars(src, pos + 1, TOML_WS)
    pos, value = parse_value(src, value_start, parse_float, nest_lvl)
    return pos, key, value

461 

462 

def parse_key(src: str, pos: Pos) -> tuple[Pos, Key]:
    """Parse a (possibly dotted) key starting at `pos`.

    Returns the index of the first character after the key and any
    trailing whitespace, plus the key as a tuple of its parts.
    """
    pos, first_part = parse_key_part(src, pos)
    parts = [first_part]
    pos = skip_chars(src, pos, TOML_WS)
    # Each "." introduces one more key part.
    while src.startswith(".", pos):
        pos = skip_chars(src, pos + 1, TOML_WS)
        pos, next_part = parse_key_part(src, pos)
        parts.append(next_part)
        pos = skip_chars(src, pos, TOML_WS)
    return pos, tuple(parts)

479 

480 

def parse_key_part(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse one key part: a bare key, a literal string or a basic string.

    Returns the index after the part and the part's text. Raises
    ``TOMLDecodeError`` if the character at `pos` cannot start a key part
    (this includes end of document).
    """
    lead = src[pos : pos + 1]  # empty string at end of document
    if lead == '"':
        return parse_one_line_basic_str(src, pos)
    if lead == "'":
        return parse_literal_str(src, pos)
    if lead in BARE_KEY_CHARS:
        end = skip_chars(src, pos, BARE_KEY_CHARS)
        return end, src[pos:end]
    raise TOMLDecodeError("Invalid initial character for a key part", src, pos)

495 

496 

def parse_one_line_basic_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line basic string whose opening quote is at `pos`."""
    # `pos + 1` skips the opening quotation mark.
    return parse_basic_str(src, pos + 1, multiline=False)

500 

501 

def parse_array(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, list[Any]]:
    """Parse an array whose opening "[" is at `pos`.

    Returns the index after the closing "]" and the parsed items.
    Whitespace, newlines and comments may surround items, and a trailing
    comma is accepted. Raises ``TOMLDecodeError("Unclosed array")`` when
    an item is followed by neither "," nor "]".
    """
    items: list[Any] = []

    # `pos + 1` skips the opening "[".
    pos = skip_comments_and_array_ws(src, pos + 1)
    while not src.startswith("]", pos):
        pos, item = parse_value(src, pos, parse_float, nest_lvl)
        items.append(item)
        pos = skip_comments_and_array_ws(src, pos)

        if src.startswith("]", pos):
            break
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed array", src, pos)
        # Step over the comma; the loop condition then handles a "]"
        # that directly follows it (trailing comma).
        pos = skip_comments_and_array_ws(src, pos + 1)
    return pos + 1, items

526 

527 

def parse_inline_table(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, dict[str, Any]]:
    """Parse an inline table whose opening "{" is at `pos`.

    Returns the index after the closing "}" and the resulting dict.
    Raises ``TOMLDecodeError`` on duplicate keys, on attempts to extend a
    nested inline table or array, on key/value collisions and when a
    pair is followed by neither "," nor "}".
    """
    table = NestedDict()
    frozen = Flags()

    # `pos + 1` skips the opening "{".
    pos = skip_chars(src, pos + 1, TOML_WS)
    if src.startswith("}", pos):
        return pos + 1, table.dict

    while True:
        pos, key, value = parse_key_value_pair(src, pos, parse_float, nest_lvl)
        parent_key, stem = key[:-1], key[-1]
        if frozen.is_(key, Flags.FROZEN):
            raise TOMLDecodeError(f"Cannot mutate immutable namespace {key}", src, pos)
        try:
            target = table.get_or_create_nest(parent_key, access_lists=False)
        except KeyError:
            raise TOMLDecodeError("Cannot overwrite a value", src, pos) from None
        if stem in target:
            raise TOMLDecodeError(f"Duplicate inline table key {stem!r}", src, pos)
        target[stem] = value

        pos = skip_chars(src, pos, TOML_WS)
        if src.startswith("}", pos):
            return pos + 1, table.dict
        if not src.startswith(",", pos):
            raise TOMLDecodeError("Unclosed inline table", src, pos)
        # More pairs follow: nested tables/arrays may not be extended by them.
        if isinstance(value, (dict, list)):
            frozen.set(key, Flags.FROZEN, recursive=True)
        pos = skip_chars(src, pos + 1, TOML_WS)

560 

561 

def parse_basic_str_escape(
    src: str, pos: Pos, *, multiline: bool = False
) -> tuple[Pos, str]:
    """Parse the escape sequence whose backslash is at `pos`.

    Returns the index after the escape sequence and the text it stands
    for. In multiline mode a backslash followed by a space, tab or
    newline trims the following whitespace and yields an empty string.
    Raises ``TOMLDecodeError`` for unknown escapes.
    """
    escape_id = src[pos : pos + 2]
    pos += 2

    if multiline and escape_id in {"\\ ", "\\\t", "\\\n"}:
        # Skip whitespace until next non-whitespace character or end of
        # the doc. Error if non-whitespace is found before newline.
        if escape_id != "\\\n":
            pos = skip_chars(src, pos, TOML_WS)
            if pos >= len(src):
                return pos, ""
            if src[pos] != "\n":
                raise TOMLDecodeError("Unescaped '\\' in a string", src, pos)
            pos += 1
        return skip_chars(src, pos, TOML_WS_AND_NEWLINE), ""

    if escape_id in ("\\u", "\\U"):
        # "\u" takes four hex digits, "\U" takes eight.
        return parse_hex_char(src, pos, 4 if escape_id == "\\u" else 8)

    try:
        replacement = BASIC_STR_ESCAPE_REPLACEMENTS[escape_id]
    except KeyError:
        raise TOMLDecodeError("Unescaped '\\' in a string", src, pos) from None
    return pos, replacement

589 

590 

def parse_basic_str_escape_multiline(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse an escape sequence at `pos` using the multiline-string rules."""
    return parse_basic_str_escape(src, pos, multiline=True)

593 

594 

def parse_hex_char(src: str, pos: Pos, hex_len: int) -> tuple[Pos, str]:
    """Parse `hex_len` hex digits at `pos` into the character they encode.

    Returns the index after the digits and the character. Raises
    ``TOMLDecodeError`` if fewer than `hex_len` hex digits are available
    or the code point is not a Unicode scalar value.
    """
    end = pos + hex_len
    digits = src[pos:end]
    well_formed = len(digits) == hex_len and HEXDIGIT_CHARS.issuperset(digits)
    if not well_formed:
        raise TOMLDecodeError("Invalid hex value", src, pos)

    codepoint = int(digits, 16)
    if not is_unicode_scalar_value(codepoint):
        # This error is reported at the index after the digits.
        raise TOMLDecodeError(
            "Escaped character is not a Unicode scalar value", src, end
        )
    return end, chr(codepoint)

606 

607 

def parse_literal_str(src: str, pos: Pos) -> tuple[Pos, str]:
    """Parse a single-line literal string whose opening apostrophe is at `pos`.

    Returns the index after the closing apostrophe and the text between
    the apostrophes, taken verbatim.
    """
    content_start = pos + 1  # skip starting apostrophe
    content_end = skip_until(
        src, content_start, "'", error_on=ILLEGAL_LITERAL_STR_CHARS, error_on_eof=True
    )
    # `content_end + 1` skips the ending apostrophe.
    return content_end + 1, src[content_start:content_end]

615 

616 

def parse_multiline_str(src: str, pos: Pos, *, literal: bool) -> tuple[Pos, str]:
    """Parse a multiline string whose three-character opening delimiter is at `pos`.

    `literal` selects the ''' form (content taken verbatim) over the
    \"\"\" form (escapes processed). Returns the index after the string
    and its content.
    """
    pos += 3  # skip the opening delimiter
    # A newline directly after the opening delimiter is not part of the content.
    if src.startswith("\n", pos):
        pos += 1

    if literal:
        delim = "'"
        content_end = skip_until(
            src,
            pos,
            "'''",
            error_on=ILLEGAL_MULTILINE_LITERAL_STR_CHARS,
            error_on_eof=True,
        )
        result = src[pos:content_end]
        pos = content_end + 3
    else:
        delim = '"'
        pos, result = parse_basic_str(src, pos, multiline=True)

    # Add at maximum two extra apostrophes/quotes if the end sequence
    # is 4 or 5 chars long instead of just 3.
    extra = 0
    while extra < 2 and src.startswith(delim, pos + extra):
        extra += 1
    return pos + extra, result + delim * extra

646 

647 

def parse_basic_str(src: str, pos: Pos, *, multiline: bool) -> tuple[Pos, str]:
    """Parse the content of a basic (double-quoted) string starting at `pos`.

    `pos` must point just past the opening delimiter. Returns the index
    after the closing delimiter (one quote, or three in multiline mode)
    and the string content with escape sequences replaced.

    Raises ``TOMLDecodeError`` if the document ends before the closing
    delimiter or if an illegal character is found.
    """
    if multiline:
        error_on = ILLEGAL_MULTILINE_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape_multiline
    else:
        error_on = ILLEGAL_BASIC_STR_CHARS
        parse_escapes = parse_basic_str_escape

    # Pieces are collected and joined once at the end: repeated `str +=`
    # is only linear thanks to a CPython implementation detail.
    parts: list[str] = []
    # Start of the current run of plain characters not yet added to `parts`.
    start_pos = pos
    while True:
        try:
            char = src[pos]
        except IndexError:
            raise TOMLDecodeError("Unterminated string", src, pos) from None
        if char == '"':
            if multiline and not src.startswith('"""', pos):
                # A lone quote inside a multiline string is ordinary content.
                pos += 1
                continue
            parts.append(src[start_pos:pos])
            return pos + (3 if multiline else 1), "".join(parts)
        if char == "\\":
            parts.append(src[start_pos:pos])
            pos, parsed_escape = parse_escapes(src, pos)
            parts.append(parsed_escape)
            start_pos = pos
            continue
        if char in error_on:
            raise TOMLDecodeError(f"Illegal character {char!r}", src, pos)
        pos += 1

678 

679 

def parse_value(
    src: str, pos: Pos, parse_float: ParseFloat, nest_lvl: int
) -> tuple[Pos, Any]:
    """Parse the TOML value that starts at `pos`.

    Returns the index after the value and the parsed Python object.
    `nest_lvl` is the current inline array/table depth. Raises
    ``RecursionError`` when it exceeds ``MAX_INLINE_NESTING`` and
    ``TOMLDecodeError`` when no value can be parsed.
    """
    if nest_lvl > MAX_INLINE_NESTING:
        # Pure Python should have raised RecursionError already.
        # This ensures mypyc binaries eventually do the same.
        raise RecursionError(  # pragma: no cover
            "TOML inline arrays/tables are nested more than the allowed"
            f" {MAX_INLINE_NESTING} levels"
        )

    # Empty string at end of document; it matches none of the checks below.
    char = src[pos : pos + 1]

    # IMPORTANT: order conditions based on speed of checking and likelihood

    # Basic strings
    if char == '"':
        if src.startswith('"""', pos):
            return parse_multiline_str(src, pos, literal=False)
        return parse_one_line_basic_str(src, pos)

    # Literal strings
    if char == "'":
        if src.startswith("'''", pos):
            return parse_multiline_str(src, pos, literal=True)
        return parse_literal_str(src, pos)

    # Booleans
    if char == "t" and src.startswith("true", pos):
        return pos + 4, True
    if char == "f" and src.startswith("false", pos):
        return pos + 5, False

    # Arrays
    if char == "[":
        return parse_array(src, pos, parse_float, nest_lvl + 1)

    # Inline tables
    if char == "{":
        return parse_inline_table(src, pos, parse_float, nest_lvl + 1)

    # Dates and times
    datetime_match = RE_DATETIME.match(src, pos)
    if datetime_match:
        try:
            datetime_obj = match_to_datetime(datetime_match)
        except ValueError as e:
            raise TOMLDecodeError("Invalid date or datetime", src, pos) from e
        return datetime_match.end(), datetime_obj
    localtime_match = RE_LOCALTIME.match(src, pos)
    if localtime_match:
        return localtime_match.end(), match_to_localtime(localtime_match)

    # Integers and "normal" floats.
    # The regex will greedily match any type starting with a decimal
    # char, so needs to be located after handling of dates and times.
    number_match = RE_NUMBER.match(src, pos)
    if number_match:
        return number_match.end(), match_to_number(number_match, parse_float)

    # Special floats
    unsigned_special = src[pos : pos + 3]
    if unsigned_special in {"inf", "nan"}:
        return pos + 3, parse_float(unsigned_special)
    signed_special = src[pos : pos + 4]
    if signed_special in {"-inf", "+inf", "-nan", "+nan"}:
        return pos + 4, parse_float(signed_special)

    raise TOMLDecodeError("Invalid value", src, pos)

754 

755 

def is_unicode_scalar_value(codepoint: int) -> bool:
    """Return True if `codepoint` is in 0..0x10FFFF and outside 0xD800..0xDFFF."""
    in_unicode_range = 0 <= codepoint <= 0x10FFFF
    in_excluded_gap = 0xD800 <= codepoint <= 0xDFFF
    return in_unicode_range and not in_excluded_gap

758 

759 

def make_safe_parse_float(parse_float: ParseFloat) -> ParseFloat:
    """A decorator to make `parse_float` safe.

    `parse_float` must not return dicts or lists, because these types
    would be mixed with parsed TOML tables and arrays, thus confusing
    the parser. The returned decorated callable raises `ValueError`
    instead of returning illegal types.
    """

    def safe_parse_float(float_str: str) -> Any:
        parsed = parse_float(float_str)
        if not isinstance(parsed, (dict, list)):
            return parsed
        raise ValueError("parse_float must not return dicts or lists")

    # The default `float` callable never returns illegal types, so it is
    # handed back unwrapped as an optimization.
    return float if parse_float is float else safe_parse_float