Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/json5/lib.py: 30%


303 statements  

# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import math
import re
from typing import (
    Any,
    Callable,
    IO,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)
import unicodedata

from json5.parser import Parser


# Used when encoding keys, below.
_reserved_word_re: Optional[re.Pattern] = None


class QuoteStyle(enum.Enum):
    """Controls how strings will be quoted during encoding.

    By default, for compatibility with the `json` module and older versions
    of `json5`, strings (other than keys that are legal identifiers, which
    are written unquoted) will always be double-quoted, and any double
    quotes in the string will be escaped. This is `QuoteStyle.ALWAYS_DOUBLE`.
    If you pass `QuoteStyle.ALWAYS_SINGLE`, then strings will always be
    single-quoted, and any single quotes in the string will be escaped.
    If you pass `QuoteStyle.PREFER_DOUBLE`, then the behavior is the same
    as ALWAYS_DOUBLE and strings will be double-quoted *unless* the string
    contains more double quotes than single quotes, in which case the
    string will be single-quoted and single quotes will be escaped. If you
    pass `QuoteStyle.PREFER_SINGLE`, then the behavior is the same as
    ALWAYS_SINGLE and strings will be single-quoted *unless* the string
    contains more single quotes than double quotes, in which case the
    string will be double-quoted and any double quotes will be escaped.

    *Note:* PREFER_DOUBLE and PREFER_SINGLE can impact performance, since
    in order to know which encoding to use you have to iterate over the
    entire string to count the number of single and double quotes. The code
    guesses at an encoding while doing so, but if it guesses wrong, the
    entire string has to be re-encoded, which will slow things down. If you
    are very concerned about performance (a) you probably shouldn't be
    using this library in the first place, because it just isn't very fast,
    and (b) you should use ALWAYS_DOUBLE or ALWAYS_SINGLE, which won't have
    this issue.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'

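# Illustrative sketch of how the quote styles above affect output (outputs
# hand-derived from the encoder code below, not captured from a run):
#
#   >>> from json5.lib import dumps, QuoteStyle
#   >>> print(dumps('say "hi"'))
#   "say \"hi\""
#   >>> print(dumps('say "hi"', quote_style=QuoteStyle.PREFER_SINGLE))
#   'say "hi"'
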

def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON document) to a Python object.

    Supports almost the same arguments as ``json.load()`` except that:
    - the `cls` keyword is ignored.
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.load()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.
    - an extra `consume_trailing` parameter specifies whether to
      consume any trailing characters after a valid object has been
      parsed. By default, this value is True and the only legal
      trailing characters are whitespace. If this value is set to False,
      parsing will stop when a valid object has been parsed and any
      trailing characters in the string will be ignored.
    - an extra `start` parameter specifies the zero-based offset into the
      file to start parsing at. If `start` is None, parsing will
      start at the current position in the file, and line number
      and column values will be reported as if starting from the
      beginning of the file; if `start` is not None,
      `load` will seek to zero and then read (and discard) the
      appropriate number of characters before beginning parsing;
      the file must be seekable for this to work correctly.

    You can use `load(..., consume_trailing=False)` to repeatedly read
    values from a file. However, in the current implementation `load` does
    this by reading the entire file into memory before doing anything, so
    it is not very efficient.

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    s = fp.read()
    val, err, _ = parse(
        s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val

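# A minimal usage sketch for load(); io.StringIO stands in for a real file
# (expected output hand-derived from the code, not captured from a run):
#
#   >>> import io
#   >>> from json5.lib import load
#   >>> load(io.StringIO('{key: "value", /* comment */ n: .5}'))
#   {'key': 'value', 'n': 0.5}
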


def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    Supports the same arguments as ``json.load()`` except that:
    - the `cls` keyword is ignored.
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.load()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.
    - an extra `consume_trailing` parameter specifies whether to
      consume any trailing characters after a valid object has been
      parsed. By default, this value is True and the only legal
      trailing characters are whitespace. If this value is set to False,
      parsing will stop when a valid object has been parsed and any
      trailing characters in the string will be ignored.
    - an extra `start` parameter specifies the zero-based offset into the
      string to start parsing at.

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    val, err, _ = parse(
        s=s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val

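# Sketch of the `allow_duplicate_keys` behavior described above (outputs
# hand-derived from the code, not captured from a run):
#
#   >>> from json5.lib import loads
#   >>> loads('{a: 1, a: 2}')
#   {'a': 2}
#   >>> loads('{a: 1, a: 2}', allow_duplicate_keys=False)
#   Traceback (most recent call last):
#     ...
#   ValueError: Duplicate key "a" found in object
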


def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Parse ``s``, returning positional information along with a value.

    This works exactly like `loads()`, except that (a) it returns the
    position in the string where the parsing stopped (either due to
    hitting an error or parsing a valid value) and any error as a string,
    (b) it takes an optional `consume_trailing` parameter that says whether
    to keep parsing the string after a valid value has been parsed; if True
    (the default), any trailing characters must be whitespace. If False,
    parsing stops when a valid value has been reached, (c) it takes an
    optional `start` parameter that specifies a zero-based offset to start
    parsing from in the string, and (d) the return value is different, as
    described below.

    `parse()` is useful if you have a string that might contain multiple
    values and you need to extract all of them; you can do so by repeatedly
    calling `parse`, setting `start` to the value returned in `position`
    from the previous call.

    Returns a tuple of (value, error_string, position). If the string
    was a legal value, `value` will be the deserialized value,
    `error_string` will be `None`, and `position` will be one
    past the zero-based offset where the parser stopped reading.
    If the string was not a legal value,
    `value` will be `None`, `error_string` will be the string value
    of the exception that would've been raised, and `position` will
    be the zero-based farthest offset into the string where the parser
    hit an error.

    Raises:
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.

    Note that this does *not* raise a `ValueError`; instead any error is
    returned as the second value in the tuple.

    You can use this method to read in a series of values from a string
    `s` as follows:

    >>> import json5
    >>> s = '1 2 3 4'
    >>> values = []
    >>> start = 0
    >>> while True:
    ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
    ...     if v:
    ...         values.append(v)
    ...         start = pos
    ...         if start == len(s) or s[start:].isspace():
    ...             # Reached the end of the string (ignoring trailing
    ...             # whitespace)
    ...             break
    ...         continue
    ...     raise ValueError(err)
    >>> values
    [1, 2, 3, 4]

    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    if not s:
        raise ValueError('Empty strings are not legal JSON5')
    start = start or 0
    parser = Parser(s, '<string>', pos=start)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
        return value, None, pos
    except ValueError as e:
        return None, str(e), pos

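# Sketch of the error-return behavior of parse(): unlike load()/loads(), a
# bad document produces an error tuple rather than an exception (the exact
# error text depends on the parser, so it is not shown here):
#
#   >>> from json5.lib import parse
#   >>> v, err, pos = parse('[1, 2')   # truncated document
#   >>> v is None, isinstance(err, str)
#   (True, True)
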


def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    def _fp_constant_parser(s):
        return float(s.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _dictify(pairs):
        if not allow_duplicate_keys:
            keys = set()
            for key, _ in pairs:
                if key in keys:
                    raise ValueError(f'Duplicate key "{key}" found in object')
                keys.add(key)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        if object_hook:
            return object_hook(dict(pairs))
        return dict(pairs)

    parse_float = parse_float or float
    parse_int = parse_int or int
    parse_constant = parse_constant or _fp_constant_parser

    return _walk_ast(ast, _dictify, parse_float, parse_int, parse_constant)

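# Sketch of how the parse_* hooks plumbed through _convert() are applied
# (output hand-derived from the code, not captured from a run):
#
#   >>> from decimal import Decimal
#   >>> from json5.lib import loads
#   >>> loads('[1.5, 2, Infinity]', parse_float=Decimal)
#   [Decimal('1.5'), 2, inf]
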


def _walk_ast(
    el,
    dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
    parse_float,
    parse_int,
    parse_constant,
):
    if el == 'None':
        return None
    if el == 'True':
        return True
    if el == 'False':
        return False
    ty, v = el
    if ty == 'number':
        if v.startswith('0x') or v.startswith('0X'):
            return parse_int(v, base=16)
        if '.' in v or 'e' in v or 'E' in v:
            return parse_float(v)
        if 'Infinity' in v or 'NaN' in v:
            return parse_constant(v)
        return parse_int(v)
    if ty == 'string':
        return v
    if ty == 'object':
        pairs = []
        for key, val_expr in v:
            val = _walk_ast(
                val_expr, dictify, parse_float, parse_int, parse_constant
            )
            pairs.append((key, val))
        return dictify(pairs)
    if ty == 'array':
        return [
            _walk_ast(el, dictify, parse_float, parse_int, parse_constant)
            for el in v
        ]
    raise ValueError(f'unknown el: {el}')  # pragma: no cover

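# For reference, the AST shape that _walk_ast() consumes, as implied by the
# branches above (this literal is an illustration, not parser output
# captured from a real run):
#
#   >>> ast = ('object', [('a', ('array', [('number', '1'), 'True']))])
#   >>> _walk_ast(ast, dict, float, int, float)
#   {'a': [1, True]}
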


def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` to a JSON5-formatted stream to ``fp``,
    a ``.write()``-supporting file-like object.

    Supports the same arguments as ``dumps()``, below.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False, \
        allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dump(obj, fp)``.
    """

    fp.write(
        dumps(
            obj=obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
            quote_keys=quote_keys,
            trailing_commas=trailing_commas,
            allow_duplicate_keys=allow_duplicate_keys,
            quote_style=quote_style,
            **kw,
        )
    )

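# A minimal usage sketch for dump(); io.StringIO stands in for a real file
# (output hand-derived from the code, not captured from a run):
#
#   >>> import io
#   >>> from json5.lib import dump
#   >>> buf = io.StringIO()
#   >>> dump({'a': 1}, buf)
#   >>> buf.getvalue()
#   '{a: 1}'
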


def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always
      written.
    - By default, object keys that are legal identifiers are not quoted;
      if you pass ``quote_keys=True``, they will be.
    - By default, if lists and objects span multiple lines of output (i.e.,
      when ``indent`` >= 0), the last item will have a trailing comma
      after it. If you pass ``trailing_commas=False``, it will not.
    - If you use a number, a boolean, or ``None`` as a key value in a dict,
      it will be converted to the corresponding JSON string value, e.g.
      "1", "true", or "null". By default, ``dump()`` will match the `json`
      module's behavior and produce malformed JSON if you mix keys of
      different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, an exception will be raised instead.
    - If `quote_keys` is true, then keys of objects will be enclosed in
      quotes, as in regular JSON. Otherwise, keys will not be enclosed in
      quotes unless they contain whitespace.
    - If `trailing_commas` is false, then commas will not be inserted after
      the final elements of objects and arrays, as in regular JSON.
      Otherwise, such commas will be inserted.
    - If `allow_duplicate_keys` is false, then only the last entry with a
      given key will be written. Otherwise, all entries with the same key
      will be written.
    - `quote_style` controls how strings are encoded. See the documentation
      for the `QuoteStyle` class, above, for how this is used.

      *Note*: Strings that are being used as unquoted keys are not affected
      by this parameter and remain unquoted.

      *`quote_style` was added in version 0.10.0*.

    Other keyword arguments are allowed and will be passed to the
    encoder so custom encoders can get them, but otherwise they will
    be ignored in an attempt to provide some amount of
    forward-compatibility.

    *Note:* the standard JSON module explicitly calls `int.__repr__(obj)`
    and `float.__repr__(obj)` to encode ints and floats, thereby bypassing
    any custom representations you might have for objects that are
    subclasses of ints and floats, and, for compatibility, JSON5 does the
    same thing. To override this behavior, create a subclass of
    JSON5Encoder that overrides `encode()` and handles your custom
    representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...     def __repr__(self):
    ...         return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...     def encode(
    ...         self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...     ) -> str:
    ...         if isinstance(obj, Hex):
    ...             return repr(obj)
    ...         return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False, \
        allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dumps(obj)``.
    """

    cls = cls or JSON5Encoder
    enc = cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    return enc.encode(obj, seen=set(), level=0, as_key=False)

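# Sketch of the indent/trailing-comma behavior described above (output
# hand-derived from the code, not captured from a run):
#
#   >>> from json5.lib import dumps
#   >>> print(dumps({'a': [1, 2]}, indent=2))
#   {
#     a: [
#       1,
#       2,
#     ],
#   }
#   >>> dumps({'a': [1, 2]}, quote_keys=True, trailing_commas=False)
#   '{"a": [1, 2]}'
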


class JSON5Encoder:
    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.
        *Added in version 0.10.0*"""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        self.separators = separators
        if separators is None:
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` is used for duplicate object tracking when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a double-quoted
        string representation of the object, unless obj is a string that
        can be an identifier (and quote_keys is false and obj isn't a
        reserved word). If the object should not be used as a key,
        `TypeError` should be raised; that allows the base implementation
        to implement `skipkeys` properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj!r}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns None if the object is not a basic type."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        for ch in obj:
            if ch == dq:
                # At first we will guess at which quotes to escape. If
                # we guess wrong, we reencode the string below.
                double_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_DOUBLE,
                    QuoteStyle.PREFER_DOUBLE,
                ):
                    encoded_ch = self._escape_ch(dq)
                else:
                    encoded_ch = dq
            elif ch == sq:
                single_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_SINGLE,
                    QuoteStyle.PREFER_SINGLE,
                ):
                    encoded_ch = self._escape_ch(sq)
                else:
                    encoded_ch = sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if quote_style in (QuoteStyle.ALWAYS_DOUBLE, QuoteStyle.PREFER_DOUBLE):
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        if ch == '\\':
            return '\\\\'
        if ch == "'":
            return r'\''
        if ch == '"':
            return r'\"'
        if ch == '\n':
            return r'\n'
        if ch == '\r':
            return r'\r'
        if ch == '\t':
            return r'\t'
        if ch == '\b':
            return r'\b'
        if ch == '\f':
            return r'\f'
        if ch == '\v':
            return r'\v'
        if ch == '\0':
            return r'\0'

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

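    # Worked example of the surrogate-pair arithmetic in _escape_ch() above,
    # for U+1F600 (hand-derived, not captured from a run):
    #
    #   >>> o = 0x1F600
    #   >>> val = o - 0x10000
    #   >>> hex(0xD800 + (val >> 10)), hex(0xDC00 + (val & 0x3FF))
    #   ('0xd83d', '0xde00')
    #
    # so the character is escaped as '\ud83d\ude00'.
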

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        # Ideally we'd use collections.abc.Mapping and
        # collections.abc.Sequence here, but for backwards-compatibility
        # with potential old callers, we only check for the two attributes
        # we need in each case.
        if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
            s = self._encode_dict(obj, seen, level + 1)
        elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
            s = self._encode_array(obj, seen, level + 1)
        else:
            s = self.encode(self.default(obj), seen, level + 1, as_key=False)
            assert s is not None

        if self.check_circular:
            seen.remove(i)
        return s

    def _encode_dict(self, obj: Any, seen: set, level: int) -> str:
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        s = '{' + indent_str

        first_key = True
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            if first_key:
                first_key = False
            else:
                s += item_sep

            val_str = self.encode(obj[key], seen, level, as_key=False)
            s += key_str + kv_sep + val_str

        s += end_str + '}'
        return s

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers changes in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if (
            not key
            or not self._is_id_start(key[0])
            and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

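    # Sketch of what is_identifier() accepts (results hand-derived from the
    # code above, not captured from a run):
    #
    #   >>> enc = JSON5Encoder()
    #   >>> enc.is_identifier('foo_bar'), enc.is_identifier('$x')
    #   (True, True)
    #   >>> enc.is_identifier('foo bar'), enc.is_identifier('2x')
    #   (False, False)
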

    def _is_id_start(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not been updated to handle
        the changes yet."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None

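# Sketch of how is_reserved_word() affects key quoting (output hand-derived
# from the encoder code, not captured from a run):
#
#   >>> enc = JSON5Encoder()
#   >>> enc.is_reserved_word('while'), enc.is_reserved_word('whileloop')
#   (True, False)
#   >>> from json5.lib import dumps
#   >>> dumps({'while': 1, 'x': 2})
#   '{"while": 1, x: 2}'
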


def _raise_type_error(obj) -> Any:
    raise TypeError(f'{repr(obj)} is not JSON5 serializable')