Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/json5/lib.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

304 statements  

1# Copyright 2015 Google Inc. All rights reserved. 

2# 

3# Licensed under the Apache License, Version 2.0 (the "License"); 

4# you may not use this file except in compliance with the License. 

5# You may obtain a copy of the License at 

6# 

7# http://www.apache.org/licenses/LICENSE-2.0 

8# 

9# Unless required by applicable law or agreed to in writing, software 

10# distributed under the License is distributed on an "AS IS" BASIS, 

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 

12# See the License for the specific language governing permissions and 

13# limitations under the License. 

14 

15import enum 

16import math 

17import re 

18from typing import ( 

19 Any, 

20 Callable, 

21 IO, 

22 Iterable, 

23 Mapping, 

24 Optional, 

25 Set, 

26 Tuple, 

27 Type, 

28 Union, 

29) 

30import unicodedata 

31 

32from json5.parser import Parser 

33 

34 

35# Cache for the compiled reserved-word regex used when encoding keys, below: None until JSON5Encoder.is_reserved_word() builds it on first use.

36_reserved_word_re: Optional[re.Pattern] = None 

37 

38 

class QuoteStyle(enum.Enum):
    """Selects the quote character used when strings are encoded.

    The four policies are:

    - ``ALWAYS_DOUBLE`` (the default, matching the `json` module and older
      versions of `json5`): every quoted string is wrapped in double
      quotes, and double quotes inside the string are escaped.
    - ``ALWAYS_SINGLE``: every quoted string is wrapped in single quotes,
      and single quotes inside the string are escaped.
    - ``PREFER_DOUBLE``: behaves like ``ALWAYS_DOUBLE`` *unless* the string
      contains more double quotes than single quotes, in which case it is
      single-quoted (and its single quotes are escaped).
    - ``PREFER_SINGLE``: behaves like ``ALWAYS_SINGLE`` *unless* the string
      contains more single quotes than double quotes, in which case it is
      double-quoted (and its double quotes are escaped).

    Strings emitted as unquoted object keys are not affected by any of
    these settings.

    *Note:* the two ``PREFER_*`` policies have to look at every quote
    character in a string before they can decide how to wrap it, so they
    do more work per string than the ``ALWAYS_*`` policies. If you are
    very concerned about performance, use ``ALWAYS_DOUBLE`` or
    ``ALWAYS_SINGLE``.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'

71 

72 

def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Read a JSON5 document from ``fp`` and return the decoded object.

    ``fp`` must provide ``.read()``. Everything that ``.read()`` returns
    is loaded into memory and handed to `parse()`, together with all of
    the keyword arguments; this function adds no processing of its own
    beyond turning a reported parse error into an exception.

    The keyword arguments follow ``json.load()``, with these differences:

    - `cls` is not supported; it is forwarded to `parse()`, which
      asserts that it is None.
    - `allow_duplicate_keys` (default True, for compatibility with
      ``json.load()``): when False, an object containing the same key
      twice is reported as an error, i.e. a `ValueError` is raised.
    - `consume_trailing` (default True): when True, only whitespace may
      follow the parsed value; when False, parsing stops once a valid
      value has been parsed and the rest of the text is ignored.
    - `start`: zero-based offset at which parsing begins (None means 0).
      The offset is applied to the text returned by ``fp.read()``; this
      function reads from the file's current position and does not seek.

    Because the whole input is read up front, using
    ``load(..., consume_trailing=False)`` to pull several values out of
    one file is possible but not efficient.

    Raises
      - `ValueError` if the document is invalid. This is different from
        the `json` module, which raises `json.JSONDecodeError`.
      - `UnicodeDecodeError` if ``fp.read()`` returns bytes that cannot be
        decoded with `encoding` (UTF-8 when `encoding` is None).
    """
    text = fp.read()
    options = dict(
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    value, error, _position = parse(text, **options)
    if error:
        raise ValueError(error)
    return value

145 

146 

def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Decode the JSON5 document in ``s`` and return the Python object.

    This is a thin wrapper around `parse()`: every argument is forwarded
    unchanged, the returned position is discarded, and a reported error
    is raised as `ValueError` instead of being returned.

    The keyword arguments follow ``json.loads()``, with these differences:

    - `cls` is not supported; it is forwarded to `parse()`, which
      asserts that it is None.
    - `allow_duplicate_keys` (default True, for compatibility with
      ``json.loads()``): when False, an object containing the same key
      twice is reported as an error, i.e. a `ValueError` is raised.
    - `consume_trailing` (default True): when True, only whitespace may
      follow the parsed value; when False, parsing stops once a valid
      value has been parsed and the rest of the string is ignored.
    - `start`: zero-based offset into the string at which parsing begins
      (None means 0).

    Raises
      - `ValueError` if the document is invalid. This is different from
        the `json` module, which raises `json.JSONDecodeError`.
      - `UnicodeDecodeError` if ``s`` is a byte string that cannot be
        decoded with `encoding` (UTF-8 when `encoding` is None).
    """
    options = dict(
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    value, error, _position = parse(s, **options)
    if error:
        raise ValueError(error)
    return value

207 

208 

def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Union[Tuple[Any, None, int], Tuple[None, str, int]]:
    """Parse ``s``, returning positional information along with a value.

    This accepts the same arguments as `loads()`, but instead of raising
    on an invalid document it returns a tuple of
    ``(value, error_string, position)``:

    - On success, `value` is the deserialized value and `error_string`
      is None.
    - On failure, `value` is None and `error_string` is the message of
      the error that `loads()` would have raised.
    - `position` is the offset reported by the parser: where it stopped
      reading on success, or the farthest offset it reached when it hit
      an error. If the text itself parses but building the Python value
      fails with a `ValueError` (e.g. a duplicate key when
      `allow_duplicate_keys` is False), that message is returned together
      with the parser's final position.

    `consume_trailing` says whether to keep parsing after a valid value
    has been parsed: if True (the default), any trailing characters must
    be whitespace; if False, parsing stops once a valid value has been
    reached. `start` is the zero-based offset to start parsing from
    (None means 0).

    Raises:
      - `UnicodeDecodeError` if given a byte string that cannot be decoded
        with `encoding` (UTF-8 when `encoding` is None).
      - `ValueError` if ``s`` is empty. This is the only case in which
        this function raises `ValueError` itself; every other decoding
        error is returned as the second element of the tuple.
      - `AssertionError` if `cls` is not None (custom decoders are not
        supported).

    `parse()` is useful if you have a string that might contain multiple
    values and you need to extract all of them; call it repeatedly,
    setting `start` to the `position` returned by the previous call:

        >>> import json5
        >>> s = '1 2 3 4'
        >>> values = []
        >>> start = 0
        >>> while True:
        ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
        ...     if err:
        ...         raise ValueError(err)
        ...     values.append(v)
        ...     start = pos
        ...     if start == len(s) or s[start:].isspace():
        ...         # Reached the end of the string (ignoring trailing
        ...         # whitespace).
        ...         break
        >>> values
        [1, 2, 3, 4]

    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        s = s.decode(encoding or 'utf-8')

    if not s:
        raise ValueError('Empty strings are not legal JSON5')

    parser = Parser(s, '<string>', pos=start or 0)
    parser_options = {
        '_strict': strict,
        '_consume_trailing': consume_trailing,
    }
    ast, err, pos = parser.parse(global_vars=parser_options)
    if err:
        return None, err, pos

    # Converting the AST can still fail (e.g. on duplicate keys); report
    # that the same way as a syntax error rather than raising.
    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
    except ValueError as exc:
        return None, str(exc), pos
    return value, None, pos

313 

314 

def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    """Turn a parser AST into Python values, applying the caller's hooks.

    Fills in the default number converters (`float`, `int`, and a
    converter for ``Infinity``/``NaN`` literals) for any that were not
    supplied, builds the object factory described below, and delegates
    the traversal to `_walk_ast()`.

    Objects are built from their list of ``(key, value)`` pairs:
    `object_pairs_hook` receives the pairs if given; otherwise
    `object_hook` receives the resulting dict if given; otherwise a plain
    dict is returned. When `allow_duplicate_keys` is false, a repeated
    key raises `ValueError` before any hook is called.
    """

    def _default_constant(text):
        # float() understands 'inf'/'nan' (with an optional sign) but not
        # the JSON5 spellings.
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _build_object(pairs):
        if not allow_duplicate_keys:
            names_seen = set()
            for name, _value in pairs:
                if name in names_seen:
                    raise ValueError(f'Duplicate key "{name}" found in object')
                names_seen.add(name)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        mapping = dict(pairs)
        return object_hook(mapping) if object_hook else mapping

    return _walk_ast(
        ast,
        _build_object,
        parse_float or float,
        parse_int or int,
        parse_constant or _default_constant,
    )

346 

347 

348def _walk_ast( 

349 el, 

350 dictify: Callable[[Iterable[Tuple[str, Any]]], Any], 

351 parse_float, 

352 parse_int, 

353 parse_constant, 

354): 

355 if el == 'None': 

356 return None 

357 if el == 'True': 

358 return True 

359 if el == 'False': 

360 return False 

361 ty, v = el 

362 if ty == 'number': 

363 unsigned = v[1:] if v.startswith('-') else v 

364 if unsigned.startswith('0x') or unsigned.startswith('0X'): 

365 return parse_int(v, base=16) 

366 if '.' in v or 'e' in v or 'E' in v: 

367 return parse_float(v) 

368 if 'Infinity' in v or 'NaN' in v: 

369 return parse_constant(v) 

370 return parse_int(v) 

371 if ty == 'string': 

372 return v 

373 if ty == 'object': 

374 pairs = [] 

375 for key, val_expr in v: 

376 val = _walk_ast( 

377 val_expr, dictify, parse_float, parse_int, parse_constant 

378 ) 

379 pairs.append((key, val)) 

380 return dictify(pairs) 

381 if ty == 'array': 

382 return [ 

383 _walk_ast(el, dictify, parse_float, parse_int, parse_constant) 

384 for el in v 

385 ] 

386 raise ValueError('unknown el: ' + el) # pragma: no cover 

387 

388 

def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` as JSON5 and write the result to ``fp``.

    ``fp`` must provide ``.write()``. The document is produced in full by
    `dumps()`, which receives every argument unchanged (including any
    extra keyword arguments), and is then written with a single
    ``fp.write()`` call. See `dumps()` for the meaning of the arguments.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output
    as ``json.dump(obj, fp)``.
    """
    document = dumps(
        obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    fp.write(document)

437 

438 

def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw: Any,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    An encoder is created from `cls` (`JSON5Encoder` when `cls` is not
    given) with all of the remaining keyword arguments, and its
    ``encode()`` method produces the result.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always
      written.
    - `quote_keys`: by default, object keys that are legal identifiers
      (and are not reserved words) are written without quotes; if you
      pass ``quote_keys=True``, every key is quoted, as in regular JSON.
    - `trailing_commas`: by default, when lists and objects span multiple
      lines of output (i.e., when ``indent`` is not None), the last item
      is followed by a trailing comma. If you pass
      ``trailing_commas=False``, it is not, as in regular JSON.
    - If you use a number, a boolean, or ``None`` as a key in a dict, it
      is converted to the corresponding JSON string value, e.g. "1",
      "true", or "null".
    - `allow_duplicate_keys`: by default, ``dumps()`` matches the `json`
      module's behavior and produces an object with duplicated keys if
      you mix keys of different types that have the same converted
      value; e.g., ``{1: "foo", "1": "bar"}`` produces
      '{"1": "foo", "1": "bar"}'. If you pass
      ``allow_duplicate_keys=False``, a `ValueError` is raised instead.
    - `quote_style` controls how strings are quoted; see the
      documentation of the `QuoteStyle` class for the available policies.
      Strings that are written as unquoted keys are not affected by this
      parameter and remain unquoted.
      *`quote_style` was added in version 0.10.0*.

    Other keyword arguments are allowed and are passed to the encoder so
    that custom encoders can receive them; the stock encoder ignores
    them, in an attempt to provide some amount of forward-compatibility.

    *Note:* the standard JSON module explicitly calls
    `int.__repr__(obj)` and `float.__repr__(obj)` to encode ints and
    floats, thereby bypassing any custom representations you might have
    for subclasses of int and float, and, for compatibility, JSON5 does
    the same thing. To override this behavior, create a subclass of
    `JSON5Encoder` that overrides `encode()` and handles your custom
    representation. For example:

        >>> import json5
        >>> from typing import Any, Set
        >>>
        >>> class Hex(int):
        ...    def __repr__(self):
        ...        return hex(self)
        >>>
        >>> class CustomEncoder(json5.JSON5Encoder):
        ...    def encode(
        ...        self, obj: Any, seen: Set, level: int, *, as_key: bool
        ...    ) -> str:
        ...        if isinstance(obj, Hex):
        ...            return repr(obj)
        ...        return super().encode(obj, seen, level, as_key=as_key)
        ...
        >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
        '[20, 0x14]'

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output
    as ``json.dumps(obj)``.
    """
    encoder_cls = cls or JSON5Encoder
    encoder = encoder_cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    # Encoding starts at nesting level 0 with nothing seen yet.
    return encoder.encode(obj, seen=set(), level=0, as_key=False)

548 

549 

class JSON5Encoder:
    """Encoder that turns Python objects into JSON5 text.

    `dumps()` instantiates this class (or the subclass passed as `cls`)
    and calls `encode()` on it. Subclasses can override `encode()` to
    customize how particular objects are serialized, `default()` to
    convert otherwise-unsupported objects, and `is_identifier()` /
    `is_reserved_word()` to change which keys may be written unquoted.
    """

    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.

        When `separators` is None, the item/key-value separators default
        to ``(', ', ': ')`` if `indent` is None and ``(',', ': ')``
        otherwise. When `default` is None, unsupported objects raise
        `TypeError`.

        *Added in version 0.10.0*"""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        # `self.separators` keeps the value exactly as passed (possibly
        # None); the effective values live in the two attributes below.
        self.separators = separators
        if separators is None:
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` holds the ids of the containers currently being encoded;
        it is used to detect circular references when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each nested dict or list being encoded.

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a quoted string
        representation of the object, unless obj is a string that can be
        an identifier (and quote_keys is false and obj isn't a reserved
        word), in which case it is returned unquoted. If the object
        should not be used as a key, `TypeError` should be raised; that
        allows the base implementation to implement `skipkeys` properly.
        """
        # Only substitute a fresh set when none was supplied; an empty set
        # passed by the caller must be kept so that it is actually used.
        if seen is None:
            seen = set()

        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Encodes str, bool, None, int and float values.

        Returns None if the object is not one of these basic types."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        """Encodes an int (double-quoted when used as a key)."""
        # int.__repr__ deliberately bypasses any __repr__ defined by a
        # subclass of int.
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        """Encodes a float (double-quoted when used as a key).

        Infinities and NaN are written as ``Infinity``, ``-Infinity`` and
        ``NaN``; they raise `ValueError` when `allow_nan` is false."""
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            # float.__repr__ deliberately bypasses any __repr__ defined by
            # a subclass of float.
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        """Encodes a string, leaving it bare when it is a key that may be
        written as an unquoted identifier."""
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        sq = "'"
        dq = '"'

        # Resolve the PREFER_* styles up front by counting the quotes, so
        # the string only has to be encoded once.
        if quote_style == QuoteStyle.PREFER_DOUBLE:
            if obj.count(dq) > obj.count(sq):
                quote_style = QuoteStyle.ALWAYS_SINGLE
        elif quote_style == QuoteStyle.PREFER_SINGLE:
            if obj.count(sq) > obj.count(dq):
                quote_style = QuoteStyle.ALWAYS_DOUBLE

        escape_dq = quote_style in (
            QuoteStyle.ALWAYS_DOUBLE,
            QuoteStyle.PREFER_DOUBLE,
        )
        escape_sq = quote_style in (
            QuoteStyle.ALWAYS_SINGLE,
            QuoteStyle.PREFER_SINGLE,
        )

        ret = []
        for idx, ch in enumerate(obj):
            if ch == dq:
                encoded_ch = self._escape_ch(dq) if escape_dq else dq
            elif ch == sq:
                encoded_ch = self._escape_ch(sq) if escape_sq else sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            else:
                o = ord(ch)
                if o == 0:
                    # The short NUL escape (backslash + '0') must not be
                    # followed by a decimal digit in JSON5, so use the
                    # four-digit unicode escape in that case.
                    next_ch = obj[idx + 1 : idx + 2]
                    if next_ch and next_ch in '0123456789':
                        encoded_ch = r'\u0000'
                    else:
                        encoded_ch = self._escape_ch(ch)
                elif o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    # U+2028/U+2029 are always escaped, even when
                    # non-ASCII output is allowed.
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        if escape_dq:
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        if ch == '\\':
            return '\\\\'
        if ch == "'":
            return r'\''
        if ch == '"':
            return r'\"'
        if ch == '\n':
            return r'\n'
        if ch == '\r':
            return r'\r'
        if ch == '\t':
            return r'\t'
        if ch == '\b':
            return r'\b'
        if ch == '\f':
            return r'\f'
        if ch == '\v':
            return r'\v'
        if ch == '\0':
            return r'\0'

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        # Characters outside the BMP are written as a UTF-16 surrogate pair.
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        """Encodes a mapping, a sequence, or (via `default()`) any other
        object.

        Raises `ValueError` if `check_circular` is true and `obj` is
        already being encoded further up the call stack."""
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        tracking = self.check_circular
        if tracking:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        try:
            # Ideally we'd use collections.abc.Mapping and
            # collections.abc.Sequence here, but for backwards-compatibility
            # with potential old callers, we only check for the two
            # attributes we need in each case.
            if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
                s = self._encode_dict(obj, seen, level + 1)
            elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
                s = self._encode_array(obj, seen, level + 1)
            else:
                s = self.encode(self.default(obj), seen, level, as_key=False)
                assert s is not None
        finally:
            # Always forget this object again, even if encoding failed, so
            # a `seen` set owned by the caller is left as it was found.
            if tracking:
                seen.discard(i)
        return s

    def _encode_dict(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a mapping as a JSON5 object.

        Keys that cannot be encoded raise `TypeError` unless `skipkeys`
        is true, in which case they are skipped. Raises `ValueError` if
        `allow_duplicate_keys` is false and two keys encode to the same
        string."""
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        keys = sorted(obj.keys()) if self.sort_keys else obj.keys()

        items = []
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            val_str = self.encode(obj[key], seen, level, as_key=False)
            items.append(key_str + kv_sep + val_str)

        return '{' + indent_str + item_sep.join(items) + end_str + '}'

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a sequence as a JSON5 array."""
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        """Returns ``(indent_str, end_str)`` for a container at `level`.

        `indent_str` is written after the opening bracket and after each
        item separator; `end_str` is written before the closing bracket.
        Both are empty when `indent` is None. Otherwise `end_str` starts
        with a comma if `trailing_commas` is true, and both contain a
        newline followed by the indentation for their level."""
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers change in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if not key:
            return False
        first = key[0]
        if not self._is_id_start(first) and first not in ('$', '_'):
            return False
        return all(
            self._is_id_continue(ch) or ch in ('$', '_') for ch in key[1:]
        )

    def _is_id_start(self, ch: str) -> bool:
        """Returns whether `ch` is in a Unicode category that may start an
        identifier (letters and letter numbers)."""
        # The former 'Li' entry was dropped: it is not a Unicode general
        # category, so it could never match.
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        """Returns whether `ch` is in a Unicode category that may appear
        after the first character of an identifier."""
        # The former 'Li' entry was dropped: it is not a Unicode general
        # category, so it could never match.
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not yet been updated to handle
        the changes."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # Compiled lazily on first use and cached at module level.
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None

1003 

1004 

1005def _raise_type_error(obj) -> Any: 

1006 raise TypeError(f'{repr(obj)} is not JSON5 serializable')