Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/json5/lib.py: 31%

3# Licensed under the Apache License, Version 2.0 (the "License");

4# you may not use this file except in compliance with the License.

5# You may obtain a copy of the License at

7# http://www.apache.org/licenses/LICENSE-2.0

9# Unless required by applicable law or agreed to in writing, software

10# distributed under the License is distributed on an "AS IS" BASIS,

11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

12# See the License for the specific language governing permissions and

13# limitations under the License.

15import enum

16import math

17import re

18from typing import (

19 Any,

20 Callable,

21 IO,

22 Iterable,

23 Mapping,

24 Optional,

25 Set,

26 Tuple,

27 Type,

28 Union,

29)

30import unicodedata

32from json5.parser import Parser

35# Used when encoding keys, below.

36_reserved_word_re: Optional[re.Pattern] = None

class QuoteStyle(enum.Enum):
    """Selects the quote character used when strings are encoded.

    The style applies to every string that is written in quoted form;
    keys that are emitted as bare identifiers are not affected.

    - ``ALWAYS_DOUBLE``: strings are always double-quoted and any double
      quotes inside them are escaped. This is the default, for
      compatibility with the `json` module and older versions of `json5`.
    - ``ALWAYS_SINGLE``: strings are always single-quoted and any single
      quotes inside them are escaped.
    - ``PREFER_DOUBLE``: like ``ALWAYS_DOUBLE``, *unless* the string
      contains more double quotes than single quotes, in which case it is
      single-quoted and the single quotes are escaped.
    - ``PREFER_SINGLE``: like ``ALWAYS_SINGLE``, *unless* the string
      contains more single quotes than double quotes, in which case it is
      double-quoted and the double quotes are escaped.

    *Note:* ``PREFER_DOUBLE`` and ``PREFER_SINGLE`` can impact performance.
    To know which quote to use, the encoder has to iterate over the entire
    string counting single and double quotes. It guesses at a quote style
    while doing so, but if it guesses wrong the entire string has to be
    re-encoded. If you are very concerned about performance (a) you
    probably shouldn't be using this library in the first place, because
    it just isn't very fast, and (b) you should use ``ALWAYS_DOUBLE`` or
    ``ALWAYS_SINGLE``, which never need the second pass.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'

def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON5 document) to a Python object.

    Everything returned by a single ``fp.read()`` call is handed to
    `parse()` together with the keyword arguments below, so the whole
    remaining content of the file is held in memory while parsing.

    Accepts almost the same arguments as ``json.load()``, plus:

    - `cls` is forwarded to `parse()` unchanged; custom decoders are not
      supported.
    - `allow_duplicate_keys`: True by default for compatibility with
      ``json.load()``; if False and an object contains duplicate keys,
      a `ValueError` is raised.
    - `consume_trailing`: if True (the default), the only characters
      allowed after the parsed value are whitespace. If False, parsing
      stops once a valid value has been read and the rest is ignored.
    - `start`: zero-based offset at which parsing begins, forwarded to
      `parse()`. The offset is applied to the text returned by
      ``fp.read()``; this function does not seek `fp` itself.

    You can use `load(..., consume_trailing=False)` to read values from a
    file one at a time, but because the entire file is read up front this
    is not very efficient.

    Raises:
      - `ValueError` if given an invalid document. This is different
        from the `json` module, which raises `json.JSONDecodeError`.
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.
    """
    options = dict(
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    value, error, _ = parse(fp.read(), **options)
    if error:
        raise ValueError(error)
    return value

145

146

def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    This is a thin wrapper around `parse()`: it forwards every argument,
    returns the parsed value, and turns a reported error into an
    exception.

    Accepts the same arguments as ``json.loads()``, plus:

    - `cls` is forwarded to `parse()` unchanged; custom decoders are not
      supported.
    - `allow_duplicate_keys`: True by default for compatibility with
      ``json.loads()``; if False and an object contains duplicate keys,
      a `ValueError` is raised.
    - `consume_trailing`: if True (the default), the only characters
      allowed after the parsed value are whitespace. If False, parsing
      stops once a valid value has been read and the rest of the string
      is ignored.
    - `start`: the zero-based offset into the string to start parsing at.

    Raises:
      - `ValueError` if given an invalid document. This is different
        from the `json` module, which raises `json.JSONDecodeError`.
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.
    """
    result = parse(
        s=s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    # parse() returns (value, error_string, position); position is unused.
    value, error = result[0], result[1]
    if not error:
        return value
    raise ValueError(error)

207

208

def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Union[Tuple[Any, None, int], Tuple[None, str, int]]:
    """Parse ``s``, returning positional information along with a value.

    This works like `loads()`, except that (a) it returns the position in
    the string where the parsing stopped (either due to hitting an error
    or parsing a valid value) and any error as a string, (b) it takes an
    optional `consume_trailing` parameter that says whether to keep
    parsing the string after a valid value has been parsed; if True (the
    default), any trailing characters must be whitespace. If False,
    parsing stops when a valid value has been reached, (c) it takes an
    optional `start` parameter that specifies a zero-based offset to start
    parsing from in the string, and (d) the return value is different, as
    described below.

    `parse()` is useful if you have a string that might contain multiple
    values and you need to extract all of them; you can do so by
    repeatedly calling `parse`, setting `start` to the value returned in
    `position` from the previous call.

    `s` may also be a `bytes` object, in which case it is decoded with
    `encoding` (UTF-8 if `encoding` is not given) before parsing. `cls`
    must be None; custom decoders are not supported.

    Returns a tuple of (value, error_string, position). If the string
    was a legal value, `value` will be the deserialized value,
    `error_string` will be `None`, and `position` will be one
    past the zero-based offset where the parser stopped reading.
    If the string was not a legal value, `value` will be `None`,
    `error_string` will be the string value of the exception that
    would've been raised, and `position` will be the zero-based farthest
    offset into the string where the parser hit an error. An empty input
    is reported the same way, with `position` equal to `start` (or 0).

    Raises:
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.

    Note that this does *not* raise a `ValueError`; instead any error is
    returned as the second value in the tuple.

    You can use this method to read in a series of values from a string
    `s` as follows:

        >>> import json5
        >>> s = '1 2 3 4'
        >>> values = []
        >>> start = 0
        >>> while True:
        ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
        ...     if err:
        ...         raise ValueError(err)
        ...     values.append(v)
        ...     start = pos
        ...     if start == len(s) or s[start:].isspace():
        ...         # Reached the end of the string (ignoring trailing
        ...         # whitespace).
        ...         break
        >>> values
        [1, 2, 3, 4]

    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    start = start or 0
    if not s:
        # Report the problem through the return value, like every other
        # parse error, rather than raising: callers of parse() are told
        # they never have to catch ValueError. load()/loads() turn the
        # error string back into a ValueError themselves.
        return None, 'Empty strings are not legal JSON5', start

    parser = Parser(s, '<string>', pos=start)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
        return value, None, pos
    except ValueError as e:
        # Conversion problems (e.g. duplicate keys) are also reported as
        # an error string instead of propagating.
        return None, str(e), pos

313

314

def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    """Turn the parser's AST into Python values.

    Fills in the default number/constant converters for any that were not
    supplied, builds the callback that turns a list of (key, value) pairs
    into an object, and delegates the actual traversal to `_walk_ast()`.

    Raises `ValueError` if `allow_duplicate_keys` is false and an object
    repeats a key.
    """

    def constant_to_float(text):
        # float() understands 'inf'/'nan' but not the JSON5 spellings.
        return float(text.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def build_object(pairs):
        if not allow_duplicate_keys:
            seen_keys = set()
            for name, _value in pairs:
                if name in seen_keys:
                    raise ValueError(f'Duplicate key "{name}" found in object')
                seen_keys.add(name)

        # object_pairs_hook takes priority over object_hook.
        if object_pairs_hook:
            return object_pairs_hook(pairs)
        mapping = dict(pairs)
        return object_hook(mapping) if object_hook else mapping

    float_fn = parse_float if parse_float else float
    int_fn = parse_int if parse_int else int
    constant_fn = parse_constant if parse_constant else constant_to_float

    return _walk_ast(ast, build_object, float_fn, int_fn, constant_fn)

346

347

348def _walk_ast(

349 el,

350 dictify: Callable[[Iterable[Tuple[str, Any]]], Any],

351 parse_float,

352 parse_int,

353 parse_constant,

354):

355 if el == 'None':

356 return None

357 if el == 'True':

358 return True

359 if el == 'False':

360 return False

361 ty, v = el

362 if ty == 'number':

363 unsigned = v[1:] if v.startswith('-') else v

364 if unsigned.startswith('0x') or unsigned.startswith('0X'):

365 return parse_int(v, base=16)

366 if '.' in v or 'e' in v or 'E' in v:

367 return parse_float(v)

368 if 'Infinity' in v or 'NaN' in v:

369 return parse_constant(v)

370 return parse_int(v)

371 if ty == 'string':

372 return v

373 if ty == 'object':

374 pairs = []

375 for key, val_expr in v:

376 val = _walk_ast(

377 val_expr, dictify, parse_float, parse_int, parse_constant

378 )

379 pairs.append((key, val))

380 return dictify(pairs)

381 if ty == 'array':

382 return [

383 _walk_ast(el, dictify, parse_float, parse_int, parse_constant)

384 for el in v

385 ]

386 raise ValueError('unknown el: ' + el) # pragma: no cover

387

388

def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` as JSON5 and write it to ``fp``, a
    ``.write()``-supporting file-like object.

    The document is produced by a single call to ``dumps()`` with the
    same keyword arguments and then written with one ``fp.write()`` call;
    see ``dumps()`` for what each argument does.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output
    as ``json.dump(obj, fp)``.
    """
    text = dumps(
        obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    fp.write(text)

437

438

def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw: Any,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always
      written.
    - By default, object keys that are legal identifiers (and not
      reserved words) are not quoted; if you pass ``quote_keys=True``,
      all keys are quoted, as in regular JSON.
    - By default, if lists and objects span multiple lines of output
      (i.e., when ``indent`` is not None), the last item will have a
      trailing comma after it. If you pass ``trailing_commas=False``, it
      will not, as in regular JSON.
    - If you use a number, a boolean, or ``None`` as a key value in a
      dict, it will be converted to the corresponding JSON string value,
      e.g. "1", "true", or "null". By default, ``dumps()`` will match
      the `json` module's behavior and produce malformed JSON if you mix
      keys of different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, a `ValueError` will be raised
      instead.
    - `quote_style` controls how strings are encoded. See the
      documentation for the `QuoteStyle` class for how this is used.

      *Note*: Strings that are being used as unquoted keys are not
      affected by this parameter and remain unquoted.

      *`quote_style` was added in version 0.10.0*.

    `cls` selects the encoder class and defaults to `JSON5Encoder`.
    Other keyword arguments are allowed and will be passed to the
    encoder so custom encoders can get them, but otherwise they will
    be ignored in an attempt to provide some amount of
    forward-compatibility.

    *Note:* the standard JSON module explicitly calls
    `int.__repr__(obj)` and `float.__repr__(obj)` to encode ints and
    floats, thereby bypassing any custom representations you might have
    for objects that are subclasses of ints and floats, and, for
    compatibility, JSON5 does the same thing. To override this behavior,
    create a subclass of JSON5Encoder that overrides `encode()` and
    handles your custom representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...    def __repr__(self):
    ...        return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...    def encode(
    ...        self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...    ) -> str:
    ...        if isinstance(obj, Hex):
    ...            return repr(obj)
    ...        return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output
    as ``json.dumps(obj)``.
    """
    encoder_cls = cls or JSON5Encoder
    encoder = encoder_cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    # Start at the top level with a fresh set for circularity tracking.
    return encoder.encode(obj, seen=set(), level=0, as_key=False)

548

549

class JSON5Encoder:
    """Encoder used by `dumps()`; subclass it to customize serialization."""

    # Characters that have a short backslash escape.
    _SIMPLE_ESCAPES = {
        '\\': '\\\\',
        "'": "\\'",
        '"': '\\"',
        '\n': '\\n',
        '\r': '\\r',
        '\t': '\\t',
        '\b': '\\b',
        '\f': '\\f',
        '\v': '\\v',
        '\0': '\\0',
    }

    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.

        *Added in version 0.10.0.*"""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        # `self.separators` keeps the caller's value (possibly None); the
        # effective separators are stored separately below.
        self.separators = separators
        if separators is None:
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` is the set of ids of the containers currently being
        encoded; it is used to detect circular references when
        `check_circular` is True. Passing None starts with an empty set.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a quoted string
        representation of the object, unless obj is a string that can be
        an identifier (and quote_keys is false and obj isn't a reserved
        word). If the object should not be used as a key, `TypeError`
        should be raised; that allows the base implementation to implement
        `skipkeys` properly.
        """
        # Only replace a missing set: a caller-supplied (even empty) set
        # must be shared with the nested calls that mutate it.
        if seen is None:
            seen = set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj!r}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Encodes strings, booleans, None, ints and floats.

        Returns None if the object is not a basic type."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        """Encodes an int, double-quoting it when it is used as a key."""
        # int.__repr__ deliberately bypasses any __repr__ on a subclass.
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        """Encodes a float, double-quoting it when it is used as a key.

        Raises `ValueError` for infinities and NaN if `allow_nan` is
        false."""
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            # float.__repr__ deliberately bypasses any __repr__ on a
            # subclass.
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        """Encodes a string, leaving it bare if it is a key that is a
        legal, non-reserved identifier and `quote_keys` is false."""
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        # At first we guess at which quotes to escape. If we guess wrong,
        # we reencode the string below.
        escape_double = quote_style in (
            QuoteStyle.ALWAYS_DOUBLE,
            QuoteStyle.PREFER_DOUBLE,
        )
        escape_single = quote_style in (
            QuoteStyle.ALWAYS_SINGLE,
            QuoteStyle.PREFER_SINGLE,
        )
        last = len(obj) - 1
        for idx, ch in enumerate(obj):
            if ch == dq:
                double_quotes_seen += 1
                encoded_ch = self._escape_ch(dq) if escape_double else dq
            elif ch == sq:
                single_quotes_seen += 1
                encoded_ch = self._escape_ch(sq) if escape_single else sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            elif ch == '\0' and idx < last and obj[idx + 1] in '0123456789':
                # The short form `\0` may not be followed by a decimal
                # digit, so spell the NUL out in that case.
                encoded_ch = '\\u0000'
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if escape_double:
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        short = self._SIMPLE_ESCAPES.get(ch)
        if short is not None:
            return short

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        # Characters outside the BMP are written as a surrogate pair.
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        """Encodes dict-like and list-like objects, and falls back to
        `default()` for anything else.

        Raises `ValueError` if `check_circular` is true and `obj` is
        already being encoded further up the call chain."""
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        tracked = self.check_circular
        i = id(obj)
        if tracked:
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        try:
            # Ideally we'd use collections.abc.Mapping and
            # collections.abc.Sequence here, but for
            # backwards-compatibility with potential old callers, we only
            # check for the two attributes we need in each case.
            if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
                s = self._encode_dict(obj, seen, level + 1)
            elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
                s = self._encode_array(obj, seen, level + 1)
            else:
                s = self.encode(self.default(obj), seen, level, as_key=False)
                assert s is not None
            return s
        finally:
            # Always forget the object again, even when encoding failed,
            # so that a caller that recovers from the exception does not
            # see stale entries in `seen`.
            if tracked:
                seen.discard(i)

    def _encode_dict(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a dict-like object as a JSON5 object.

        Keys that cannot be encoded raise `TypeError`, or are silently
        dropped if `skipkeys` is true. Raises `ValueError` if
        `allow_duplicate_keys` is false and two keys encode to the same
        string."""
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        items = []
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            val_str = self.encode(obj[key], seen, level, as_key=False)
            items.append(key_str + kv_sep + val_str)

        return '{' + indent_str + item_sep.join(items) + end_str + '}'

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a list-like object as a JSON5 array."""
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        """Returns (indent_str, end_str) for a container at `level`.

        `indent_str` goes before every item; `end_str` goes between the
        last item and the closing bracket and includes the trailing comma
        when `trailing_commas` is true. Both are empty if `indent` is
        None."""
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    # Zero or negative: one item per line, no indentation.
                    indent_str = '\n'
                    end_str += '\n'
            else:
                # A string indent is repeated once per level.
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers change in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if (
            not key
            or not self._is_id_start(key[0])
            and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

    def _is_id_start(self, ch: str) -> bool:
        """Returns whether `ch` is a letter or letter-number, going by
        its Unicode general category."""
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        """Returns whether `ch` is a letter, letter-number, decimal
        digit, combining mark or connector punctuation, going by its
        Unicode general category."""
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not yet been updated to handle
        the changes yet."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # The pattern is compiled lazily on first use and cached in
            # the module-level variable.
            #
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None

1003

1004

1005def _raise_type_error(obj) -> Any:

1006 raise TypeError(f'{repr(obj)} is not JSON5 serializable')