Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/json5/lib.py: 31%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1# Copyright 2015 Google Inc. All rights reserved.
2#
3# Licensed under the Apache License, Version 2.0 (the "License");
4# you may not use this file except in compliance with the License.
5# You may obtain a copy of the License at
6#
7# http://www.apache.org/licenses/LICENSE-2.0
8#
9# Unless required by applicable law or agreed to in writing, software
10# distributed under the License is distributed on an "AS IS" BASIS,
11# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12# See the License for the specific language governing permissions and
13# limitations under the License.
15import enum
16import math
17import re
18from typing import (
19 Any,
20 Callable,
21 IO,
22 Iterable,
23 Mapping,
24 Optional,
25 Set,
26 Tuple,
27 Type,
28 Union,
29)
30import unicodedata
32from json5.parser import Parser
# Cache for the compiled reserved-word regular expression. It starts out as
# None and is compiled on first use by `JSON5Encoder.is_reserved_word()`,
# which is consulted when encoding keys, below.
_reserved_word_re: Optional[re.Pattern] = None
class QuoteStyle(enum.Enum):
    """Selects which quote character is used when encoding strings.

    Strings used as object keys that can be written as bare identifiers are
    not affected; every other string is quoted according to this setting:

    - `ALWAYS_DOUBLE` (the default, for compatibility with the `json`
      module and older versions of `json5`): the string is double-quoted
      and any double quotes inside it are escaped.
    - `ALWAYS_SINGLE`: the string is single-quoted and any single quotes
      inside it are escaped.
    - `PREFER_DOUBLE`: like `ALWAYS_DOUBLE`, *unless* the string contains
      more double quotes than single quotes, in which case it is
      single-quoted and its single quotes are escaped.
    - `PREFER_SINGLE`: like `ALWAYS_SINGLE`, *unless* the string contains
      more single quotes than double quotes, in which case it is
      double-quoted and its double quotes are escaped.

    *Note:* `PREFER_DOUBLE` and `PREFER_SINGLE` can impact performance.
    To know which quote to use, the encoder has to walk the entire string
    counting single and double quotes. It guesses at a quote style while
    doing so, but if it guesses wrong the entire string has to be encoded
    a second time. If you are very concerned about performance (a) you
    probably shouldn't be using this library in the first place, because
    it just isn't very fast, and (b) you should use `ALWAYS_DOUBLE` or
    `ALWAYS_SINGLE`, which do not have this issue.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'
def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON5 document) to a Python object.

    Everything returned by a single ``fp.read()`` call is held in memory
    and handed to `parse()`; all keyword arguments are forwarded unchanged.

    Supports almost the same arguments as ``json.load()`` except that:
      - custom decoders are not supported: `cls` must be left as None
        (`parse()` asserts this).
      - an extra `allow_duplicate_keys` parameter supports checking for
        duplicate keys in an object; by default, this is True for
        compatibility with ``json.load()``, but if set to False and
        the object contains duplicate keys, a ValueError will be raised.
      - an extra `consume_trailing` parameter specifies whether to
        consume any trailing characters after a valid object has been
        parsed. By default, this value is True and the only legal
        trailing characters are whitespace. If this value is set to False,
        parsing will stop when a valid object has been parsed and any
        trailing characters will be ignored.
      - an extra `start` parameter specifies the zero-based offset, within
        the text returned by ``fp.read()``, at which parsing begins; None
        means offset zero. This function never seeks ``fp``: reading
        always begins at the file's current position, so `start` counts
        from there.

    Because the whole file is read up front and the position at which the
    parser stopped is discarded, `load` is not suited to pulling several
    values out of one file. To do that, read the text yourself and call
    `parse()` repeatedly with `consume_trailing=False` and `start`.

    Raises
      - `ValueError` if given an invalid (or empty) document. This is
        different from the `json` module, which raises
        `json.JSONDecodeError`.
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.
    """

    text = fp.read()
    # parse() also returns the stop position; it is not needed here.
    value, error, _ = parse(
        text,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if error:
        raise ValueError(error)
    return value
def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    This is a thin wrapper around `parse()` that raises the error instead
    of returning it and drops the stop position.

    Supports the same arguments as ``json.loads()`` except that:
      - custom decoders are not supported: `cls` must be left as None
        (`parse()` asserts this).
      - an extra `allow_duplicate_keys` parameter supports checking for
        duplicate keys in an object; by default, this is True for
        compatibility with ``json.load()``, but if set to False and
        the object contains duplicate keys, a ValueError will be raised.
      - an extra `consume_trailing` parameter specifies whether to
        consume any trailing characters after a valid object has been
        parsed. By default, this value is True and the only legal
        trailing characters are whitespace. If this value is set to False,
        parsing will stop when a valid object has been parsed and any
        trailing characters in the string will be ignored.
      - an extra `start` parameter specifies the zero-based offset into the
        string to start parsing at; None means offset zero.

    Raises
      - `ValueError` if given an invalid (or empty) document. This is
        different from the `json` module, which raises
        `json.JSONDecodeError`.
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.
    """

    # parse() also returns the stop position; it is not needed here.
    value, error, _ = parse(
        s=s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if error:
        raise ValueError(error)
    return value
def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Union[Tuple[Any, None, int], Tuple[None, str, int]]:
    """Parse ``s``, returning positional information along with a value.

    This works like `loads()`, except that (a) it returns the
    position in the string where the parsing stopped (either due to
    hitting an error or parsing a valid value) and any error as a string,
    (b) it takes an optional `consume_trailing` parameter that says whether
    to keep parsing the string after a valid value has been parsed; if True
    (the default), any trailing characters must be whitespace. If False,
    parsing stops when a valid value has been reached, (c) it takes an
    optional `start` parameter that specifies a zero-based offset to start
    parsing from in the string (None means zero), and (d) the return value
    is different, as described below.

    `parse()` is useful if you have a string that might contain multiple
    values and you need to extract all of them; you can do so by repeatedly
    calling `parse`, setting `start` to the value returned in `position`
    from the previous call.

    Returns a tuple of (value, error_string, position). If the string
    was a legal value, `value` will be the deserialized value,
    `error_string` will be `None`, and `position` will be one
    past the zero-based offset where the parser stopped reading.
    If the string was not a legal value,
    `value` will be `None`, `error_string` will be the string value
    of the exception that would've been raised, and `position` will
    be the zero-based farthest offset into the string where the parser
    hit an error. Because a legal document may itself deserialize to
    `None` (or another falsy value), test `error_string`, not `value`,
    to find out whether parsing succeeded.

    Raises:
      - `ValueError` if ``s`` is empty. This is the one parsing error that
        is raised rather than returned.
      - `UnicodeDecodeError` if given a byte string that is not a
        legal UTF-8 document (or the equivalent, if using a different
        `encoding`). This matches the `json` module.
      - `AssertionError` if `cls` is not None; custom decoders are not
        supported.

    Any other error, whether reported by the parser or raised as a
    `ValueError` while converting the parsed document (for example a
    duplicate key when `allow_duplicate_keys` is False), is *not* raised;
    it is returned as the second value in the tuple.

    You can use this method to read in a series of values from a string
    `s` as follows:

        >>> import json5
        >>> s = '1 2 3 4'
        >>> values = []
        >>> start = 0
        >>> while True:
        ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
        ...     if err:
        ...         raise ValueError(err)
        ...     values.append(v)
        ...     start = pos
        ...     if start == len(s) or s[start:].isspace():
        ...         # Reached the end of the string (ignoring trailing
        ...         # whitespace).
        ...         break
        >>> values
        [1, 2, 3, 4]

    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    if not s:
        raise ValueError('Empty strings are not legal JSON5')
    start = start or 0
    parser = Parser(s, '<string>', pos=start)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    # Only the conversion step is expected to raise ValueError here (e.g.
    # for duplicate keys); report it the same way as a parser error.
    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
    except ValueError as e:
        return None, str(e), pos
    return value, None, pos
def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    """Turn the parser's AST into Python values, applying the user hooks.

    Missing number hooks fall back to `float`, `int`, and a parser that
    maps the JSON5 spellings `Infinity`/`NaN` onto Python floats. Objects
    are built by a local helper that optionally rejects duplicate keys and
    then applies `object_pairs_hook` (which wins if both hooks are given)
    or `object_hook`.

    Raises `ValueError` if `allow_duplicate_keys` is false and an object
    repeats a key.
    """

    def _fp_constant_parser(s):
        # float() understands 'inf' and 'nan' (with an optional sign).
        return float(s.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _dictify(pairs):
        if not allow_duplicate_keys:
            seen_keys = set()
            for key, _ in pairs:
                if key in seen_keys:
                    raise ValueError(f'Duplicate key "{key}" found in object')
                seen_keys.add(key)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        mapping = dict(pairs)
        if object_hook:
            return object_hook(mapping)
        return mapping

    float_parser = parse_float or float
    int_parser = parse_int or int
    constant_parser = parse_constant or _fp_constant_parser

    return _walk_ast(ast, _dictify, float_parser, int_parser, constant_parser)
348def _walk_ast(
349 el,
350 dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
351 parse_float,
352 parse_int,
353 parse_constant,
354):
355 if el == 'None':
356 return None
357 if el == 'True':
358 return True
359 if el == 'False':
360 return False
361 ty, v = el
362 if ty == 'number':
363 unsigned = v[1:] if v.startswith('-') else v
364 if unsigned.startswith('0x') or unsigned.startswith('0X'):
365 return parse_int(v, base=16)
366 if '.' in v or 'e' in v or 'E' in v:
367 return parse_float(v)
368 if 'Infinity' in v or 'NaN' in v:
369 return parse_constant(v)
370 return parse_int(v)
371 if ty == 'string':
372 return v
373 if ty == 'object':
374 pairs = []
375 for key, val_expr in v:
376 val = _walk_ast(
377 val_expr, dictify, parse_float, parse_int, parse_constant
378 )
379 pairs.append((key, val))
380 return dictify(pairs)
381 if ty == 'array':
382 return [
383 _walk_ast(el, dictify, parse_float, parse_int, parse_constant)
384 for el in v
385 ]
386 raise ValueError('unknown el: ' + el) # pragma: no cover
def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` as JSON5 and write it to ``fp``, a
    ``.write()``-supporting file-like object.

    The document is produced by ``dumps()``, below, with every argument
    forwarded unchanged, and is then written with a single ``fp.write()``
    call. See ``dumps()`` for the meaning of the keyword arguments.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output as
    ``json.dump(obj, fp)``.
    """

    document = dumps(
        obj=obj,
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        cls=cls,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    fp.write(document)
def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw: Any,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always written.
    - By default, object keys that are legal identifiers (and are not
      reserved words) are not quoted; if you pass ``quote_keys=True``, all
      keys are quoted, as in regular JSON.
    - By default, if lists and objects span multiple lines of output (i.e.,
      when ``indent`` is not None), the last item will have a trailing comma
      after it. If you pass ``trailing_commas=False``, it will not, as in
      regular JSON.
    - If you use a number, a boolean, or ``None`` as a key value in a dict, it
      will be converted to the corresponding JSON string value, e.g. "1",
      "true", or "null". By default, ``dumps()`` will match the `json`
      module's behavior and produce malformed JSON if you mix keys of
      different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, a `ValueError` will be raised instead.
    - `quote_style` controls how strings are encoded. See the documentation
      for the `QuoteStyle` class, above, for how this is used.

      *Note*: Strings that are being used as unquoted keys are not affected
      by this parameter and remain unquoted.

      *`quote_style` was added in version 0.10.0*.

    `cls` selects the encoder class; it defaults to `JSON5Encoder`.

    Other keyword arguments are allowed and will be passed to the
    encoder so custom encoders can get them, but otherwise they will
    be ignored in an attempt to provide some amount of forward-compatibility.

    *Note:* the standard JSON module explicitly calls `int.__repr__(obj)`
    and `float.__repr__(obj)` to encode ints and floats, thereby bypassing
    any custom representations you might have for objects that are subclasses
    of ints and floats, and, for compatibility, JSON5 does the same thing.
    To override this behavior, create a subclass of JSON5Encoder
    that overrides `encode()` and handles your custom representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...    def __repr__(self):
    ...        return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...    def encode(
    ...        self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...    ) -> str:
    ...        if isinstance(obj, Hex):
    ...            return repr(obj)
    ...        return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False,
    allow_duplicate_keys=True)`` should produce exactly the same output as
    ``json.dumps(obj)``.
    """

    encoder_cls = cls or JSON5Encoder
    encoder = encoder_cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    return encoder.encode(obj, seen=set(), level=0, as_key=False)
class JSON5Encoder:
    """Serializes Python objects to JSON5 text; this is what `dumps()` uses.

    Subclass it and override `encode()`, `default()`, `is_identifier()` or
    `is_reserved_word()` to customize the output, then pass the subclass
    as `cls` to `dump()` or `dumps()`.
    """

    # Characters that have a dedicated backslash escape.
    _SIMPLE_ESCAPES = {
        '\\': '\\\\',
        "'": r'\'',
        '"': r'\"',
        '\n': r'\n',
        '\r': r'\r',
        '\t': r'\t',
        '\b': r'\b',
        '\f': r'\f',
        '\v': r'\v',
        '\0': r'\0',
    }

    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.

        *Added in version 0.10.0*
        """
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        # `self.separators` keeps what the caller passed (possibly None);
        # the effective separators are stored in the two attributes below.
        self.separators = separators
        if separators is None:
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        The base implementation calls the `default` function given to the
        constructor, or raises `TypeError` if there was none.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal object.

        `seen` is used for duplicate object tracking when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a double-quoted string
        representation of the object, unless obj is a string that can be an
        identifier (and quote_keys is false and obj isn't a reserved word).
        If the object should not be used as a key, `TypeError` should be
        raised; that allows the base implementation to implement `skipkeys`
        properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj!r}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns the encoding of a str, bool, None, int or float, or None
        if the object is not one of those basic types."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        """Encodes an int (double-quoted when used as a key).

        `int.__repr__` is called directly so that a custom `__repr__` on
        an int subclass is bypassed."""
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        """Encodes a float (double-quoted when used as a key).

        Infinities and NaN are written as `Infinity`, `-Infinity` and
        `NaN`; `ValueError` is raised for them if `allow_nan` is false."""
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        """Encodes a string, leaving it bare when it is a key that may
        legally be written unquoted."""
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        # At first we guess at which quotes to escape. If we guess wrong,
        # we reencode the string below.
        use_double = quote_style in (
            QuoteStyle.ALWAYS_DOUBLE,
            QuoteStyle.PREFER_DOUBLE,
        )
        escape_single = quote_style in (
            QuoteStyle.ALWAYS_SINGLE,
            QuoteStyle.PREFER_SINGLE,
        )
        for idx, ch in enumerate(obj):
            if ch == dq:
                double_quotes_seen += 1
                encoded_ch = self._escape_ch(dq) if use_double else dq
            elif ch == sq:
                single_quotes_seen += 1
                encoded_ch = self._escape_ch(sq) if escape_single else sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            elif ch == '\0' and '0' <= obj[idx + 1 : idx + 2] <= '9':
                # The short NUL escape (backslash-zero) must not be followed
                # by a decimal digit, so use the long form in that case.
                # (The slice is '' at the end of the string, which fails
                # the range test.)
                encoded_ch = '\\u0000'
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if use_double:
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        simple = self._SIMPLE_ESCAPES.get(ch)
        if simple is not None:
            return simple

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        # Code points above U+FFFF are written as a UTF-16 surrogate pair.
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        """Encodes a mapping, a sequence, or (via `default()`) anything
        else, raising `ValueError` on a circular reference when
        `check_circular` is true."""
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        try:
            # Ideally we'd use collections.abc.Mapping and
            # collections.abc.Sequence here, but for backwards-compatibility
            # with potential old callers, we only check for the two
            # attributes we need in each case.
            if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
                s = self._encode_dict(obj, seen, level + 1)
            elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
                s = self._encode_array(obj, seen, level + 1)
            else:
                s = self.encode(self.default(obj), seen, level, as_key=False)
                assert s is not None
        finally:
            # Always forget this object again, even if encoding failed, so
            # that `seen` only ever holds the objects currently being
            # encoded.
            if self.check_circular:
                seen.discard(i)
        return s

    def _encode_dict(self, obj: Any, seen: set, level: int) -> str:
        """Encodes a mapping as a JSON5 object.

        Keys that cannot be encoded raise `TypeError` unless `skipkeys` is
        true, in which case they are dropped. If `allow_duplicate_keys` is
        false, two keys with the same encoded form raise `ValueError`."""
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        items = []
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            val_str = self.encode(obj[key], seen, level, as_key=False)
            items.append(key_str + kv_sep + val_str)

        if not items:
            # Every key was skipped. Emitting the indent and trailing comma
            # around nothing would produce an object containing a lone
            # comma, so write an empty object instead.
            return '{}'
        return '{' + indent_str + item_sep.join(items) + end_str + '}'

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        """Encodes a sequence as a JSON5 array."""
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        """Returns `(indent_str, end_str)` for a container at `level`.

        `indent_str` goes before each item and `end_str` (including the
        trailing comma, if enabled) goes before the closing bracket. Both
        are empty when `indent` is None."""
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    # Zero or negative: newlines only, no indentation.
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers change in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if (
            not key
            or not self._is_id_start(key[0])
            and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

    def _is_id_start(self, ch: str) -> bool:
        """Returns whether the Unicode category of `ch` allows it to start
        an identifier ('$' and '_' are handled by the caller)."""
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        """Returns whether the Unicode category of `ch` allows it to appear
        after the first character of an identifier ('$' and '_' are
        handled by the caller)."""
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not been updated to handle
        the changes yet."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None
1005def _raise_type_error(obj) -> Any:
1006 raise TypeError(f'{repr(obj)} is not JSON5 serializable')