# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import math
import re
from typing import (
    Any,
    Callable,
    IO,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)
import unicodedata

from json5.parser import Parser


# Used when encoding keys, below.
_reserved_word_re: Optional[re.Pattern] = None


class QuoteStyle(enum.Enum):
    """Controls how strings will be quoted during encoding.

    By default, for compatibility with the `json` module and older versions
    of `json5`, strings (other than those being used as keys that are legal
    identifiers) will always be double-quoted, and any double quotes in the
    string will be escaped. This is `QuoteStyle.ALWAYS_DOUBLE`. If you pass
    `QuoteStyle.ALWAYS_SINGLE`, then strings will always be single-quoted,
    and any single quotes in the string will be escaped. If you pass
    `QuoteStyle.PREFER_DOUBLE`, then the behavior is the same as
    ALWAYS_DOUBLE and strings will be double-quoted *unless* the string
    contains more double quotes than single quotes, in which case the
    string will be single-quoted and any single quotes will be escaped.
    If you pass `QuoteStyle.PREFER_SINGLE`, then the behavior is the same
    as ALWAYS_SINGLE and strings will be single-quoted *unless* the string
    contains more single quotes than double quotes, in which case the
    string will be double-quoted and any double quotes will be escaped.

    *Note:* PREFER_DOUBLE and PREFER_SINGLE can impact performance, since
    in order to know which encoding to use you have to iterate over the
    entire string to count the number of single and double quotes. The
    code guesses at an encoding while doing so, but if it guesses wrong,
    the entire string has to be re-encoded, which will slow things down.
    If you are very concerned about performance, (a) you probably
    shouldn't be using this library in the first place, because it just
    isn't very fast, and (b) you should use ALWAYS_DOUBLE or
    ALWAYS_SINGLE, which won't have this issue.
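
    A rough, illustrative sketch of the difference (this assumes that
    `QuoteStyle` is importable from the top-level `json5` package, as
    `dumps` and `JSON5Encoder` are):

    >>> import json5
    >>> print(json5.dumps("it's easy"))
    "it's easy"
    >>> print(json5.dumps("it's easy",
    ...       quote_style=json5.QuoteStyle.PREFER_SINGLE))
    "it's easy"
    >>> print(json5.dumps('say "hi"',
    ...       quote_style=json5.QuoteStyle.PREFER_SINGLE))
    'say "hi"'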
65 """
67 ALWAYS_DOUBLE = 'always_double'
68 ALWAYS_SINGLE = 'always_single'
69 PREFER_DOUBLE = 'prefer_double'
70 PREFER_SINGLE = 'prefer_single'


def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
90 """Deserialize ``fp`` (a ``.read()``-supporting file-like object
91 containing a JSON document) to a Python object.
93 Supports almost the same arguments as ``json.load()`` except that:
94 - the `cls` keyword is ignored.
95 - an extra `allow_duplicate_keys` parameter supports checking for
96 duplicate keys in a object; by default, this is True for
97 compatibility with ``json.load()``, but if set to False and
98 the object contains duplicate keys, a ValueError will be raised.
99 - an extra `consume_trailing` parameter specifies whether to
100 consume any trailing characters after a valid object has been
101 parsed. By default, this value is True and the only legal
102 trailing characters are whitespace. If this value is set to False,
103 parsing will stop when a valid object has been parsed and any
104 trailing characters in the string will be ignored.
105 - an extra `start` parameter specifies the zero-based offset into the
106 file to start parsing at. If `start` is None, parsing will
107 start at the current position in the file, and line number
108 and column values will be reported as if starting from the
109 beginning of the file; If `start` is not None,
110 `load` will seek to zero and then read (and discard) the
111 appropriate number of characters before beginning parsing;
112 the file must be seekable for this to work correctly.
114 You can use `load(..., consume_trailing=False)` to repeatedly read
115 values from a file. However, in the current implementation `load` does
116 this by reading the entire file into memory before doing anything, so
117 it is not very efficient.
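
    For example, a small illustrative sketch (reading a JSON5 document,
    with unquoted keys and a comment, from an in-memory file object):

    >>> import io
    >>> import json5
    >>> json5.load(io.StringIO('{a: 1, b: "two" /* a comment */}'))
    {'a': 1, 'b': 'two'}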

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    s = fp.read()
    val, err, _ = parse(
        s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val


def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
164 """Deserialize ``s`` (a string containing a JSON5 document) to a Python
165 object.
167 Supports the same arguments as ``json.load()`` except that:
168 - the `cls` keyword is ignored.
169 - an extra `allow_duplicate_keys` parameter supports checking for
170 duplicate keys in a object; by default, this is True for
171 compatibility with ``json.load()``, but if set to False and
172 the object contains duplicate keys, a ValueError will be raised.
173 - an extra `consume_trailing` parameter specifies whether to
174 consume any trailing characters after a valid object has been
175 parsed. By default, this value is True and the only legal
176 trailing characters are whitespace. If this value is set to False,
177 parsing will stop when a valid object has been parsed and any
178 trailing characters in the string will be ignored.
179 - an extra `start` parameter specifies the zero-based offset into the
180 string to start parsing at.
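
    For example, two illustrative sketches of partial parsing:

    >>> import json5
    >>> json5.loads('[1, 2] extra junk', consume_trailing=False)
    [1, 2]
    >>> json5.loads('x = {a: 1}', start=4)
    {'a': 1}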

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    val, err, _ = parse(
        s=s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val


def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
226 """Parse ```s``, returning positional information along with a value.
228 This works exactly like `loads()`, except that (a) it returns the
229 position in the string where the parsing stopped (either due to
230 hitting an error or parsing a valid value) and any error as a string,
231 (b) it takes an optional `consume_trailing` parameter that says whether
232 to keep parsing the string after a valid value has been parsed; if True
233 (the default), any trailing characters must be whitespace. If False,
234 parsing stops when a valid value has been reached, (c) it takes an
235 optional `start` parameter that specifies a zero-based offset to start
236 parsing from in the string, and (d) the return value is different, as
237 described below.
239 `parse()` is useful if you have a string that might contain multiple
240 values and you need to extract all of them; you can do so by repeatedly
241 calling `parse`, setting `start` to the value returned in `position`
242 from the previous call.
244 Returns a tuple of (value, error_string, position). If the string
245 was a legal value, `value` will be the deserialized value,
246 `error_string` will be `None`, and `position` will be one
247 past the zero-based offset where the parser stopped reading.
248 If the string was not a legal value,
249 `value` will be `None`, `error_string` will be the string value
250 of the exception that would've been raised, and `position` will
251 be the zero-based farthest offset into the string where the parser
252 hit an error.
254 Raises:
255 - `UnicodeDecodeError` if given a byte string that is not a
256 legal UTF-8 document (or the equivalent, if using a different
257 `encoding`). This matches the `json` module.
259 Note that this does *not* raise a `ValueError`; instead any error is
260 returned as the second value in the tuple.
262 You can use this method to read in a series of values from a string
263 `s` as follows:
265 >>> import json5
266 >>> s = '1 2 3 4'
267 >>> values = []
268 >>> start = 0
269 >>> while True:
270 ... v, err, pos = json5.parse(s, start=start, consume_trailing=False)
271 ... if v:
272 ... values.append(v)
273 ... start = pos
274 ... if start == len(s) or s[start:].isspace():
275 ... # Reached the end of the string (ignoring trailing
276 ... # whitespace
277 ... break
278 ... continue
279 ... raise ValueError(err)
280 >>> values
281 [1, 2, 3, 4]
283 """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    if not s:
        raise ValueError('Empty strings are not legal JSON5')
    start = start or 0
    parser = Parser(s, '<string>', pos=start)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
        return value, None, pos
    except ValueError as e:
        return None, str(e), pos


def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    def _fp_constant_parser(s):
        return float(s.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _dictify(pairs):
        if not allow_duplicate_keys:
            keys = set()
            for key, _ in pairs:
                if key in keys:
                    raise ValueError(f'Duplicate key "{key}" found in object')
                keys.add(key)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        if object_hook:
            return object_hook(dict(pairs))
        return dict(pairs)

    parse_float = parse_float or float
    parse_int = parse_int or int
    parse_constant = parse_constant or _fp_constant_parser

    return _walk_ast(ast, _dictify, parse_float, parse_int, parse_constant)
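

# The parser produces a small AST in which a value is either one of the
# literal strings 'None', 'True', or 'False', or a (type, value) tuple
# whose type is 'number', 'string', 'object', or 'array'; _walk_ast turns
# that AST into the corresponding Python objects.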
def _walk_ast(
    el,
    dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
    parse_float,
    parse_int,
    parse_constant,
):
    if el == 'None':
        return None
    if el == 'True':
        return True
    if el == 'False':
        return False
    ty, v = el
    if ty == 'number':
        if v.startswith('0x') or v.startswith('0X'):
            return parse_int(v, base=16)
        if '.' in v or 'e' in v or 'E' in v:
            return parse_float(v)
        if 'Infinity' in v or 'NaN' in v:
            return parse_constant(v)
        return parse_int(v)
    if ty == 'string':
        return v
    if ty == 'object':
        pairs = []
        for key, val_expr in v:
            val = _walk_ast(
                val_expr, dictify, parse_float, parse_int, parse_constant
            )
            pairs.append((key, val))
        return dictify(pairs)
    if ty == 'array':
        return [
            _walk_ast(el, dictify, parse_float, parse_int, parse_constant)
            for el in v
        ]
    raise ValueError(f'unknown el: {el}')  # pragma: no cover


def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
407 """Serialize ``obj`` to a JSON5-formatted stream to ``fp``,
408 a ``.write()``-supporting file-like object.
410 Supports the same arguments as ``dumps()``, below.
412 Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False, \
413 allow_duplicate_keys=True)``
414 should produce exactly the same output as ``json.dump(obj, fp).``
415 """

    fp.write(
        dumps(
            obj=obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
            quote_keys=quote_keys,
            trailing_commas=trailing_commas,
            allow_duplicate_keys=allow_duplicate_keys,
            quote_style=quote_style,
            **kw,
        )
    )


def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
456 """Serialize ``obj`` to a JSON5-formatted string.
458 Supports the same arguments as ``json.dumps()``, except that:
460 - The ``encoding`` keyword is ignored; Unicode strings are always written.
461 - By default, object keys that are legal identifiers are not quoted; if you
462 pass ``quote_keys=True``, they will be.
463 - By default, if lists and objects span multiple lines of output (i.e.,
464 when ``indent`` >=0), the last item will have a trailing comma after it.
465 If you pass ``trailing_commas=False``, it will not.
466 - If you use a number, a boolean, or ``None`` as a key value in a dict, it
467 will be converted to the corresponding JSON string value, e.g. "1",
468 "true", or "null". By default, ``dump()`` will match the `json` modules
469 behavior and produce malformed JSON if you mix keys of different types
470 that have the same converted value; e.g., ``{1: "foo", "1": "bar"}``
471 produces '{"1": "foo", "1": "bar"}', an object with duplicated keys. If
472 you pass ``allow_duplicate_keys=False``, an exception will be raised
473 instead.
474 - If `quote_keys` is true, then keys of objects will be enclosed in quotes,
475 as in regular JSON. Otheriwse, keys will not be enclosed in quotes unless
476 they contain whitespace.
477 - If `trailing_commas` is false, then commas will not be inserted after the
478 final elements of objects and arrays, as in regular JSON. Otherwise,
479 such commas will be inserted.
480 - If `allow_duplicate_keys` is false, then only the last entry with a given
481 key will be written. Otherwise, all entries with the same key will be
482 written.
483 - `quote_style` controls how strings are encoded. See the documentation
484 for the `QuoteStyle` class, above, for how this is used.
486 *Note*: Strings that are being used as unquoted keys are not affected
487 by this parameter and remain unquoted.
489 *`quote_style` was added in version 0.10.0*.
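
    As a rough illustration of how `quote_keys`, `trailing_commas`, and
    `indent` interact (the exact whitespace shown is what the current
    encoder produces, but treat it as a sketch rather than a guarantee):

    >>> import json5
    >>> json5.dumps({'a': 1}, quote_keys=True, trailing_commas=False)
    '{"a": 1}'
    >>> print(json5.dumps({'a': [1, 2]}, indent=2))
    {
      a: [
        1,
        2,
      ],
    }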

    Other keyword arguments are allowed and will be passed to the
    encoder so custom encoders can get them, but otherwise they will
    be ignored in an attempt to provide some amount of
    forward-compatibility.

    *Note:* the standard JSON module explicitly calls `int.__repr__(obj)`
    and `float.__repr__(obj)` to encode ints and floats, thereby bypassing
    any custom representations you might have for objects that are
    subclasses of ints and floats, and, for compatibility, JSON5 does the
    same thing. To override this behavior, create a subclass of
    JSON5Encoder that overrides `encode()` and handles your custom
    representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...     def __repr__(self):
    ...         return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...     def encode(
    ...         self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...     ) -> str:
    ...         if isinstance(obj, Hex):
    ...             return repr(obj)
    ...         return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False, \
    allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dumps(obj)``.
    """

    cls = cls or JSON5Encoder
    enc = cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    return enc.encode(obj, seen=set(), level=0, as_key=False)


class JSON5Encoder:
    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
567 """Provides a class that may be overridden to customize the behavior
568 of `dumps()`. The keyword args are the same as for that function.
569 *Added in version 0.10.0"""
570 # Ignore unrecognized keyword arguments in the hope of providing
571 # some level of backwards- and forwards-compatibility.
572 del kw
574 self.skipkeys = skipkeys
575 self.ensure_ascii = ensure_ascii
576 self.check_circular = check_circular
577 self.allow_nan = allow_nan
578 self.indent = indent
579 self.separators = separators
580 if separators is None:
581 separators = (', ', ': ') if indent is None else (',', ': ')
582 self.item_separator, self.kv_separator = separators
583 self.default_fn = default or _raise_type_error
584 self.sort_keys = sort_keys
585 self.quote_keys = quote_keys
586 self.trailing_commas = trailing_commas
587 self.allow_duplicate_keys = allow_duplicate_keys
588 self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` is used for duplicate object tracking when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a double-quoted
        string representation of the object, unless obj is a string that
        can be an identifier (and quote_keys is false and obj isn't a
        reserved word). If the object should not be used as a key,
        `TypeError` should be raised; that allows the base implementation
        to implement `skipkeys` properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns None if the object is not a basic type."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        for ch in obj:
            if ch == dq:
                # At first we will guess at which quotes to escape. If
                # we guess wrong, we reencode the string below.
                double_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_DOUBLE,
                    QuoteStyle.PREFER_DOUBLE,
                ):
                    encoded_ch = self._escape_ch(dq)
                else:
                    encoded_ch = dq
            elif ch == sq:
                single_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_SINGLE,
                    QuoteStyle.PREFER_SINGLE,
                ):
                    encoded_ch = self._escape_ch(sq)
                else:
                    encoded_ch = sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if quote_style in (QuoteStyle.ALWAYS_DOUBLE, QuoteStyle.PREFER_DOUBLE):
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        if ch == '\\':
            return '\\\\'
        if ch == "'":
            return r'\''
        if ch == '"':
            return r'\"'
        if ch == '\n':
            return r'\n'
        if ch == '\r':
            return r'\r'
        if ch == '\t':
            return r'\t'
        if ch == '\b':
            return r'\b'
        if ch == '\f':
            return r'\f'
        if ch == '\v':
            return r'\v'
        if ch == '\0':
            return r'\0'

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'
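
        # Code points outside the Basic Multilingual Plane (>= 0x10000)
        # don't fit in a single \uXXXX escape, so they are written as a
        # UTF-16 surrogate pair below.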
        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        # Ideally we'd use collections.abc.Mapping and collections.abc.Sequence
        # here, but for backwards-compatibility with potential old callers,
        # we only check for the two attributes we need in each case.
        if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
            s = self._encode_dict(obj, seen, level + 1)
        elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
            s = self._encode_array(obj, seen, level + 1)
        else:
            s = self.encode(self.default(obj), seen, level + 1, as_key=False)
            assert s is not None

        if self.check_circular:
            seen.remove(i)
        return s

    def _encode_dict(self, obj: Any, seen: set, level: int) -> str:
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        s = '{' + indent_str

        first_key = True
        new_keys = set()
        for key in keys:
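            # encode() raises TypeError for values that can't be used as
            # keys; when skipkeys is set, such keys are silently dropped,
            # mirroring json.dump's skipkeys behavior.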
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            if first_key:
                first_key = False
            else:
                s += item_sep

            val_str = self.encode(obj[key], seen, level, as_key=False)
            s += key_str + kv_sep + val_str

        s += end_str + '}'
        return s

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
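        # Returns (indent_str, end_str): indent_str goes before each item
        # in a dict or array, and end_str goes before the closing brace or
        # bracket (and includes the trailing comma when multi-line output
        # with trailing commas is wanted).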
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers changes in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if (
            not key
            or not self._is_id_start(key[0])
            and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

    def _is_id_start(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless
        reserved words are added in later versions of the JSON5
        spec and this implementation has not yet been updated to
        handle the changes."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None


def _raise_type_error(obj) -> Any:
    raise TypeError(f'{repr(obj)} is not JSON5 serializable')