Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/json5/lib.py: 30%


303 statements  

# Copyright 2015 Google Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import enum
import math
import re
from typing import (
    Any,
    Callable,
    IO,
    Iterable,
    Mapping,
    Optional,
    Set,
    Tuple,
    Type,
    Union,
)
import unicodedata

from json5.parser import Parser


# Used when encoding keys, below.
_reserved_word_re: Optional[re.Pattern] = None


class QuoteStyle(enum.Enum):
    """Controls how strings will be quoted during encoding.

    By default, for compatibility with the `json` module and older versions
    of `json5`, strings (other than keys that are legal identifiers, which
    are written unquoted) will always be double-quoted, and any double
    quotes in the string will be escaped. This is `QuoteStyle.ALWAYS_DOUBLE`.
    If you pass `QuoteStyle.ALWAYS_SINGLE`, then strings will always be
    single-quoted, and any single quotes in the string will be escaped.
    If you pass `QuoteStyle.PREFER_DOUBLE`, then the behavior is the same
    as ALWAYS_DOUBLE and strings will be double-quoted *unless* the string
    contains more double quotes than single quotes, in which case the
    string will be single-quoted and single quotes will be escaped. If you
    pass `QuoteStyle.PREFER_SINGLE`, then the behavior is the same as
    ALWAYS_SINGLE and strings will be single-quoted *unless* the string
    contains more single quotes than double quotes, in which case the
    string will be double-quoted and any double quotes will be escaped.

    *Note:* PREFER_DOUBLE and PREFER_SINGLE can impact performance, since
    in order to know which encoding to use you have to iterate over the
    entire string to count the number of single and double quotes. The code
    guesses at an encoding while doing so, but if it guesses wrong, the
    entire string has to be re-encoded, which will slow things down. If you
    are very concerned about performance (a) you probably shouldn't be
    using this library in the first place, because it just isn't very fast,
    and (b) you should use ALWAYS_DOUBLE or ALWAYS_SINGLE, which won't have
    this issue.
    """

    ALWAYS_DOUBLE = 'always_double'
    ALWAYS_SINGLE = 'always_single'
    PREFER_DOUBLE = 'prefer_double'
    PREFER_SINGLE = 'prefer_single'

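# Illustrative sketch of how the quote styles above affect output (outputs
# hand-derived from the encoder code below, not captured from a run):
#
#   >>> from json5.lib import dumps, QuoteStyle
#   >>> print(dumps('say "hi"'))
#   "say \"hi\""
#   >>> print(dumps('say "hi"', quote_style=QuoteStyle.PREFER_SINGLE))
#   'say "hi"'
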

def load(
    fp: IO,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
) -> Any:
    """Deserialize ``fp`` (a ``.read()``-supporting file-like object
    containing a JSON document) to a Python object.

    Supports almost the same arguments as ``json.load()`` except that:
    - the `cls` keyword is ignored.
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.load()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.
    - an extra `consume_trailing` parameter specifies whether to
      consume any trailing characters after a valid object has been
      parsed. By default, this value is True and the only legal
      trailing characters are whitespace. If this value is set to False,
      parsing will stop when a valid object has been parsed and any
      trailing characters in the string will be ignored.
    - an extra `start` parameter specifies the zero-based offset into the
      file to start parsing at. If `start` is None, parsing will
      start at the current position in the file, and line number
      and column values will be reported as if starting from the
      beginning of the file; if `start` is not None,
      `load` will seek to zero and then read (and discard) the
      appropriate number of characters before beginning parsing;
      the file must be seekable for this to work correctly.

    You can use `load(..., consume_trailing=False)` to repeatedly read
    values from a file. However, in the current implementation `load` does
    this by reading the entire file into memory before doing anything, so
    it is not very efficient.

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    s = fp.read()
    val, err, _ = parse(
        s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val

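# A minimal usage sketch for load(); io.StringIO stands in for a real file
# (expected output hand-derived from the code, not captured from a run):
#
#   >>> import io
#   >>> from json5.lib import load
#   >>> load(io.StringIO('{key: "value", /* comment */ n: .5}'))
#   {'key': 'value', 'n': 0.5}
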


def loads(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Deserialize ``s`` (a string containing a JSON5 document) to a Python
    object.

    Supports the same arguments as ``json.load()`` except that:
    - the `cls` keyword is ignored.
    - an extra `allow_duplicate_keys` parameter supports checking for
      duplicate keys in an object; by default, this is True for
      compatibility with ``json.load()``, but if set to False and
      the object contains duplicate keys, a ValueError will be raised.
    - an extra `consume_trailing` parameter specifies whether to
      consume any trailing characters after a valid object has been
      parsed. By default, this value is True and the only legal
      trailing characters are whitespace. If this value is set to False,
      parsing will stop when a valid object has been parsed and any
      trailing characters in the string will be ignored.
    - an extra `start` parameter specifies the zero-based offset into the
      string to start parsing at.

    Raises
    - `ValueError` if given an invalid document. This is different
      from the `json` module, which raises `json.JSONDecodeError`.
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.
    """

    val, err, _ = parse(
        s=s,
        encoding=encoding,
        cls=cls,
        object_hook=object_hook,
        parse_float=parse_float,
        parse_int=parse_int,
        parse_constant=parse_constant,
        strict=strict,
        object_pairs_hook=object_pairs_hook,
        allow_duplicate_keys=allow_duplicate_keys,
        consume_trailing=consume_trailing,
        start=start,
    )
    if err:
        raise ValueError(err)
    return val

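# Sketch of the `allow_duplicate_keys` behavior described above (outputs
# hand-derived from the code, not captured from a run):
#
#   >>> from json5.lib import loads
#   >>> loads('{a: 1, a: 2}')
#   {'a': 2}
#   >>> loads('{a: 1, a: 2}', allow_duplicate_keys=False)
#   Traceback (most recent call last):
#     ...
#   ValueError: Duplicate key "a" found in object
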


def parse(
    s: str,
    *,
    encoding: Optional[str] = None,
    cls: Any = None,
    object_hook: Optional[Callable[[Mapping[str, Any]], Any]] = None,
    parse_float: Optional[Callable[[str], Any]] = None,
    parse_int: Optional[Callable[[str], Any]] = None,
    parse_constant: Optional[Callable[[str], Any]] = None,
    strict: bool = True,
    object_pairs_hook: Optional[
        Callable[[Iterable[Tuple[str, Any]]], Any]
    ] = None,
    allow_duplicate_keys: bool = True,
    consume_trailing: bool = True,
    start: Optional[int] = None,
):
    """Parse ``s``, returning positional information along with a value.

    This works exactly like `loads()`, except that (a) it returns the
    position in the string where the parsing stopped (either due to
    hitting an error or parsing a valid value) and any error as a string,
    (b) it takes an optional `consume_trailing` parameter that says whether
    to keep parsing the string after a valid value has been parsed; if True
    (the default), any trailing characters must be whitespace. If False,
    parsing stops when a valid value has been reached, (c) it takes an
    optional `start` parameter that specifies a zero-based offset to start
    parsing from in the string, and (d) the return value is different, as
    described below.

    `parse()` is useful if you have a string that might contain multiple
    values and you need to extract all of them; you can do so by repeatedly
    calling `parse`, setting `start` to the value returned in `position`
    from the previous call.

    Returns a tuple of (value, error_string, position). If the string
    was a legal value, `value` will be the deserialized value,
    `error_string` will be `None`, and `position` will be one
    past the zero-based offset where the parser stopped reading.
    If the string was not a legal value,
    `value` will be `None`, `error_string` will be the string value
    of the exception that would've been raised, and `position` will
    be the zero-based farthest offset into the string where the parser
    hit an error.

    Raises:
    - `UnicodeDecodeError` if given a byte string that is not a
      legal UTF-8 document (or the equivalent, if using a different
      `encoding`). This matches the `json` module.

    Note that this does *not* raise a `ValueError`; instead any error is
    returned as the second value in the tuple.

    You can use this method to read in a series of values from a string
    `s` as follows:

    >>> import json5
    >>> s = '1 2 3 4'
    >>> values = []
    >>> start = 0
    >>> while True:
    ...     v, err, pos = json5.parse(s, start=start, consume_trailing=False)
    ...     if v:
    ...         values.append(v)
    ...         start = pos
    ...         if start == len(s) or s[start:].isspace():
    ...             # Reached the end of the string (ignoring trailing
    ...             # whitespace)
    ...             break
    ...         continue
    ...     raise ValueError(err)
    >>> values
    [1, 2, 3, 4]

    """
    assert cls is None, 'Custom decoders are not supported'

    if isinstance(s, bytes):
        encoding = encoding or 'utf-8'
        s = s.decode(encoding)

    if not s:
        raise ValueError('Empty strings are not legal JSON5')
    start = start or 0
    parser = Parser(s, '<string>', pos=start)
    ast, err, pos = parser.parse(
        global_vars={'_strict': strict, '_consume_trailing': consume_trailing}
    )
    if err:
        return None, err, pos

    try:
        value = _convert(
            ast,
            object_hook=object_hook,
            parse_float=parse_float,
            parse_int=parse_int,
            parse_constant=parse_constant,
            object_pairs_hook=object_pairs_hook,
            allow_duplicate_keys=allow_duplicate_keys,
        )
        return value, None, pos
    except ValueError as e:
        return None, str(e), pos

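# Sketch of the error-return behavior of parse(): unlike load()/loads(), a
# bad document produces an error tuple rather than an exception (the exact
# error text depends on the parser, so it is not shown here):
#
#   >>> from json5.lib import parse
#   >>> v, err, pos = parse('[1, 2')   # truncated document
#   >>> v is None, isinstance(err, str)
#   (True, True)
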


def _convert(
    ast,
    object_hook,
    parse_float,
    parse_int,
    parse_constant,
    object_pairs_hook,
    allow_duplicate_keys,
):
    def _fp_constant_parser(s):
        return float(s.replace('Infinity', 'inf').replace('NaN', 'nan'))

    def _dictify(pairs):
        if not allow_duplicate_keys:
            keys = set()
            for key, _ in pairs:
                if key in keys:
                    raise ValueError(f'Duplicate key "{key}" found in object')
                keys.add(key)

        if object_pairs_hook:
            return object_pairs_hook(pairs)
        if object_hook:
            return object_hook(dict(pairs))
        return dict(pairs)

    parse_float = parse_float or float
    parse_int = parse_int or int
    parse_constant = parse_constant or _fp_constant_parser

    return _walk_ast(ast, _dictify, parse_float, parse_int, parse_constant)

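# Sketch of how the parse_* hooks plumbed through _convert() are applied
# (output hand-derived from the code, not captured from a run):
#
#   >>> from decimal import Decimal
#   >>> from json5.lib import loads
#   >>> loads('[1.5, 2, Infinity]', parse_float=Decimal)
#   [Decimal('1.5'), 2, inf]
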


def _walk_ast(
    el,
    dictify: Callable[[Iterable[Tuple[str, Any]]], Any],
    parse_float,
    parse_int,
    parse_constant,
):
    if el == 'None':
        return None
    if el == 'True':
        return True
    if el == 'False':
        return False
    ty, v = el
    if ty == 'number':
        if v.startswith('0x') or v.startswith('0X'):
            return parse_int(v, base=16)
        if '.' in v or 'e' in v or 'E' in v:
            return parse_float(v)
        if 'Infinity' in v or 'NaN' in v:
            return parse_constant(v)
        return parse_int(v)
    if ty == 'string':
        return v
    if ty == 'object':
        pairs = []
        for key, val_expr in v:
            val = _walk_ast(
                val_expr, dictify, parse_float, parse_int, parse_constant
            )
            pairs.append((key, val))
        return dictify(pairs)
    if ty == 'array':
        return [
            _walk_ast(el, dictify, parse_float, parse_int, parse_constant)
            for el in v
        ]
    raise ValueError(f'unknown el: {el}')  # pragma: no cover

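# For reference, the AST shape that _walk_ast() consumes, as implied by the
# branches above (this literal is an illustration, not parser output
# captured from a real run):
#
#   >>> ast = ('object', [('a', ('array', [('number', '1'), 'True']))])
#   >>> _walk_ast(ast, dict, float, int, float)
#   {'a': [1, True]}
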


def dump(
    obj: Any,
    fp: IO,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` to a JSON5-formatted stream to ``fp``,
    a ``.write()``-supporting file-like object.

    Supports the same arguments as ``dumps()``, below.

    Calling ``dump(obj, fp, quote_keys=True, trailing_commas=False, \
        allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dump(obj, fp)``.
    """

    fp.write(
        dumps(
            obj=obj,
            skipkeys=skipkeys,
            ensure_ascii=ensure_ascii,
            check_circular=check_circular,
            allow_nan=allow_nan,
            cls=cls,
            indent=indent,
            separators=separators,
            default=default,
            sort_keys=sort_keys,
            quote_keys=quote_keys,
            trailing_commas=trailing_commas,
            allow_duplicate_keys=allow_duplicate_keys,
            quote_style=quote_style,
            **kw,
        )
    )

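# A minimal usage sketch for dump(); io.StringIO stands in for a real file
# (output hand-derived from the code, not captured from a run):
#
#   >>> import io
#   >>> from json5.lib import dump
#   >>> buf = io.StringIO()
#   >>> dump({'a': 1}, buf)
#   >>> buf.getvalue()
#   '{a: 1}'
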


def dumps(
    obj: Any,
    *,
    skipkeys: bool = False,
    ensure_ascii: bool = True,
    check_circular: bool = True,
    allow_nan: bool = True,
    cls: Optional[Type['JSON5Encoder']] = None,
    indent: Optional[Union[int, str]] = None,
    separators: Optional[Tuple[str, str]] = None,
    default: Optional[Callable[[Any], Any]] = None,
    sort_keys: bool = False,
    quote_keys: bool = False,
    trailing_commas: bool = True,
    allow_duplicate_keys: bool = True,
    quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
    **kw,
):
    """Serialize ``obj`` to a JSON5-formatted string.

    Supports the same arguments as ``json.dumps()``, except that:

    - The ``encoding`` keyword is ignored; Unicode strings are always
      written.
    - By default, object keys that are legal identifiers are not quoted;
      if you pass ``quote_keys=True``, they will be.
    - By default, if lists and objects span multiple lines of output (i.e.,
      when ``indent`` >= 0), the last item will have a trailing comma
      after it. If you pass ``trailing_commas=False``, it will not.
    - If you use a number, a boolean, or ``None`` as a key value in a dict,
      it will be converted to the corresponding JSON string value, e.g.
      "1", "true", or "null". By default, ``dump()`` will match the `json`
      module's behavior and produce malformed JSON if you mix keys of
      different types that have the same converted value; e.g.,
      ``{1: "foo", "1": "bar"}`` produces '{"1": "foo", "1": "bar"}', an
      object with duplicated keys. If you pass
      ``allow_duplicate_keys=False``, an exception will be raised instead.
    - If `quote_keys` is true, then keys of objects will be enclosed in
      quotes, as in regular JSON. Otherwise, keys will not be enclosed in
      quotes unless they contain whitespace.
    - If `trailing_commas` is false, then commas will not be inserted after
      the final elements of objects and arrays, as in regular JSON.
      Otherwise, such commas will be inserted.
    - If `allow_duplicate_keys` is false, then only the last entry with a
      given key will be written. Otherwise, all entries with the same key
      will be written.
    - `quote_style` controls how strings are encoded. See the documentation
      for the `QuoteStyle` class, above, for how this is used.

      *Note*: Strings that are being used as unquoted keys are not affected
      by this parameter and remain unquoted.

      *`quote_style` was added in version 0.10.0*.

    Other keyword arguments are allowed and will be passed to the
    encoder so custom encoders can get them, but otherwise they will
    be ignored in an attempt to provide some amount of
    forward-compatibility.

    *Note:* the standard JSON module explicitly calls `int.__repr__(obj)`
    and `float.__repr__(obj)` to encode ints and floats, thereby bypassing
    any custom representations you might have for objects that are
    subclasses of ints and floats, and, for compatibility, JSON5 does the
    same thing. To override this behavior, create a subclass of
    JSON5Encoder that overrides `encode()` and handles your custom
    representation.

    For example:

    ```
    >>> import json5
    >>> from typing import Any, Set
    >>>
    >>> class Hex(int):
    ...     def __repr__(self):
    ...         return hex(self)
    >>>
    >>> class CustomEncoder(json5.JSON5Encoder):
    ...     def encode(
    ...         self, obj: Any, seen: Set, level: int, *, as_key: bool
    ...     ) -> str:
    ...         if isinstance(obj, Hex):
    ...             return repr(obj)
    ...         return super().encode(obj, seen, level, as_key=as_key)
    ...
    >>> json5.dumps([20, Hex(20)], cls=CustomEncoder)
    '[20, 0x14]'

    ```

    *Note:* calling ``dumps(obj, quote_keys=True, trailing_commas=False, \
        allow_duplicate_keys=True)``
    should produce exactly the same output as ``json.dumps(obj)``.
    """

    cls = cls or JSON5Encoder
    enc = cls(
        skipkeys=skipkeys,
        ensure_ascii=ensure_ascii,
        check_circular=check_circular,
        allow_nan=allow_nan,
        indent=indent,
        separators=separators,
        default=default,
        sort_keys=sort_keys,
        quote_keys=quote_keys,
        trailing_commas=trailing_commas,
        allow_duplicate_keys=allow_duplicate_keys,
        quote_style=quote_style,
        **kw,
    )
    return enc.encode(obj, seen=set(), level=0, as_key=False)

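# Sketch of the indent/trailing-comma behavior described above (output
# hand-derived from the code, not captured from a run):
#
#   >>> from json5.lib import dumps
#   >>> print(dumps({'a': [1, 2]}, indent=2))
#   {
#     a: [
#       1,
#       2,
#     ],
#   }
#   >>> dumps({'a': [1, 2]}, quote_keys=True, trailing_commas=False)
#   '{"a": [1, 2]}'
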


class JSON5Encoder:
    def __init__(
        self,
        *,
        skipkeys: bool = False,
        ensure_ascii: bool = True,
        check_circular: bool = True,
        allow_nan: bool = True,
        indent: Optional[Union[int, str]] = None,
        separators: Optional[Tuple[str, str]] = None,
        default: Optional[Callable[[Any], Any]] = None,
        sort_keys: bool = False,
        quote_keys: bool = False,
        trailing_commas: bool = True,
        allow_duplicate_keys: bool = True,
        quote_style: QuoteStyle = QuoteStyle.ALWAYS_DOUBLE,
        **kw,
    ):
        """Provides a class that may be overridden to customize the behavior
        of `dumps()`. The keyword args are the same as for that function.
        *Added in version 0.10.0*"""
        # Ignore unrecognized keyword arguments in the hope of providing
        # some level of backwards- and forwards-compatibility.
        del kw

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.indent = indent
        self.separators = separators
        if separators is None:
            separators = (', ', ': ') if indent is None else (',', ': ')
        self.item_separator, self.kv_separator = separators
        self.default_fn = default or _raise_type_error
        self.sort_keys = sort_keys
        self.quote_keys = quote_keys
        self.trailing_commas = trailing_commas
        self.allow_duplicate_keys = allow_duplicate_keys
        self.quote_style = quote_style

    def default(self, obj: Any) -> Any:
        """Provides a last-ditch option to encode a value that the encoder
        doesn't otherwise recognize, by converting `obj` to a value that
        *can* (and will) be serialized by the other methods in the class.

        Note: this must not return a serialized value (i.e., string)
        directly, as that'll result in a doubly-encoded value."""
        return self.default_fn(obj)

    def encode(
        self,
        obj: Any,
        seen: Set,
        level: int,
        *,
        as_key: bool,
    ) -> str:
        """Returns a JSON5-encoded version of an arbitrary object. This can
        be used to provide customized serialization of objects. Overridden
        methods of this class should handle their custom objects and then
        fall back to super().encode() if they've been passed a normal
        object.

        `seen` is used for duplicate object tracking when `check_circular`
        is True.

        `level` represents the current indentation level, which increases
        by one for each recursive invocation of encode (i.e., whenever
        we're encoding the values of a dict or a list).

        May raise `TypeError` if the object is the wrong type to be
        encoded (i.e., your custom routine can't handle it either), and
        `ValueError` if there's something wrong with the value, e.g.
        a float value of NaN when `allow_nan` is false.

        If `as_key` is true, the return value should be a double-quoted
        string representation of the object, unless obj is a string that
        can be an identifier (and quote_keys is false and obj isn't a
        reserved word). If the object should not be used as a key,
        `TypeError` should be raised; that allows the base implementation
        to implement `skipkeys` properly.
        """
        seen = seen or set()
        s = self._encode_basic_type(obj, as_key=as_key)
        if s is not None:
            return s

        if as_key:
            raise TypeError(f'Invalid key {obj!r}')
        return self._encode_non_basic_type(obj, seen, level)

    def _encode_basic_type(self, obj: Any, *, as_key: bool) -> Optional[str]:
        """Returns None if the object is not a basic type."""

        if isinstance(obj, str):
            return self._encode_str(obj, as_key=as_key)

        # Check for True/False before ints because True and False are
        # also considered ints and so would be represented as 1 and 0
        # if we did ints first.
        if obj is True:
            return '"true"' if as_key else 'true'
        if obj is False:
            return '"false"' if as_key else 'false'
        if obj is None:
            return '"null"' if as_key else 'null'

        if isinstance(obj, int):
            return self._encode_int(obj, as_key=as_key)

        if isinstance(obj, float):
            return self._encode_float(obj, as_key=as_key)

        return None

    def _encode_int(self, obj: int, *, as_key: bool) -> str:
        s = int.__repr__(obj)
        return f'"{s}"' if as_key else s

    def _encode_float(self, obj: float, *, as_key: bool) -> str:
        if obj == float('inf'):
            allowed = self.allow_nan
            s = 'Infinity'
        elif obj == float('-inf'):
            allowed = self.allow_nan
            s = '-Infinity'
        elif math.isnan(obj):
            allowed = self.allow_nan
            s = 'NaN'
        else:
            allowed = True
            s = float.__repr__(obj)

        if not allowed:
            raise ValueError(f'Illegal JSON5 value: {obj}')
        return f'"{s}"' if as_key else s

    def _encode_str(self, obj: str, *, as_key: bool) -> str:
        if (
            as_key
            and self.is_identifier(obj)
            and not self.quote_keys
            and not self.is_reserved_word(obj)
        ):
            return obj

        return self._encode_quoted_str(obj, self.quote_style)

    def _encode_quoted_str(self, obj: str, quote_style: QuoteStyle) -> str:
        """Returns a quoted string with a minimal number of escaped quotes."""
        ret = []
        double_quotes_seen = 0
        single_quotes_seen = 0
        sq = "'"
        dq = '"'
        for ch in obj:
            if ch == dq:
                # At first we will guess at which quotes to escape. If
                # we guess wrong, we reencode the string below.
                double_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_DOUBLE,
                    QuoteStyle.PREFER_DOUBLE,
                ):
                    encoded_ch = self._escape_ch(dq)
                else:
                    encoded_ch = dq
            elif ch == sq:
                single_quotes_seen += 1
                if quote_style in (
                    QuoteStyle.ALWAYS_SINGLE,
                    QuoteStyle.PREFER_SINGLE,
                ):
                    encoded_ch = self._escape_ch(sq)
                else:
                    encoded_ch = sq
            elif ch == '\\':
                encoded_ch = self._escape_ch(ch)
            else:
                o = ord(ch)
                if o < 32:
                    encoded_ch = self._escape_ch(ch)
                elif o < 128:
                    encoded_ch = ch
                elif not self.ensure_ascii and ch not in ('\u2028', '\u2029'):
                    encoded_ch = ch
                else:
                    encoded_ch = self._escape_ch(ch)
            ret.append(encoded_ch)

        # We may have guessed wrong and need to reencode the string.
        if (
            double_quotes_seen > single_quotes_seen
            and quote_style == QuoteStyle.PREFER_DOUBLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_SINGLE)
        if (
            single_quotes_seen > double_quotes_seen
            and quote_style == QuoteStyle.PREFER_SINGLE
        ):
            return self._encode_quoted_str(obj, QuoteStyle.ALWAYS_DOUBLE)

        if quote_style in (QuoteStyle.ALWAYS_DOUBLE, QuoteStyle.PREFER_DOUBLE):
            return '"' + ''.join(ret) + '"'
        return "'" + ''.join(ret) + "'"

    def _escape_ch(self, ch: str) -> str:
        """Returns the backslash-escaped representation of the char."""
        if ch == '\\':
            return '\\\\'
        if ch == "'":
            return r'\''
        if ch == '"':
            return r'\"'
        if ch == '\n':
            return r'\n'
        if ch == '\r':
            return r'\r'
        if ch == '\t':
            return r'\t'
        if ch == '\b':
            return r'\b'
        if ch == '\f':
            return r'\f'
        if ch == '\v':
            return r'\v'
        if ch == '\0':
            return r'\0'

        o = ord(ch)
        if o < 65536:
            return rf'\u{o:04x}'

        val = o - 0x10000
        high = 0xD800 + (val >> 10)
        low = 0xDC00 + (val & 0x3FF)
        return rf'\u{high:04x}\u{low:04x}'

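    # Worked example of the surrogate-pair arithmetic in _escape_ch() above,
    # for U+1F600 (hand-derived, not captured from a run):
    #
    #   >>> o = 0x1F600
    #   >>> val = o - 0x10000
    #   >>> hex(0xD800 + (val >> 10)), hex(0xDC00 + (val & 0x3FF))
    #   ('0xd83d', '0xde00')
    #
    # so the character is escaped as '\ud83d\ude00'.
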

    def _encode_non_basic_type(self, obj, seen: Set, level: int) -> str:
        # Basic types can't be recursive so we only check for circularity
        # on non-basic types. If for some reason the caller was using a
        # subclass of a basic type and wanted to check circularity on it,
        # it'd have to do so directly in a subclass of JSON5Encoder.
        if self.check_circular:
            i = id(obj)
            if i in seen:
                raise ValueError('Circular reference detected.')
            seen.add(i)

        # Ideally we'd use collections.abc.Mapping and
        # collections.abc.Sequence here, but for backwards-compatibility
        # with potential old callers, we only check for the two attributes
        # we need in each case.
        if hasattr(obj, 'keys') and hasattr(obj, '__getitem__'):
            s = self._encode_dict(obj, seen, level + 1)
        elif hasattr(obj, '__getitem__') and hasattr(obj, '__iter__'):
            s = self._encode_array(obj, seen, level + 1)
        else:
            s = self.encode(self.default(obj), seen, level + 1, as_key=False)
            assert s is not None

        if self.check_circular:
            seen.remove(i)
        return s

    def _encode_dict(self, obj: Any, seen: set, level: int) -> str:
        if not obj:
            return '{}'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        kv_sep = self.kv_separator

        if self.sort_keys:
            keys = sorted(obj.keys())
        else:
            keys = obj.keys()

        s = '{' + indent_str

        first_key = True
        new_keys = set()
        for key in keys:
            try:
                key_str = self.encode(key, seen, level, as_key=True)
            except TypeError:
                if self.skipkeys:
                    continue
                raise

            if not self.allow_duplicate_keys:
                if key_str in new_keys:
                    raise ValueError(f'duplicate key {repr(key)}')
                new_keys.add(key_str)

            if first_key:
                first_key = False
            else:
                s += item_sep

            val_str = self.encode(obj[key], seen, level, as_key=False)
            s += key_str + kv_sep + val_str

        s += end_str + '}'
        return s

    def _encode_array(self, obj: Any, seen: Set, level: int) -> str:
        if not obj:
            return '[]'

        indent_str, end_str = self._spacers(level)
        item_sep = self.item_separator + indent_str
        return (
            '['
            + indent_str
            + item_sep.join(
                self.encode(el, seen, level, as_key=False) for el in obj
            )
            + end_str
            + ']'
        )

    def _spacers(self, level: int) -> Tuple[str, str]:
        if self.indent is not None:
            end_str = ''
            if self.trailing_commas:
                end_str = ','
            if isinstance(self.indent, int):
                if self.indent > 0:
                    indent_str = '\n' + ' ' * self.indent * level
                    end_str += '\n' + ' ' * self.indent * (level - 1)
                else:
                    indent_str = '\n'
                    end_str += '\n'
            else:
                indent_str = '\n' + self.indent * level
                end_str += '\n' + self.indent * (level - 1)
        else:
            indent_str = ''
            end_str = ''
        return indent_str, end_str

    def is_identifier(self, key: str) -> bool:
        """Returns whether the string could be used as a legal
        EcmaScript/JavaScript identifier.

        There should normally be no reason to override this, unless
        the definition of identifiers changes in later versions of the
        JSON5 spec and this implementation hasn't been updated to handle
        the changes yet."""
        if (
            not key
            or not self._is_id_start(key[0])
            and key[0] not in ('$', '_')
        ):
            return False
        for ch in key[1:]:
            if not self._is_id_continue(ch) and ch not in ('$', '_'):
                return False
        return True

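    # Sketch of what is_identifier() accepts (results hand-derived from the
    # code above, not captured from a run):
    #
    #   >>> enc = JSON5Encoder()
    #   >>> enc.is_identifier('foo_bar'), enc.is_identifier('$x')
    #   (True, True)
    #   >>> enc.is_identifier('foo bar'), enc.is_identifier('2x')
    #   (False, False)
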

    def _is_id_start(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
        )

    def _is_id_continue(self, ch: str) -> bool:
        return unicodedata.category(ch) in (
            'Lu',
            'Ll',
            'Li',
            'Lt',
            'Lm',
            'Lo',
            'Nl',
            'Nd',
            'Mn',
            'Mc',
            'Pc',
        )

    def is_reserved_word(self, key: str) -> bool:
        """Returns whether the key is a reserved word.

        There should normally be no need to override this, unless there
        have been reserved words added in later versions of the JSON5
        spec and this implementation has not been updated to handle
        the changes yet."""
        global _reserved_word_re
        if _reserved_word_re is None:
            # List taken from section 7.6.1 of ECMA-262, version 5.1.
            # https://262.ecma-international.org/5.1/#sec-7.6.1.
            # This includes currently reserved words, words reserved
            # for future use (both as of 5.1), null, true, and false.
            _reserved_word_re = re.compile(
                '('
                + '|'.join(
                    [
                        'break',
                        'case',
                        'catch',
                        'class',
                        'const',
                        'continue',
                        'debugger',
                        'default',
                        'delete',
                        'do',
                        'else',
                        'enum',
                        'export',
                        'extends',
                        'false',
                        'finally',
                        'for',
                        'function',
                        'if',
                        'implements',
                        'import',
                        'in',
                        'instanceof',
                        'interface',
                        'let',
                        'new',
                        'null',
                        'package',
                        'private',
                        'protected',
                        'public',
                        'return',
                        'static',
                        'super',
                        'switch',
                        'this',
                        'throw',
                        'true',
                        'try',
                        'typeof',
                        'var',
                        'void',
                        'while',
                        'with',
                        'yield',
                    ]
                )
                + ')$'
            )
        return _reserved_word_re.match(key) is not None

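# Sketch of how is_reserved_word() affects key quoting (output hand-derived
# from the encoder code, not captured from a run):
#
#   >>> enc = JSON5Encoder()
#   >>> enc.is_reserved_word('while'), enc.is_reserved_word('whileloop')
#   (True, False)
#   >>> from json5.lib import dumps
#   >>> dumps({'while': 1, 'x': 2})
#   '{"while": 1, x: 2}'
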


def _raise_type_error(obj) -> Any:
    raise TypeError(f'{repr(obj)} is not JSON5 serializable')