Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/simplejson/decoder.py: 25%
226 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:20 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:20 +0000
1"""Implementation of JSONDecoder
2"""
3from __future__ import absolute_import
4import re
5import sys
6import struct
7from .compat import PY3, unichr
8from .scanner import make_scanner, JSONDecodeError
def _import_c_scanstring():
    """Return ``scanstring`` from the ``_speedups`` module, or ``None``.

    ``None`` is returned when importing from ``._speedups`` raises
    ``ImportError``.
    """
    try:
        from ._speedups import scanstring as accelerated
    except ImportError:
        accelerated = None
    return accelerated


c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError may still be imported from this module for
# compatibility, but it was never in the __all__
__all__ = ['JSONDecoder']

# Regular expression flags shared by the patterns compiled in this module
# (STRINGCHUNK and WHITESPACE).
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
def _floatconstants():
    """Return the float triple ``(nan, inf, -inf)``.

    On interpreters older than 2.6 the two positive values are unpacked
    as big-endian doubles from a hex-encoded byte string; otherwise they
    come straight from ``float('nan')`` and ``float('inf')``.
    """
    if sys.version_info >= (2, 6):
        nan, inf = float('nan'), float('inf')
    else:
        _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
        nan, inf = struct.unpack('>dd', _BYTES)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()
# Maps the non-standard JSON constant spellings to their float values.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Lazily matches a run of characters (group 1) followed by the first double
# quote, backslash or control character in \x00-\x1f (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Maps the character following a backslash to the character it stands for;
# the \uXXXX form is handled separately in py_scanstring.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding used when the caller passes encoding=None.
DEFAULT_ENCODING = "utf-8"
if not hasattr(sys, 'get_int_max_str_digits'):
    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
        """Backport of the integer string length conversion limitation

        Raises ValueError when ``len(s)`` exceeds *INT_MAX_STR_DIGITS*,
        otherwise returns ``int(s)``.

        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits
        """
        length = len(s)
        if length <= INT_MAX_STR_DIGITS:
            return int(s)
        raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, length))
else:
    # The interpreter exposes its own limit, so the builtin is used as is.
    bounded_int = int
def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
    """Scan a four digit hex number from s[end:end + 4]

    Returns a tuple of the integer value and ``end + 4``. Raises
    JSONDecodeError, positioned at ``end - 2``, when the slice is not
    exactly four hexadecimal digits.
    """
    stop = end + 4
    digits = s[end:stop]
    if _m(digits):
        try:
            return int(digits, 16), stop
        except ValueError:
            # Fall through to the shared error below.
            pass
    raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, end - 2)
def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
                  _PY3=PY3, _maxunicode=sys.maxunicode,
                  _scan_four_digit_hex=scan_four_digit_hex):
    """Scan the string s for a JSON string.

    *end* is the index of the character in s after the quote that started
    the JSON string. All valid JSON escape sequences are unescaped and a
    JSONDecodeError is raised for an invalid or unterminated string. If
    *strict* is False then literal control characters are allowed in the
    string. *encoding* (DEFAULT_ENCODING when None) is only used to decode
    non-unicode chunks when not running on Python 3.

    Returns a tuple of the decoded string and the index of the character
    in s after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    pieces = []
    add = pieces.append
    opening = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening)
        chunk_start = end
        end = match.end()
        literal, stopper = match.groups()
        # The literal part holds zero or more unescaped string characters
        if literal:
            if not (_PY3 or isinstance(literal, unicode)):
                literal = unicode(literal, encoding)
            add(literal)
        # The stopper is the closing quote, a literal control character,
        # or a backslash announcing an escape sequence
        if stopper == '"':
            return _join(pieces), end
        if stopper != '\\':
            if strict:
                # NOTE(review): the reported position is the start of the
                # chunk just scanned, not the index of the control
                # character itself -- confirm this is intended.
                raise JSONDecodeError(
                    "Invalid control character %r at", s, chunk_start)
            add(stopper)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening)
        if esc == 'u':
            # Unicode escape sequence
            code, end = _scan_four_digit_hex(s, end + 1)
            # On builds with code points above 0xFFFF, join a high
            # surrogate with an immediately following low surrogate.
            # Unpaired surrogates are passed through unchanged.
            if (_maxunicode > 65535 and
                    code & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                low, after_low = _scan_four_digit_hex(s, end + 2)
                if low & 0xfc00 == 0xdc00:
                    code = 0x10000 + (((code - 0xd800) << 10) |
                                      (low - 0xdc00))
                    end = after_low
            decoded = unichr(code)
        else:
            # Any other escape must be present in the lookup table
            try:
                decoded = _b[esc]
            except KeyError:
                raise JSONDecodeError(
                    "Invalid \\X escape sequence %r", s, end)
            end += 1
        # Append the unescaped character
        add(decoded)
# Use speedup if available
scanstring = c_scanstring or py_scanstring

# Matches a possibly empty run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four whitespace characters, for cheap ``in`` membership tests.
WHITESPACE_STR = ' \t\n\r'
def JSONObject(state, encoding, strict, scan_once, object_hook,
               object_pairs_hook, memo=None,
               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    *state* is a ``(s, end)`` pair; scanning starts at ``s[end]``, where a
    property name, whitespace or ``'}'`` is expected. Keys are read with
    ``scanstring`` and values with *scan_once*. Each key is passed through
    ``memo.setdefault`` so equal keys share a single object.

    Returns a tuple of the result and the index just past the closing
    ``'}'``. The result is ``object_pairs_hook(pairs)`` when that hook is
    given; otherwise a dict, passed through *object_hook* when given.
    Raises JSONDecodeError on malformed input.
    """
    text, pos = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    intern_key = memo.setdefault
    members = []

    # Slicing avoids an IndexError at the end of input; the checks below
    # raise a more specific error when nothing is there.
    head = text[pos:pos + 1]
    # Normally the first character is already the opening quote of a key
    if head != '"':
        if head in _ws:
            pos = _w(text, pos).end()
            head = text[pos:pos + 1]
        if head == '}':
            # Trivial empty object
            if object_pairs_hook is not None:
                return object_pairs_hook(members), pos + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, pos + 1
        if head != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes or '}'",
                text, pos)
    pos += 1

    while True:
        key, pos = scanstring(text, pos, encoding, strict)
        key = intern_key(key, key)

        # Fast path: the separator is usually ":" directly after the key,
        # so the whitespace regex is only run when it is not.
        if text[pos:pos + 1] != ':':
            pos = _w(text, pos).end()
            if text[pos:pos + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", text, pos)
        pos += 1

        # Skip whitespace before the value; one blank is handled inline.
        try:
            if text[pos] in _ws:
                pos += 1
                if text[pos] in _ws:
                    pos = _w(text, pos + 1).end()
        except IndexError:
            pass

        value, pos = scan_once(text, pos)
        members.append((key, value))

        # Find the ',' or '}' after the value; '' stands for end of input.
        try:
            sep = text[pos]
            if sep in _ws:
                pos = _w(text, pos + 1).end()
                sep = text[pos]
        except IndexError:
            sep = ''

        if sep == '}':
            pos += 1
            break
        if sep != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", text, pos)
        pos += 1

        # Find the opening quote of the next key.
        try:
            head = text[pos]
            if head in _ws:
                pos += 1
                head = text[pos]
                if head in _ws:
                    pos = _w(text, pos + 1).end()
                    head = text[pos]
        except IndexError:
            head = ''

        if head != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                text, pos)
        pos += 1

    if object_pairs_hook is not None:
        return object_pairs_hook(members), pos
    result = dict(members)
    if object_hook is not None:
        result = object_hook(result)
    return result, pos
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    *state* is a ``(s, end)`` pair; scanning starts at ``s[end]``. Each
    element is read with *scan_once*.

    Returns a tuple of the list of values and the index just past the
    closing ``']'``. Raises JSONDecodeError when the input ends early or a
    ``','`` / ``']'`` is missing.
    """
    text, pos = state
    items = []
    head = text[pos:pos + 1]
    if head in _ws:
        pos = _w(text, pos + 1).end()
        head = text[pos:pos + 1]
    # Look-ahead for trivial empty array
    if head == ']':
        return items, pos + 1
    if head == '':
        raise JSONDecodeError("Expecting value or ']'", text, pos)
    while True:
        item, pos = scan_once(text, pos)
        items.append(item)
        sep = text[pos:pos + 1]
        if sep in _ws:
            pos = _w(text, pos + 1).end()
            sep = text[pos:pos + 1]
        if sep == ']':
            return items, pos + 1
        if sep != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", text, pos)
        pos += 1

        # Skip whitespace before the next element; one blank is handled
        # inline, running off the end is left for scan_once to report.
        try:
            if text[pos] in _ws:
                pos += 1
                if text[pos] in _ws:
                    pos = _w(text, pos + 1).end()
        except IndexError:
            pass
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    With allow_nan=True, ``NaN``, ``Infinity``, and ``-Infinity`` are also
    understood as their corresponding ``float`` values, which is outside
    the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None, allow_nan=False):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default). It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII
        work, strings of other encodings should be passed in as
        :class:`unicode`.

        *object_hook*, if specified, will be called with the result of
        every JSON object decoded and its return value will be used in
        place of the given :class:`dict`. This can be used to provide
        custom deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called
        with the result of any object literal decode with an ordered list
        of pairs. The return value of *object_pairs_hook* will be used
        instead of the :class:`dict`. This feature can be used to
        implement custom decoders that rely on the order that the key and
        value pairs are decoded (for example,
        :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the
        *object_pairs_hook* takes priority.

        *parse_float*, if specified, will be called with the string of
        every JSON float to be decoded. By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or
        parser for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded. By default, this is equivalent to
        ``int(num_str)``. This can be used to use another datatype or
        parser for JSON integers (e.g. :class:`float`).

        *allow_nan*, if True (default false), will allow the parser to
        accept the non-standard floats ``NaN``, ``Infinity``, and
        ``-Infinity``.

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. It
        is not recommended to use this feature, as it is rare to parse
        non-compliant JSON containing these values.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors,
        if ``False`` then control characters will be allowed in strings.

        """
        # An explicit parse_constant wins; otherwise the built-in table is
        # only consulted when allow_nan is set.
        if parse_constant:
            constant_parser = parse_constant
        elif allow_nan:
            constant_parser = _CONSTANTS.__getitem__
        else:
            constant_parser = None

        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float or float
        self.parse_int = parse_int or bounded_int
        self.parse_constant = constant_parser
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives the fully configured decoder.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document)

        On Python 3, ``bytes`` input is first decoded with
        ``self.encoding``. Raises JSONDecodeError ("Extra data") when
        anything other than whitespace follows the document.

        """
        if _PY3 and isinstance(s, bytes):
            s = str(s, self.encoding)
        result, stop = self.raw_decode(s)
        stop = _w(s, stop).end()
        total = len(s)
        if stop == total:
            return result
        raise JSONDecodeError("Extra data", s, stop, total)

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s``
        where the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        # Skip a leading byte order mark: either the single U+FEFF
        # character or the three-character sequence '\xef\xbb\xbf'.
        start = idx
        if start < len(s):
            lead = ord(s[start])
            if lead == 0xfeff:
                start += 1
            elif lead == 0xef and s[start:start + 3] == '\xef\xbb\xbf':
                start += 3
        return self.scan_once(s, idx=_w(s, start).end())