Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/simplejson-3.18.4-py3.8-linux-x86_64.egg/simplejson/decoder.py: 27%
227 statements
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:04 +0000
« prev ^ index » next coverage.py v7.2.2, created at 2023-03-26 06:04 +0000
1"""Implementation of JSONDecoder
2"""
3from __future__ import absolute_import
4import re
5import sys
6import struct
7from .compat import PY3, unichr
8from .scanner import make_scanner, JSONDecodeError
10def _import_c_scanstring():
11 try:
12 from ._speedups import scanstring
13 return scanstring
14 except ImportError:
15 return None
16c_scanstring = _import_c_scanstring()
# NOTE (3.1.0): JSONDecodeError is not listed in __all__ (it never was),
# but it can still be imported from this module for compatibility.
__all__ = ['JSONDecoder']

# Flags passed to the regular expressions compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE
24def _floatconstants():
25 if sys.version_info < (2, 6):
26 _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
27 nan, inf = struct.unpack('>dd', _BYTES)
28 else:
29 nan = float('nan')
30 inf = float('inf')
31 return nan, inf, -inf
33NaN, PosInf, NegInf = _floatconstants()
# Non-standard numeric literal names mapped to their float values.
_CONSTANTS = dict((
    ('-Infinity', NegInf),
    ('Infinity', PosInf),
    ('NaN', NaN),
))

# Group 1: a (possibly empty) run of characters, matched lazily.
# Group 2: the first double quote, backslash or control character
# (\x00-\x1f) that follows it.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character after a backslash mapped to the
# text it decodes to.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

DEFAULT_ENCODING = "utf-8"
def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
        _PY3=PY3, _maxunicode=sys.maxunicode):
    """Scan the string s for a JSON string.

    ``end`` is the index of the character in ``s`` after the quote that
    started the JSON string.  All valid JSON string escape sequences are
    unescaped.  JSONDecodeError is raised for an unterminated string, an
    unknown backslash escape, or a ``\\uXXXX`` escape that is not followed
    by exactly four hexadecimal digits.  If ``strict`` is true (the
    default) a literal control character also raises JSONDecodeError;
    otherwise it is kept as part of the string.

    ``encoding`` (``DEFAULT_ENCODING`` when ``None``) is only consulted
    when ``_PY3`` is false, to convert non-unicode content to unicode.

    Returns a tuple of the decoded string and the index of the character
    in ``s`` after the end quote.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used in "unterminated" error reports
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not _PY3 and not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at"
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r"
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence.  ``end`` indexes the 'u', so the
            # digits start at end + 1 and errors are reported at the
            # backslash (end - 1).
            uni, end = _scan_four_digit_hex(s, end + 1, end - 1)
            # Check for surrogate pair on UCS-4 systems
            # Note that this will join high/low surrogate pairs
            # but will also pass unpaired surrogates through
            if (_maxunicode > 65535 and
                    uni & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                # ``end`` now indexes the backslash of the second escape
                uni2, end2 = _scan_four_digit_hex(s, end + 2, end)
                if uni2 & 0xfc00 == 0xdc00:
                    uni = 0x10000 + (((uni - 0xd800) << 10) |
                                     (uni2 - 0xdc00))
                    end = end2
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return _join(chunks), end


def _scan_four_digit_hex(s, start, errpos,
        _hexdigits='0123456789abcdefABCDEF'):
    """Decode the four hex digits of a ``\\uXXXX`` escape.

    Reads ``s[start:start + 4]`` and returns ``(value, start + 4)``.
    Raises JSONDecodeError positioned at ``errpos`` unless the slice is
    exactly four ASCII hexadecimal digits.

    The digits are checked explicitly because ``int(x, 16)`` on its own is
    too lenient: it also accepts a leading sign, surrounding whitespace, a
    ``0x`` prefix and (on Python 3.6+) underscores between digits.
    """
    digits = s[start:start + 4]
    if len(digits) != 4 or not all(ch in _hexdigits for ch in digits):
        raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, errpos)
    return int(digits, 16), start + 4
# Prefer the accelerated scanner whenever one was imported
scanstring = c_scanstring if c_scanstring else py_scanstring

# The four whitespace characters skipped between tokens, as a plain string
# for membership tests and as a regex that consumes a whole run of them.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
def JSONObject(state, encoding, strict, scan_once, object_hook,
               object_pairs_hook, memo=None,
               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``state`` is a ``(s, end)`` pair; parsing starts at ``s[end]``, which
    must be whitespace, the opening quote of a property name, or ``}``.
    Keys are read with ``scanstring`` (using ``encoding`` and ``strict``)
    and values with ``scan_once``.  Keys are passed through
    ``memo.setdefault`` so equal keys share one object.

    Returns ``(result, index)`` where ``index`` is just past the closing
    ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that hook is
    given; otherwise a dict, passed through ``object_hook`` if given.

    Raises JSONDecodeError when a property name, ``:`` or ``,``/``}`` is
    expected but not found.
    """
    s, pos = state
    # Backwards compatibility: callers may omit the memo
    intern_key = ({} if memo is None else memo).setdefault
    items = []
    expect_name = "Expecting property name enclosed in double quotes"

    # Use a slice to prevent IndexError from being raised; the checks
    # below raise a more specific error if the string is exhausted
    ch = s[pos:pos + 1]
    # Normally we expect ch == '"'
    if ch != '"':
        if ch in _ws:
            pos = _w(s, pos).end()
            ch = s[pos:pos + 1]
        if ch == '}':
            # Trivial empty object
            return (_build_object(items, object_hook, object_pairs_hook),
                    pos + 1)
        if ch != '"':
            raise JSONDecodeError(expect_name, s, pos)
    pos += 1

    while True:
        name, pos = scanstring(s, pos, encoding, strict)
        name = intern_key(name, name)

        # To skip some function call overhead the fast paths where the
        # key separator is ": " or just ":" avoid the whitespace regex.
        if s[pos:pos + 1] != ':':
            pos = _w(s, pos).end()
            if s[pos:pos + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, pos)
        pos += 1

        # Skip whitespace before the value; running off the end is left
        # for scan_once to report.
        try:
            if s[pos] in _ws:
                pos += 1
                if s[pos] in _ws:
                    pos = _w(s, pos + 1).end()
        except IndexError:
            pass

        item, pos = scan_once(s, pos)
        items.append((name, item))

        # Fetch the delimiter that follows the value ('' at end of input)
        try:
            ch = s[pos]
            if ch in _ws:
                pos = _w(s, pos + 1).end()
                ch = s[pos]
        except IndexError:
            ch = ''
        pos += 1

        if ch == '}':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, pos - 1)

        # After a comma the next token must be a property name
        try:
            ch = s[pos]
            if ch in _ws:
                pos += 1
                ch = s[pos]
                if ch in _ws:
                    pos = _w(s, pos + 1).end()
                    ch = s[pos]
        except IndexError:
            ch = ''

        pos += 1
        if ch != '"':
            raise JSONDecodeError(expect_name, s, pos - 1)

    return _build_object(items, object_hook, object_pairs_hook), pos


def _build_object(pairs, object_hook, object_pairs_hook):
    """Turn a list of ``(key, value)`` pairs into the decoded object.

    ``object_pairs_hook`` takes priority and receives the list itself;
    otherwise a dict is built and passed through ``object_hook`` if given.
    """
    if object_pairs_hook is not None:
        return object_pairs_hook(pairs)
    obj = dict(pairs)
    if object_hook is not None:
        obj = object_hook(obj)
    return obj
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``state`` is a ``(s, end)`` pair; parsing starts at ``s[end]``.  Each
    element is read with ``scan_once``.

    Returns ``(values, index)`` where ``values`` is a list and ``index``
    is just past the closing ``]``.  Raises JSONDecodeError when the input
    ends before the first element or when an element is not followed by
    ``,`` or ``]``.
    """
    s, pos = state
    items = []
    ch = s[pos:pos + 1]
    if ch in _ws:
        pos = _w(s, pos + 1).end()
        ch = s[pos:pos + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        return items, pos + 1
    if ch == '':
        raise JSONDecodeError("Expecting value or ']'", s, pos)

    while True:
        item, pos = scan_once(s, pos)
        items.append(item)

        # Fetch the delimiter that follows the element ('' at end of input)
        ch = s[pos:pos + 1]
        if ch in _ws:
            pos = _w(s, pos + 1).end()
            ch = s[pos:pos + 1]
        pos += 1
        if ch == ']':
            return items, pos
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, pos - 1)

        # Skip whitespace before the next element; running off the end
        # is left for scan_once to report.
        try:
            if s[pos] in _ws:
                pos += 1
                if s[pos] in _ws:
                    pos = _w(s, pos + 1).end()
        except IndexError:
            pass
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default JSON values are translated to Python values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    Outside the JSON spec, ``NaN``, ``Infinity`` and ``-Infinity`` are
    also understood and decoded to the corresponding ``float`` values.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """
        *encoding* is the encoding used to interpret any :class:`str`
        objects decoded by this instance (``'utf-8'`` by default).  It has
        no effect when decoding :class:`unicode` objects.

        Only encodings that are a superset of ASCII currently work;
        strings in other encodings should be passed in as
        :class:`unicode`.

        *object_hook*, if given, is called with the result of every JSON
        object decoded, and its return value is used in place of the
        :class:`dict`.  This can provide custom deserializations (e.g. to
        support JSON-RPC class hinting).

        *object_pairs_hook*, if given, is called with the result of every
        object literal decoded as an ordered list of pairs, and its return
        value is used instead of the :class:`dict`.  This allows decoders
        that rely on the order in which key and value pairs are decoded
        (for example, :func:`collections.OrderedDict` remembers insertion
        order).  If *object_hook* is also given, *object_pairs_hook* takes
        priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded.  The default is equivalent to
        ``float(num_str)``.  This allows another datatype or parser for
        JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded.  The default is equivalent to ``int(num_str)``.
        This allows another datatype or parser for JSON integers (e.g.
        :class:`float`).

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'`` or ``'NaN'``.  This can be used to
        raise an exception when invalid JSON numbers are encountered.

        *strict* controls what happens when an invalid control character
        appears inside a string.  With the default ``True`` an unescaped
        control character is a parse error; with ``False`` control
        characters are allowed in strings.

        """
        # User-facing configuration
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Value converters, with defaults for any that were not supplied
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__

        # Parsers for the compound and string types
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        self.memo = {}
        # Built last, after every other attribute has been assigned
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        Raises JSONDecodeError if anything other than whitespace follows
        the document.

        """
        text = s
        if _PY3 and isinstance(text, bytes):
            text = str(text, self.encoding)
        value, stop = self.raw_decode(text)
        # Only trailing whitespace may follow the document
        stop = _w(text, stop).end()
        if stop == len(text):
            return value
        raise JSONDecodeError("Extra data", text, stop, len(text))

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        ``idx`` optionally gives the offset in ``s`` at which the JSON
        document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        # Skip a leading byte order mark: either the single character
        # U+FEFF or the three-character sequence EF BB BF
        if len(s) > idx:
            first = ord(s[idx])
            if first == 0xfeff:
                idx += 1
            elif first == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
                idx += 3
        start = _w(s, idx).end()
        return self.scan_once(s, idx=start)