Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/simplejson/decoder.py: 25%

226 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:20 +0000

1"""Implementation of JSONDecoder 

2""" 

3from __future__ import absolute_import 

4import re 

5import sys 

6import struct 

7from .compat import PY3, unichr 

8from .scanner import make_scanner, JSONDecodeError 

9 

def _import_c_scanstring():
    """Return ``scanstring`` from the optional ``_speedups`` module.

    Returns None when importing it raises ImportError, so the caller can
    fall back to another implementation.
    """
    try:
        from ._speedups import scanstring
    except ImportError:
        return None
    else:
        return scanstring


# Either the imported scanstring or None, resolved once at import time.
c_scanstring = _import_c_scanstring()

17 

# NOTE (3.1.0): JSONDecodeError can still be imported from this module for
# backwards compatibility, although it was never listed in __all__.
__all__ = ["JSONDecoder"]

# Regex flags shared by the patterns in this module that are compiled
# with FLAGS.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

23 

def _floatconstants():
    """Build the special float values used for non-standard JSON numbers.

    Returns a ``(nan, inf, -inf)`` tuple.
    """
    if sys.version_info >= (2, 6):
        nan, inf = float('nan'), float('inf')
    else:
        # Interpreters older than 2.6 take this route instead: unpack the
        # two big-endian doubles encoded by the hex string below.
        packed = '7FF80000000000007FF0000000000000'.decode('hex')
        nan, inf = struct.unpack('>dd', packed)
    return nan, inf, -inf


NaN, PosInf, NegInf = _floatconstants()

34 

# Non-standard JSON literals mapped to their float values.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Group 1: a (possibly empty) lazily matched run of characters.
# Group 2: the first double quote, backslash, or character in \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Character following a backslash -> the character it stands for.
# ``\u`` escapes are not listed here.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used when the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"

48 

if hasattr(sys, 'get_int_max_str_digits'):
    # This interpreter exposes the int/str conversion limit itself, so the
    # builtin is used unchanged.
    bounded_int = int
else:
    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
        """Backport of the integer string length conversion limitation

        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits

        ``s`` is the integer literal to convert. Only its digits count
        toward ``INT_MAX_STR_DIGITS``; a single leading ``-`` or ``+`` is
        excluded so that negative numbers get the same limit as positive
        ones.

        Returns ``int(s)``. Raises ValueError when the digit count exceeds
        ``INT_MAX_STR_DIGITS``.
        """
        # Tuple membership (not substring membership) so that an empty
        # prefix from an empty string is not mistaken for a sign.
        if s[:1] in ('-', '+'):
            ndigits = len(s) - 1
        else:
            ndigits = len(s)
        if ndigits > INT_MAX_STR_DIGITS:
            raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, ndigits))
        return int(s)

60 

61 

def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
    """Scan a four digit hex number from s[end:end + 4]

    Returns a ``(value, new_end)`` tuple where ``value`` is the parsed
    integer and ``new_end`` is ``end + 4``.

    Raises JSONDecodeError, reported at ``end - 2``, when the slice is not
    exactly four hexadecimal digits (including when ``s`` ends too early).
    """
    esc = s[end:end + 4]
    if not _m(esc):
        raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, end - 2)
    # The pattern has already validated four hex digits, so int() cannot
    # raise ValueError here and needs no exception handler.
    return int(esc, 16), end + 4

73 

def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
        _PY3=PY3, _maxunicode=sys.maxunicode,
        _scan_four_digit_hex=scan_four_digit_hex):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises ValueError
    on attempt to decode an invalid string. If strict is False then literal
    control characters are allowed in the string.

    ``encoding`` (``DEFAULT_ENCODING`` when None) is only used on the
    non-PY3 path, to convert non-unicode content to unicode.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors.
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not _PY3 and not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                # The terminator is the last character of the match, so it
                # sits at end - 1. Report that index rather than the start
                # of the chunk, so the error points at the control
                # character itself.
                # NOTE(review): the %r placeholder is presumably filled in
                # by JSONDecodeError from the character at the reported
                # position - confirm against its implementation.
                msg = "Invalid control character %r at"
                raise JSONDecodeError(msg, s, end - 1)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            # The input ended right after a backslash.
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r"
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence
            uni, end = _scan_four_digit_hex(s, end + 1)
            # Check for surrogate pair on UCS-4 systems
            # Note that this will join high/low surrogate pairs
            # but will also pass unpaired surrogates through
            if (_maxunicode > 65535 and
                uni & 0xfc00 == 0xd800 and
                s[end:end + 2] == '\\u'):
                uni2, end2 = _scan_four_digit_hex(s, end + 2)
                if uni2 & 0xfc00 == 0xdc00:
                    uni = 0x10000 + (((uni - 0xd800) << 10) |
                                     (uni2 - 0xdc00))
                    # Only consume the second escape when it really was
                    # the low half of the pair.
                    end = end2
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return _join(chunks), end

146 

147 

# Prefer the imported speedup; fall back to the pure-Python scanner when
# c_scanstring is None.
scanstring = c_scanstring if c_scanstring else py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four characters, for cheap ``in`` membership tests.
WHITESPACE_STR = ' \t\n\r'

153 

def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    ``state`` is an ``(s, end)`` pair; parsing starts at ``s[end]``, which
    is expected to be optional whitespace followed by either ``}`` or the
    opening quote of the first key.

    Keys are read with ``scanstring`` (passing ``encoding`` and ``strict``)
    and values with ``scan_once``. Equal keys are shared through ``memo``.

    Returns ``(result, end)`` where ``end`` is the index just past the
    closing ``}``. ``result`` is ``object_pairs_hook(pairs)`` when that hook
    is given; otherwise a dict, passed through ``object_hook`` when that
    hook is given.

    Raises JSONDecodeError when a key, ``:``, ``,`` or ``}`` is missing.
    """
    s, end = state
    # Backwards compatibility: callers may omit the memo.
    if memo is None:
        memo = {}
    share_key = memo.setdefault
    items = []

    # Slicing (rather than indexing) yields '' at the end of the input, so
    # the checks below raise a specific error instead of IndexError.
    nextchar = s[end:end + 1]
    # The common case is that the first key starts immediately.
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        if nextchar == '}':
            # Empty object.
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes or '}'",
                s, end)
    # Step over the opening quote of the first key.
    end += 1

    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = share_key(key, key)

        # Fast path: the separator is ":" directly after the key; only
        # otherwise run the whitespace regex.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace after the colon; a single whitespace character is
        # handled without the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input; scan_once reports the problem.
            pass

        value, end = scan_once(s, end)
        items.append((key, value))

        # Find the delimiter that follows the value.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        if nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)

        # After a comma, skip whitespace up to the next key's opening quote.
        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end

247 

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    ``state`` is an ``(s, end)`` pair; parsing starts at ``s[end]``, which
    is expected to be optional whitespace followed by ``]`` or the first
    value. Each value is read with ``scan_once``.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index just past the closing ``]``.

    Raises JSONDecodeError when the input ends before a value or ``]``, or
    when a value is followed by something other than ``,`` or ``]``.
    """
    s, end = state
    items = []

    # Note: '' (end of input) also satisfies ``in _ws``; the regex then
    # simply matches nothing and nextchar stays ''.
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]

    # Empty array, or input exhausted before any value.
    if nextchar == ']':
        return items, end + 1
    if nextchar == '':
        raise JSONDecodeError("Expecting value or ']'", s, end)

    while True:
        value, end = scan_once(s, end)
        items.append(value)

        # Find the delimiter that follows the value.
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1

        if nextchar == ']':
            return items, end
        if nextchar != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)

        # Skip whitespace after the comma; a single whitespace character is
        # handled without the regex.
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            # Ran off the end of the input; scan_once reports the problem.
            pass

283 

class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding translates JSON values as follows:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    With allow_nan=True the decoder additionally accepts ``NaN``,
    ``Infinity``, and ``-Infinity`` and maps them to the corresponding
    ``float`` values; this is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None, allow_nan=False):
        """
        *encoding* is the encoding used to interpret any :class:`str`
        objects decoded by this instance (``'utf-8'`` by default). It has
        no effect when decoding :class:`unicode` objects.

        Only encodings that are a superset of ASCII currently work; strings
        in other encodings should be passed in as :class:`unicode`.

        *object_hook*, if given, is called with the result of every JSON
        object decoded, and its return value is used in place of the
        :class:`dict`. This allows custom deserializations (e.g. JSON-RPC
        class hinting).

        *object_pairs_hook*, if given, is called with the result of every
        object literal decoded as an ordered list of pairs, and its return
        value is used instead of the :class:`dict`. This supports decoders
        that depend on key/value order (for example,
        :func:`collections.OrderedDict` remembers insertion order). When
        *object_hook* is also given, *object_pairs_hook* takes priority.

        *parse_float*, if given, is called with the string of every JSON
        float to be decoded. The default is equivalent to
        ``float(num_str)``. Use it to plug in another datatype or parser
        (e.g. :class:`decimal.Decimal`).

        *parse_int*, if given, is called with the string of every JSON int
        to be decoded. The default is equivalent to ``int(num_str)``. Use
        it to plug in another datatype or parser (e.g. :class:`float`).

        *allow_nan*, if True (default false), lets the parser accept the
        non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.

        *parse_constant*, if given, is called with one of the strings
        ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. Using it is not
        recommended, as it is rare to parse non-compliant JSON containing
        these values.

        *strict* controls what happens when an invalid control character
        is found in a string. With the default ``True``, unescaped control
        characters are parse errors; with ``False`` they are allowed.

        """
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else bounded_int
        # An explicit parse_constant wins; otherwise the constants are
        # looked up in _CONSTANTS only when allow_nan is set.
        if not parse_constant:
            parse_constant = _CONSTANTS.__getitem__ if allow_nan else None
        self.parse_constant = parse_constant
        self.strict = strict
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last, after every attribute above has been assigned.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        On PY3, ``bytes`` input is first decoded with ``self.encoding``.
        Raises JSONDecodeError ("Extra data") when anything other than
        whitespace follows the document.

        """
        if _PY3 and isinstance(s, bytes):
            s = str(s, self.encoding)
        obj, stop = self.raw_decode(s)
        # Only trailing whitespace may follow the document.
        stop = _w(s, stop).end()
        if stop == len(s):
            return obj
        raise JSONDecodeError("Extra data", s, stop, len(s))

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        ``idx`` optionally gives the offset in ``s`` where the JSON
        document begins.

        Useful for decoding a JSON document from a string that may have
        extraneous data at the end.

        Raises JSONDecodeError for a negative ``idx`` and, on PY3,
        TypeError when ``s`` is not a ``str``.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        # Strip a leading byte order mark: either the single character
        # U+FEFF or the three-character sequence EF BB BF.
        if idx < len(s):
            first = ord(s[idx])
            if first == 0xfeff:
                idx += 1
            elif first == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
                idx += 3
        start = _w(s, idx).end()
        return self.scan_once(s, idx=start)
406 raise JSONDecodeError('Expecting value', s, idx) 

407 if _PY3 and not isinstance(s, str): 

408 raise TypeError("Input string must be text, not bytes") 

409 # strip UTF-8 bom 

410 if len(s) > idx: 

411 ord0 = ord(s[idx]) 

412 if ord0 == 0xfeff: 

413 idx += 1 

414 elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': 

415 idx += 3 

416 return self.scan_once(s, idx=_w(s, idx).end())