Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/simplejson-4.1.1-py3.11-linux-x86_64.egg/simplejson/decoder.py: 27%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

233 statements  

1"""Implementation of JSONDecoder 

2""" 

3from __future__ import absolute_import 

4import re 

5import sys 

6from .compat import PY3, unichr 

7from .scanner import make_scanner, JSONDecodeError 

8 

9 

def _import_c_scanstring():
    """Return the ``scanstring`` implementation from the ``_speedups``
    extension module, or ``None`` when that module cannot be imported.
    """
    try:
        from ._speedups import scanstring as accelerated
    except ImportError:
        # No compiled extension available
        accelerated = None
    return accelerated

c_scanstring = _import_c_scanstring()

17 

# NOTE (3.1.0): JSONDecodeError has never been listed in __all__, but it
# can still be imported from this module for backwards compatibility
__all__ = ['JSONDecoder']

# Flags shared by the regular expressions compiled in this module
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

23 

def _floatconstants():
    """Return the float triple ``(nan, inf, -inf)``."""
    return tuple(float(text) for text in ('nan', 'inf', '-inf'))

# Module-level names for the three special float values
NaN, PosInf, NegInf = _floatconstants()

# Lookup from the non-standard JSON constant names to their float values
_CONSTANTS = dict(zip(
    ('-Infinity', 'Infinity', 'NaN'),
    (NegInf, PosInf, NaN),
))

34 

# Matches a (lazy) run of ordinary characters followed by the first
# terminator: a double quote, a backslash, or a control character
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character that follows a backslash,
# mapped to the text it decodes to
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding used when the caller does not name one
DEFAULT_ENCODING = "utf-8"

42 

if hasattr(sys, 'get_int_max_str_digits'):
    # The interpreter already enforces the limit inside int() itself
    bounded_int = int
else:
    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
        """Backport of the integer string length conversion limitation

        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits

        Convert the string *s* to an ``int``, raising ``ValueError`` when
        it has more than *INT_MAX_STR_DIGITS* digits. As in CPython, a
        leading sign does not count towards the limit.
        """
        digits = len(s)
        # Do not count a leading '-' / '+' as a digit
        if s[:1] in ('-', '+'):
            digits -= 1
        if digits > INT_MAX_STR_DIGITS:
            raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, digits))
        return int(s)

54 

55 

def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
    """Parse the four hex digits found at ``s[end:end + 4]``.

    Returns a tuple of the parsed integer and ``end + 4``. Raises
    JSONDecodeError, positioned at ``end - 2``, when the slice is not
    exactly four hexadecimal digits.
    """
    digits = s[end:end + 4]
    if _m(digits):
        try:
            return int(digits, 16), end + 4
        except ValueError:
            # Fall through to the shared error below
            pass
    raise JSONDecodeError("Invalid \\uXXXX escape sequence", s, end - 2)

67 

def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
        _PY3=PY3, _maxunicode=sys.maxunicode,
        _scan_four_digit_hex=scan_four_digit_hex):
    """Decode the JSON string literal in s whose opening quote sits just
    before index end.

    Every valid JSON escape sequence is unescaped; an invalid string
    raises JSONDecodeError. With strict set to False, literal control
    characters are accepted and kept in the result.

    Returns a tuple of the decoded string and the index of the character
    in s after the closing quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    pieces = []
    push = pieces.append
    # Position of the opening quote, reported for unterminated strings
    start = end - 1
    wide_build = _maxunicode > 65535
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, start)
        chunk_start, end = end, match.end()
        literal, terminator = match.groups()
        # literal holds zero or more unescaped string characters
        if literal:
            if not (_PY3 or isinstance(literal, unicode)):
                literal = unicode(literal, encoding)
            push(literal)
        # The terminator is the closing quote, a literal control
        # character, or a backslash that introduces an escape sequence
        if terminator == '"':
            return _join(pieces), end
        if terminator != '\\':
            if strict:
                # The %r placeholder is passed through as-is here;
                # presumably JSONDecodeError fills it in -- TODO confirm
                raise JSONDecodeError(
                    "Invalid control character %r at", s, chunk_start)
            push(terminator)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, start)
        if esc == 'u':
            # Unicode escape sequence
            code, end = _scan_four_digit_hex(s, end + 1)
            # On wide builds a high surrogate immediately followed by
            # another \u escape is merged with it when that escape is a
            # low surrogate; unpaired surrogates pass through unchanged
            if (wide_build and
                    code & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                low, after_low = _scan_four_digit_hex(s, end + 2)
                if low & 0xfc00 == 0xdc00:
                    code = 0x10000 + (((code - 0xd800) << 10) |
                                      (low - 0xdc00))
                    end = after_low
            char = unichr(code)
        else:
            # Any other escape must be one of the single-character ones
            try:
                char = _b[esc]
            except KeyError:
                raise JSONDecodeError(
                    "Invalid \\X escape sequence %r", s, end)
            end += 1
        # Append the unescaped character
        push(char)

140 

141 

# Prefer the C speedup when it was imported successfully
scanstring = c_scanstring if c_scanstring else py_scanstring

# Matches any (possibly empty) run of JSON whitespace
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same whitespace characters, for membership tests
WHITESPACE_STR = ' \t\n\r'

147 

def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object.

    state is an ``(s, end)`` pair; scanning starts at ``s[end]``, where
    either the first key's opening quote, whitespace, or ``'}'`` is
    expected. Keys are read with scanstring and values with scan_once.

    The collected ``(key, value)`` pairs are passed to object_pairs_hook
    when it is given; otherwise they become a dict, which is passed
    through object_hook when that is given.

    Returns a tuple of the result and the index just past the closing
    ``'}'``. Raises JSONDecodeError on malformed input.
    """
    s, end = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    # setdefault(key, key) returns the first equal key stored in memo
    intern_key = memo.setdefault
    items = []
    # Slicing avoids an IndexError at end of input; the checks below
    # raise a more specific error when nothing is there
    ch = s[end:end + 1]
    # The common case is that ch is already the opening quote
    if ch != '"':
        # Note that '' (end of input) also satisfies ``in _ws``
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        if ch == '}':
            # Trivial empty object
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes or '}'",
                s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = intern_key(key, key)

        # Fast path: only run the whitespace regex when the separator
        # does not follow the key immediately
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; a single whitespace
        # character is handled without calling the regex
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        value, end = scan_once(s, end)
        items.append((key, value))

        # Look for ',' or '}' after the value
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)

        # After a comma the next key's opening quote must follow
        try:
            ch = s[end]
            if ch in _ws:
                end += 1
                ch = s[end]
                if ch in _ws:
                    end = _w(s, end + 1).end()
                    ch = s[end]
        except IndexError:
            ch = ''

        end += 1
        if ch != '"':
            if ch == '}':
                raise JSONDecodeError(
                    "Illegal trailing comma before end of object",
                    s, end - 1)
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end

245 

def JSONArray(state, scan_once, array_hook=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array.

    state is an ``(s, end)`` pair; scanning starts at ``s[end]``, where
    the first value, whitespace, or ``']'`` is expected. Each element is
    read with scan_once.

    The decoded list is passed through array_hook when it is given.

    Returns a tuple of the result and the index just past the closing
    ``']'``. Raises JSONDecodeError on malformed input.
    """
    s, end = state
    items = []
    ch = s[end:end + 1]
    # Note that '' (end of input) also satisfies ``in _ws``
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Look-ahead for trivial empty array
    if ch == ']':
        if array_hook is not None:
            items = array_hook(items)
        return items, end + 1
    if ch == '':
        raise JSONDecodeError("Expecting value or ']'", s, end)
    add = items.append
    while True:
        value, end = scan_once(s, end)
        add(value)
        # Look for ',' or ']' after the value
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        end += 1
        if ch == ']':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)

        # Skip whitespace before the next value; a single whitespace
        # character is handled without calling the regex
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        if s[end:end + 1] == ']':
            raise JSONDecodeError(
                "Illegal trailing comma before end of array",
                s, end - 1)

    if array_hook is not None:
        items = array_hook(items)
    return items, end

291 

class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs the following translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    When allow_nan=True, it also understands
    ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None, allow_nan=False,
                 array_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as :class:`unicode`.

        *object_hook*, if specified, will be called with the result of every
        JSON object decoded and its return value will be used in place of the
        given :class:`dict`.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called with
        the result of any object literal decode with an ordered list of pairs.
        The return value of *object_pairs_hook* will be used instead of the
        :class:`dict`.  This feature can be used to implement custom decoders
        that rely on the order that the key and value pairs are decoded (for
        example, :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the *object_pairs_hook*
        takes priority.

        *parse_float*, if specified, will be called with the string of every
        JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or parser
        for JSON integers (e.g. :class:`float`).

        *allow_nan*, if True (default false), will allow the parser to
        accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.

        *parse_constant*, if specified, will be
        called with one of the following strings: ``'-Infinity'``,
        ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,
        as it is rare to parse non-compliant JSON containing these values.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors, if
        ``False`` then control characters will be allowed in strings.

        *array_hook*, if specified, is stored on the decoder as
        ``array_hook``. The ``JSONArray`` parser in this module calls its
        ``array_hook`` argument with each decoded :class:`list` and uses the
        return value in its place.

        """
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else bounded_int
        if not parse_constant:
            # Without an explicit callback, constants are only looked up
            # when allow_nan is set
            parse_constant = _CONSTANTS.__getitem__ if allow_nan else None
        self.parse_constant = parse_constant
        self.strict = strict
        self.array_hook = array_hook
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last: make_scanner receives the fully configured decoder
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        On Python 3, ``bytes`` input is first decoded using
        ``self.encoding``. Raises JSONDecodeError ("Extra data") when
        anything other than whitespace follows the document.

        """
        if _PY3 and isinstance(s, bytes):
            s = str(s, self.encoding)
        obj, end = self.raw_decode(s)
        # Only trailing whitespace may follow the document
        stop = _w(s, end).end()
        total = len(s)
        if stop != total:
            raise JSONDecodeError("Extra data", s, stop, total)
        return obj

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises JSONDecodeError for a negative ``idx`` and, on Python 3,
        TypeError when ``s`` is not a ``str``.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        # strip UTF-8 bom
        if idx < len(s):
            lead = ord(s[idx])
            if lead == 0xfeff:
                idx += 1
            elif lead == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
                idx += 3
        # Skip leading whitespace before handing off to the scanner
        start = _w(s, idx).end()
        return self.scan_once(s, idx=start)

421 ord0 = ord(s[idx]) 

422 if ord0 == 0xfeff: 

423 idx += 1 

424 elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf': 

425 idx += 3 

426 return self.scan_once(s, idx=_w(s, idx).end())