Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/simplejson-3.18.4-py3.8-linux-x86_64.egg/simplejson/decoder.py: 27%

227 statements  

« prev     ^ index     » next       coverage.py v7.2.2, created at 2023-03-26 06:04 +0000

1"""Implementation of JSONDecoder 

2""" 

3from __future__ import absolute_import 

4import re 

5import sys 

6import struct 

7from .compat import PY3, unichr 

8from .scanner import make_scanner, JSONDecodeError 

9 

def _import_c_scanstring():
    """Fetch the C implementation of ``scanstring`` if it can be imported.

    Returns ``None`` when importing it from ``._speedups`` raises
    ``ImportError``.
    """
    try:
        from ._speedups import scanstring as accelerated
    except ImportError:
        accelerated = None
    return accelerated


c_scanstring = _import_c_scanstring()

17 

18# NOTE (3.1.0): JSONDecodeError may still be imported from this module for 

19# compatibility, but it was never in the __all__ 

20__all__ = ['JSONDecoder'] 

21 

# Regex flags applied to the patterns compiled in this module.
FLAGS = re.DOTALL | re.MULTILINE | re.VERBOSE

23 

def _floatconstants():
    """Build the special float values backing the non-standard constants.

    Returns a ``(nan, inf, -inf)`` tuple.
    """
    if sys.version_info >= (2, 6):
        not_a_number, infinity = float('nan'), float('inf')
    else:
        # Interpreters older than 2.6: unpack two big-endian doubles from
        # their raw byte representation instead.
        raw = '7FF80000000000007FF0000000000000'.decode('hex')
        not_a_number, infinity = struct.unpack('>dd', raw)
    return not_a_number, infinity, -infinity


NaN, PosInf, NegInf = _floatconstants()

34 

# Names of the special float literals, mapped to the float each one denotes.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Lazily matches a run of characters (group 1) up to the first double quote,
# backslash, or control character in the range \x00-\x1f (group 2).
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)

# Single-character escapes: the character following a backslash, mapped to
# the text it decodes to.
BACKSLASH = {
    '"': u'"',
    '\\': u'\\',
    '/': u'/',
    'b': u'\b',
    'f': u'\f',
    'n': u'\n',
    'r': u'\r',
    't': u'\t',
}

# Encoding assumed when the caller does not supply one.
DEFAULT_ENCODING = "utf-8"

48 

def _decode_hex4(esc, _hexdigits='0123456789abcdefABCDEF'):
    """Return the value of *esc* when it is exactly four ASCII hex digits.

    Returns -1 for anything else.  ``int(esc, 16)`` on its own is too
    lenient for JSON: it also accepts signs, surrounding whitespace, a
    ``0x`` prefix, underscores and non-ASCII decimal digits.
    """
    if len(esc) != 4:
        return -1
    for digit in esc:
        if digit not in _hexdigits:
            return -1
    return int(esc, 16)


def py_scanstring(s, end, encoding=None, strict=True,
                  _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
                  _PY3=PY3, _maxunicode=sys.maxunicode):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError on attempt to decode an invalid string (unterminated
    string, unknown escape, malformed ``\\uXXXX`` escape). If strict is
    False then literal control characters are allowed in the string.

    *encoding* (``DEFAULT_ENCODING`` when ``None``) is only used to decode
    non-unicode content when ``_PY3`` is false.  The underscore-prefixed
    parameters are bound as defaults at definition time.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used when reporting unterminated strings
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not _PY3 and not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at"
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r"
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence: exactly four hex digits are required.
            # Errors are reported at the backslash (end - 1).
            msg = "Invalid \\uXXXX escape sequence"
            uni = _decode_hex4(s[end + 1:end + 5])
            if uni < 0:
                raise JSONDecodeError(msg, s, end - 1)
            end += 5
            # Check for surrogate pair on UCS-4 systems
            # Note that this will join high/low surrogate pairs
            # but will also pass unpaired surrogates through
            if (_maxunicode > 65535 and
                    uni & 0xfc00 == 0xd800 and
                    s[end:end + 2] == '\\u'):
                # Here `end` is the index of the second escape's backslash.
                uni2 = _decode_hex4(s[end + 2:end + 6])
                if uni2 < 0:
                    raise JSONDecodeError(msg, s, end)
                if uni2 & 0xfc00 == 0xdc00:
                    uni = 0x10000 + (((uni - 0xd800) << 10) |
                                     (uni2 - 0xdc00))
                    end += 6
            char = unichr(uni)
        # Append the unescaped character
        _append(char)
    return _join(chunks), end

136 

137 

# Prefer the C implementation whenever it was importable.
scanstring = c_scanstring if c_scanstring else py_scanstring

# The whitespace characters skipped between tokens: as a plain string for
# single-character membership tests, and as a compiled pattern for runs.
WHITESPACE_STR = ' \t\n\r'
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)

143 

def JSONObject(state, encoding, strict, scan_once, object_hook,
               object_pairs_hook, memo=None,
               _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the members of a JSON object and return ``(result, end)``.

    *state* is an ``(s, end)`` pair; at ``s[end]`` the parser expects,
    after optional whitespace, either a ``"`` opening the first key or the
    closing ``}``.  Keys are read with ``scanstring`` (using *encoding* and
    *strict*) and values with *scan_once*.  Keys are passed through
    ``memo.setdefault`` so equal keys share one object.

    The result is ``object_pairs_hook(pairs)`` when that hook is given,
    otherwise a ``dict`` optionally passed through *object_hook*.  The
    returned index is the one just past the closing brace.

    Raises JSONDecodeError on a missing key, ``:`` delimiter, ``,``
    delimiter or closing brace.
    """
    text, pos = state
    # Backwards compatibility: callers may omit the memo
    if memo is None:
        memo = {}
    remember = memo.setdefault
    items = []

    def finish(stop):
        # Build the final value from the collected (key, value) pairs.
        if object_pairs_hook is not None:
            return object_pairs_hook(items), stop
        mapping = dict(items)
        if object_hook is not None:
            mapping = object_hook(mapping)
        return mapping, stop

    # Slicing (rather than indexing) avoids an IndexError at end of input;
    # the checks below raise a more specific error in that case.
    ch = text[pos:pos + 1]
    # Normally the first character is already the '"' of the first key
    if ch != '"':
        if ch in _ws:
            pos = _w(text, pos).end()
            ch = text[pos:pos + 1]
        # Trivial empty object
        if ch == '}':
            return finish(pos + 1)
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                text, pos)
    pos += 1

    while True:
        key, pos = scanstring(text, pos, encoding, strict)
        key = remember(key, key)

        # Fast path for ":" directly after the key; only fall back to the
        # whitespace regex when it is not there.
        if text[pos:pos + 1] != ':':
            pos = _w(text, pos).end()
            if text[pos:pos + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", text, pos)
        pos += 1

        # Skip whitespace after the colon; a single whitespace character is
        # handled without calling the regex.
        try:
            if text[pos] in _ws:
                pos += 1
                if text[pos] in _ws:
                    pos = _w(text, pos + 1).end()
        except IndexError:
            pass

        value, pos = scan_once(text, pos)
        items.append((key, value))

        # Find the character that follows the value
        try:
            ch = text[pos]
            if ch in _ws:
                pos = _w(text, pos + 1).end()
                ch = text[pos]
        except IndexError:
            ch = ''

        if ch == '}':
            return finish(pos + 1)
        if ch != ',':
            raise JSONDecodeError(
                "Expecting ',' delimiter or '}'", text, pos)
        pos += 1

        # After a comma the next non-whitespace character must open a key
        try:
            ch = text[pos]
            if ch in _ws:
                pos += 1
                ch = text[pos]
                if ch in _ws:
                    pos = _w(text, pos + 1).end()
                    ch = text[pos]
        except IndexError:
            ch = ''

        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                text, pos)
        pos += 1

237 

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse the elements of a JSON array and return ``(values, end)``.

    *state* is an ``(s, end)`` pair; at ``s[end]`` the parser expects,
    after optional whitespace, either the first value or the closing
    ``]``.  Each element is read with *scan_once*.  The returned index is
    the one just past the closing bracket.

    Raises JSONDecodeError when the input ends before a value or ``]``, or
    when a value is not followed by ``,`` or ``]``.
    """
    text, pos = state
    items = []

    # Slices are used so that running off the end yields '' instead of an
    # IndexError.
    ch = text[pos:pos + 1]
    if ch in _ws:
        pos = _w(text, pos + 1).end()
        ch = text[pos:pos + 1]

    # Look-ahead for trivial empty array
    if ch == ']':
        return items, pos + 1
    if ch == '':
        raise JSONDecodeError("Expecting value or ']'", text, pos)

    while True:
        value, pos = scan_once(text, pos)
        items.append(value)

        ch = text[pos:pos + 1]
        if ch in _ws:
            pos = _w(text, pos + 1).end()
            ch = text[pos:pos + 1]

        if ch == ']':
            return items, pos + 1
        if ch != ',':
            raise JSONDecodeError(
                "Expecting ',' delimiter or ']'", text, pos)
        pos += 1

        # Skip whitespace after the comma; a single whitespace character is
        # handled without calling the regex.
        try:
            if text[pos] in _ws:
                pos += 1
                if text[pos] in _ws:
                    pos = _w(text, pos + 1).end()
        except IndexError:
            pass

273 

class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    By default, decoding performs these translations:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | str, unicode      |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    ``NaN``, ``Infinity``, and ``-Infinity`` are also understood and map to
    their corresponding ``float`` values, although they are outside the
    JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
                 parse_int=None, parse_constant=None, strict=True,
                 object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default). It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII
        work; strings of other encodings should be passed in as
        :class:`unicode`.

        *object_hook*, if specified, is called with the result of every
        JSON object decoded, and its return value is used in place of the
        given :class:`dict`. This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that is called with
        the result of any object literal decode as an ordered list of
        pairs. Its return value is used instead of the :class:`dict`. This
        feature can be used to implement custom decoders that rely on the
        order in which the key and value pairs are decoded (for example,
        :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the
        *object_pairs_hook* takes priority.

        *parse_float*, if specified, is called with the string of every
        JSON float to be decoded. By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or
        parser for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, is called with the string of every
        JSON int to be decoded. By default, this is equivalent to
        ``int(num_str)``. This can be used to use another datatype or
        parser for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, is called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``. This
        can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. With the default of
        ``True``, unescaped control characters are parse errors; with
        ``False``, control characters are allowed in strings.

        """
        self.encoding = DEFAULT_ENCODING if encoding is None else encoding
        self.strict = strict

        # Hooks applied to decoded objects
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook

        # Scalar converters; a falsy argument selects the built-in default
        self.parse_float = parse_float if parse_float else float
        self.parse_int = parse_int if parse_int else int
        self.parse_constant = (
            parse_constant if parse_constant else _CONSTANTS.__getitem__)

        # Parsers for composite values and strings
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring

        self.memo = {}
        # Built last: make_scanner receives this instance with the
        # attributes above already set.
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):
        """Return the Python representation of ``s`` (a ``str`` or
        ``unicode`` instance containing a JSON document).

        When ``_PY3`` is true, ``bytes`` input is first decoded with
        ``self.encoding``. Raises JSONDecodeError (``"Extra data"``) if
        anything other than whitespace follows the document.

        """
        text = s
        if _PY3 and isinstance(text, bytes):
            text = str(text, self.encoding)
        result, stop = self.raw_decode(text)
        # Trailing whitespace is allowed; anything else is an error
        stop = _w(text, stop).end()
        total = len(text)
        if stop == total:
            return result
        raise JSONDecodeError("Extra data", text, stop, total)

    def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.
        Optionally, ``idx`` can be used to specify an offset in ``s`` where
        the JSON document begins.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        Raises JSONDecodeError for a negative ``idx`` and, when ``_PY3`` is
        true, TypeError if ``s`` is not a ``str``.

        """
        if idx < 0:
            # Ensure that raw_decode bails on negative indexes, the regex
            # would otherwise mask this behavior. #98
            raise JSONDecodeError('Expecting value', s, idx)
        if _PY3 and not isinstance(s, str):
            raise TypeError("Input string must be text, not bytes")
        # Skip a leading byte order mark: either the U+FEFF character or
        # the three characters of its UTF-8 encoding.
        if idx < len(s):
            first = ord(s[idx])
            if first == 0xfeff:
                idx += 1
            elif first == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':
                idx += 3
        start = _w(s, idx).end()
        return self.scan_once(s, idx=start)