Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/simplejson/decoder.py: 25%

1"""Implementation of JSONDecoder

2"""

3from __future__ import absolute_import

4import re

5import sys

6import struct

7from .compat import PY3, unichr

8from .scanner import make_scanner, JSONDecodeError

def _import_c_scanstring():
    """Return the C ``scanstring`` from ``._speedups``, or ``None``.

    ``None`` is returned when importing the extension module raises
    ImportError.
    """
    try:
        from ._speedups import scanstring as accelerated
    except ImportError:
        return None
    return accelerated


c_scanstring = _import_c_scanstring()

# NOTE (3.1.0): JSONDecodeError can still be imported from this module for
# backwards compatibility, but it has never been listed in __all__.
__all__ = ['JSONDecoder']

# Flags passed to the STRINGCHUNK and WHITESPACE patterns compiled below.
FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL

def _floatconstants():
    """Build the ``(nan, inf, -inf)`` float triple.

    Interpreters older than 2.6 obtain the values by unpacking two
    big-endian doubles from a hex-encoded byte string; newer ones simply
    parse ``'nan'`` and ``'inf'`` with ``float()``.
    """
    if sys.version_info >= (2, 6):
        not_a_number, infinity = float('nan'), float('inf')
    else:
        packed = '7FF80000000000007FF0000000000000'.decode('hex')
        not_a_number, infinity = struct.unpack('>dd', packed)
    return not_a_number, infinity, -infinity


NaN, PosInf, NegInf = _floatconstants()

# Maps the non-standard constant names to the float values built above.
_CONSTANTS = {
    '-Infinity': NegInf,
    'Infinity': PosInf,
    'NaN': NaN,
}

# Lazily matches a run of string content followed by one terminator: a double
# quote, a backslash, or a character in the range \x00-\x1f.
STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
# Single-character escapes (the character following a backslash) mapped to
# the text each one decodes to.
BACKSLASH = {
    '"': u'"', '\\': u'\\', '/': u'/',
    'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
}

# Encoding used whenever the caller passes ``encoding=None``.
DEFAULT_ENCODING = "utf-8"

if not hasattr(sys, 'get_int_max_str_digits'):
    def bounded_int(s, INT_MAX_STR_DIGITS=4300):
        """Convert ``s`` to ``int``, rejecting overly long digit strings.

        Backport of the integer string conversion length limitation, see
        https://docs.python.org/3/library/stdtypes.html#int-max-str-digits

        Raises ValueError when ``len(s)`` exceeds ``INT_MAX_STR_DIGITS``.
        """
        digit_count = len(s)
        if digit_count <= INT_MAX_STR_DIGITS:
            return int(s)
        raise ValueError("Exceeds the limit (%s) for integer string conversion: value has %s digits" % (INT_MAX_STR_DIGITS, digit_count))
else:
    # ``sys`` exposes get_int_max_str_digits, so plain int() is used as is.
    bounded_int = int

def scan_four_digit_hex(s, end, _m=re.compile(r'^[0-9a-fA-F]{4}$').match):
    """Parse the four hexadecimal digits found at ``s[end:end + 4]``.

    Returns a ``(value, end + 4)`` tuple. Raises JSONDecodeError (with
    ``end - 2`` as the position argument) when the slice is not exactly
    four hex digits.
    """
    digits = s[end:end + 4]
    error_text = "Invalid \\uXXXX escape sequence"
    if _m(digits) is None:
        raise JSONDecodeError(error_text, s, end - 2)
    try:
        code_point = int(digits, 16)
    except ValueError:
        raise JSONDecodeError(error_text, s, end - 2)
    return code_point, end + 4

def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match, _join=u''.join,
        _PY3=PY3, _maxunicode=sys.maxunicode,
        _scan_four_digit_hex=scan_four_digit_hex):
    """Decode the JSON string literal in ``s`` whose body starts at ``end``.

    ``end`` is the index of the character right after the opening quote.
    Every valid escape sequence is unescaped; an invalid or unterminated
    string raises JSONDecodeError. With ``strict`` set to False, literal
    control characters are accepted and copied into the result.

    Returns ``(decoded_string, index_after_closing_quote)``.
    """
    if encoding is None:
        encoding = DEFAULT_ENCODING
    pieces = []
    add_piece = pieces.append
    opening_quote = end - 1
    while True:
        match = _m(s, end)
        if match is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)
        chunk_start = end
        end = match.end()
        text, stop_char = match.groups()
        # ``text`` holds zero or more plain (unescaped) characters
        if text:
            if not _PY3 and not isinstance(text, unicode):
                text = unicode(text, encoding)
            add_piece(text)
        # ``stop_char`` is the closing quote, a literal control character,
        # or the backslash that introduces an escape sequence
        if stop_char == '"':
            break
        if stop_char != '\\':
            if strict:
                msg = "Invalid control character %r at"
                raise JSONDecodeError(msg, s, chunk_start)
            add_piece(stop_char)
            continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, opening_quote)
        if esc == 'u':
            # \uXXXX escape
            code, end = _scan_four_digit_hex(s, end + 1)
            # On wide-unicode builds a high surrogate immediately followed
            # by another \u escape is combined when that escape is a low
            # surrogate; unpaired surrogates are passed through unchanged.
            is_high_surrogate = (code & 0xfc00) == 0xd800
            if (_maxunicode > 65535 and is_high_surrogate
                    and s[end:end + 2] == '\\u'):
                low, after_low = _scan_four_digit_hex(s, end + 2)
                if (low & 0xfc00) == 0xdc00:
                    code = 0x10000 + (((code - 0xd800) << 10) |
                                      (low - 0xdc00))
                    end = after_low
            char = unichr(code)
        else:
            # Any other escape has to be present in the lookup table
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\X escape sequence %r"
                raise JSONDecodeError(msg, s, end)
            end += 1
        add_piece(char)
    return _join(pieces), end

146

147

# Use the C scanner when the extension was importable, otherwise fall back
# to the pure-Python implementation.
scanstring = c_scanstring or py_scanstring

# Matches a (possibly empty) run of space, tab, newline and carriage return.
WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
# The same four whitespace characters as a plain string, for ``in`` tests.
WHITESPACE_STR = ' \t\n\r'

153

def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening brace has already been consumed.

    ``state`` is an ``(s, end)`` pair where ``end`` indexes the character
    after the ``{``. Keys are read with ``scanstring`` and shared through
    ``memo``; values are read with ``scan_once``. When ``object_pairs_hook``
    is given it receives the list of ``(key, value)`` pairs and its result
    is returned; otherwise a dict is built and, if ``object_hook`` is given,
    passed through it.

    Returns ``(result, index_after_closing_brace)`` and raises
    JSONDecodeError on malformed input.
    """
    s, end = state
    # Backwards compatibility: callers may omit the memo
    if memo is None:
        memo = {}
    intern_key = memo.setdefault
    items = []
    # Slicing never raises IndexError; an empty slice falls through to the
    # more specific error below
    ch = s[end:end + 1]
    # The common case is that the first key starts right away
    if ch != '"':
        if ch in _ws:
            end = _w(s, end).end()
            ch = s[end:end + 1]
        # Empty object
        if ch == '}':
            if object_pairs_hook is not None:
                return object_pairs_hook(items), end + 1
            empty = {}
            if object_hook is not None:
                empty = object_hook(empty)
            return empty, end + 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes or '}'",
                s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = intern_key(key, key)

        # Fast path: the separator is ":" directly after the key, so the
        # whitespace regex is only consulted when it is not.
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)
        end += 1

        # Skip whitespace before the value; one blank is handled inline
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        value, end = scan_once(s, end)
        items.append((key, value))

        # Find the delimiter that follows the value
        try:
            ch = s[end]
            if ch in _ws:
                end = _w(s, end + 1).end()
                ch = s[end]
        except IndexError:
            ch = ''
        end += 1

        if ch == '}':
            break
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or '}'", s, end - 1)

        # Skip whitespace before the next key
        try:
            ch = s[end]
            if ch in _ws:
                end += 1
                ch = s[end]
                if ch in _ws:
                    end = _w(s, end + 1).end()
                    ch = s[end]
        except IndexError:
            ch = ''

        end += 1
        if ch != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    if object_pairs_hook is not None:
        return object_pairs_hook(items), end
    mapping = dict(items)
    if object_hook is not None:
        mapping = object_hook(mapping)
    return mapping, end

247

def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening bracket has already been consumed.

    ``state`` is an ``(s, end)`` pair where ``end`` indexes the character
    after the ``[``. Each element is read with ``scan_once``.

    Returns ``(list_of_values, index_after_closing_bracket)`` and raises
    JSONDecodeError on malformed input.
    """
    s, end = state
    items = []
    ch = s[end:end + 1]
    if ch in _ws:
        end = _w(s, end + 1).end()
        ch = s[end:end + 1]
    # Empty array
    if ch == ']':
        return items, end + 1
    if ch == '':
        raise JSONDecodeError("Expecting value or ']'", s, end)
    add_item = items.append
    while True:
        element, end = scan_once(s, end)
        add_item(element)
        ch = s[end:end + 1]
        if ch in _ws:
            end = _w(s, end + 1).end()
            ch = s[end:end + 1]
        end += 1
        if ch == ']':
            return items, end
        if ch != ',':
            raise JSONDecodeError("Expecting ',' delimiter or ']'", s, end - 1)

        # Skip whitespace before the next element; one blank is handled
        # inline before falling back to the regex
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

283

284class JSONDecoder(object):

285 """Simple JSON <http://json.org> decoder

286

287 Performs the following translations in decoding by default:

288

289 +---------------+-------------------+

290 | JSON | Python |

291 +===============+===================+

292 | object | dict |

293 +---------------+-------------------+

294 | array | list |

295 +---------------+-------------------+

296 | string | str, unicode |

297 +---------------+-------------------+

298 | number (int) | int, long |

299 +---------------+-------------------+

300 | number (real) | float |

301 +---------------+-------------------+

302 | true | True |

303 +---------------+-------------------+

304 | false | False |

305 +---------------+-------------------+

306 | null | None |

307 +---------------+-------------------+

308

309 When allow_nan=True, it also understands

310 ``NaN``, ``Infinity``, and ``-Infinity`` as

311 their corresponding ``float`` values, which is outside the JSON spec.

312

313 """

314

315 def __init__(self, encoding=None, object_hook=None, parse_float=None,

316 parse_int=None, parse_constant=None, strict=True,

317 object_pairs_hook=None, allow_nan=False):

318 """

319 *encoding* determines the encoding used to interpret any

320 :class:`str` objects decoded by this instance (``'utf-8'`` by

321 default). It has no effect when decoding :class:`unicode` objects.

322

323 Note that currently only encodings that are a superset of ASCII work,

324 strings of other encodings should be passed in as :class:`unicode`.

325

326 *object_hook*, if specified, will be called with the result of every

327 JSON object decoded and its return value will be used in place of the

328 given :class:`dict`. This can be used to provide custom

329 deserializations (e.g. to support JSON-RPC class hinting).

330

331 *object_pairs_hook* is an optional function that will be called with

332 the result of any object literal decode with an ordered list of pairs.

333 The return value of *object_pairs_hook* will be used instead of the

334 :class:`dict`. This feature can be used to implement custom decoders

335 that rely on the order that the key and value pairs are decoded (for

336 example, :func:`collections.OrderedDict` will remember the order of

337 insertion). If *object_hook* is also defined, the *object_pairs_hook*

338 takes priority.

339

340 *parse_float*, if specified, will be called with the string of every

341 JSON float to be decoded. By default, this is equivalent to

342 ``float(num_str)``. This can be used to use another datatype or parser

343 for JSON floats (e.g. :class:`decimal.Decimal`).

344

345 *parse_int*, if specified, will be called with the string of every

346 JSON int to be decoded. By default, this is equivalent to

347 ``int(num_str)``. This can be used to use another datatype or parser

348 for JSON integers (e.g. :class:`float`).

349

350 *allow_nan*, if True (default false), will allow the parser to

351 accept the non-standard floats ``NaN``, ``Infinity``, and ``-Infinity``.

352

353 *parse_constant*, if specified, will be

354 called with one of the following strings: ``'-Infinity'``,

355 ``'Infinity'``, ``'NaN'``. It is not recommended to use this feature,

356 as it is rare to parse non-compliant JSON containing these values.

357

358 *strict* controls the parser's behavior when it encounters an

359 invalid control character in a string. The default setting of

360 ``True`` means that unescaped control characters are parse errors, if

361 ``False`` then control characters will be allowed in strings.

362

363 """

364 if encoding is None:

365 encoding = DEFAULT_ENCODING

366 self.encoding = encoding

367 self.object_hook = object_hook

368 self.object_pairs_hook = object_pairs_hook

369 self.parse_float = parse_float or float

370 self.parse_int = parse_int or bounded_int

371 self.parse_constant = parse_constant or (allow_nan and _CONSTANTS.__getitem__ or None)

372 self.strict = strict

373 self.parse_object = JSONObject

374 self.parse_array = JSONArray

375 self.parse_string = scanstring

376 self.memo = {}

377 self.scan_once = make_scanner(self)

378

379 def decode(self, s, _w=WHITESPACE.match, _PY3=PY3):

380 """Return the Python representation of ``s`` (a ``str`` or ``unicode``

381 instance containing a JSON document)

382

383 """

384 if _PY3 and isinstance(s, bytes):

385 s = str(s, self.encoding)

386 obj, end = self.raw_decode(s)

387 end = _w(s, end).end()

388 if end != len(s):

389 raise JSONDecodeError("Extra data", s, end, len(s))

390 return obj

391

392 def raw_decode(self, s, idx=0, _w=WHITESPACE.match, _PY3=PY3):

393 """Decode a JSON document from ``s`` (a ``str`` or ``unicode``

394 beginning with a JSON document) and return a 2-tuple of the Python

395 representation and the index in ``s`` where the document ended.

396 Optionally, ``idx`` can be used to specify an offset in ``s`` where

397 the JSON document begins.

398

399 This can be used to decode a JSON document from a string that may

400 have extraneous data at the end.

401

402 """

403 if idx < 0:

404 # Ensure that raw_decode bails on negative indexes, the regex

405 # would otherwise mask this behavior. #98

406 raise JSONDecodeError('Expecting value', s, idx)

407 if _PY3 and not isinstance(s, str):

408 raise TypeError("Input string must be text, not bytes")

409 # strip UTF-8 bom

410 if len(s) > idx:

411 ord0 = ord(s[idx])

412 if ord0 == 0xfeff:

413 idx += 1

414 elif ord0 == 0xef and s[idx:idx + 3] == '\xef\xbb\xbf':

415 idx += 3

416 return self.scan_once(s, idx=_w(s, idx).end())