Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/ijson-3.2.0.post0-py3.8-linux-x86_64.egg/ijson/backends/python.py: 97%

206 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-06-07 06:10 +0000

1''' 

2Pure-python parsing backend. 

3''' 

4from json.decoder import scanstring 

5import re 

6 

7from ijson import common, utils 

8import codecs 

9 

10 

# One lexeme per match: either a run of lowercase letters, digits and number
# punctuation (numbers and the true/false/null literals), or any single
# non-whitespace character.
LEXEME_RE = re.compile(r'[a-z0-9eE\.\+-]+|\S')

# Single-character lexemes that are complete as soon as they are seen, so the
# Lexer never waits for more input before emitting them.
UNARY_LEXEMES = {'[', ']', '{', '}', ','}

# (position, symbol) pair the Lexer sends once the input is exhausted.
EOF = (-1, None)

14 

15 

class UnexpectedSymbol(common.JSONError):
    """Error raised when the parser receives a symbol it cannot accept."""

    def __init__(self, symbol, pos):
        # Build the message first so the base-class call stays on one line.
        message = 'Unexpected symbol %r at %d' % (symbol, pos)
        super(UnexpectedSymbol, self).__init__(message)

21 

22 

@utils.coroutine
def utf8_encoder(target):
    """
    Coroutine that incrementally decodes UTF-8 bytes and forwards the
    resulting text to `target`.

    Despite its name this coroutine *decodes*: it receives byte chunks and
    sends non-empty decoded strings to `target`. Receiving an empty chunk, or
    being closed, flushes the decoder, closes `target` and ends the coroutine.

    Raises common.IncompleteJSONError if the received bytes are not valid
    UTF-8, including a multi-byte sequence left unfinished at the end.
    """
    decoder = codecs.getincrementaldecoder('utf-8')()
    decode = decoder.decode
    send = target.send
    while True:
        try:
            final = False
            bdata = (yield)
        except GeneratorExit:
            # We are being closed: run one last decoding pass so that a
            # truncated multi-byte sequence is reported instead of ignored.
            final = True
            bdata = b''
        try:
            sdata = decode(bdata, final)
        except UnicodeDecodeError as e:
            try:
                target.close()
            except Exception:
                # Best-effort cleanup only: the decoding error raised below
                # is the one worth reporting. KeyboardInterrupt/SystemExit
                # are deliberately not swallowed.
                pass
            raise common.IncompleteJSONError(e)
        if sdata:
            send(sdata)
        elif not bdata:
            # Nothing decoded and nothing received: end of input.
            target.close()
            break

48 

@utils.coroutine
def Lexer(target):
    """
    Parses lexemes out of the incoming content, and sends them to `target` as
    (position, lexeme) pairs, where the position is the character offset of
    the lexeme counted from the start of all the text received so far.

    String lexemes are sent verbatim, including their surrounding quotes.
    A special EOF result is sent when the data source has been exhausted to
    give the target the possibility of raising custom exceptions due to
    missing content.

    Raises common.IncompleteJSONError if the input ends in the middle of a
    string.
    """
    try:
        data = (yield)
    except GeneratorExit:
        data = ''
    buf = data
    pos = 0
    # Number of characters in buffers that have already been dropped; added
    # to buffer offsets to report absolute positions.
    discarded = 0
    send = target.send
    while True:
        match = LEXEME_RE.search(buf, pos)
        if match:
            lexeme = match.group()
            if lexeme == '"':
                pos = match.start()
                start = pos + 1
                while True:
                    try:
                        end = buf.index('"', start)
                        # Walk back over the backslashes preceding the quote:
                        # an odd count means the quote itself is escaped.
                        escpos = end - 1
                        while buf[escpos] == '\\':
                            escpos -= 1
                        if (end - escpos) % 2 == 0:
                            start = end + 1
                        else:
                            break
                    except ValueError:
                        # No closing quote in what we have; ask for more.
                        try:
                            data = (yield)
                        except GeneratorExit:
                            data = ''
                        if not data:
                            raise common.IncompleteJSONError('Incomplete string lexeme')
                        # buf[start:] is known to contain no quote, so resume
                        # the search where the new data begins instead of
                        # rescanning the whole partial string on every chunk.
                        start = len(buf)
                        buf += data
                send((discarded + pos, buf[pos:end + 1]))
                pos = end + 1
            else:
                # A non-unary lexeme touching the end of the buffer may
                # continue in the next chunk, so keep reading until it
                # doesn't (or until the input ends).
                while lexeme not in UNARY_LEXEMES and match.end() == len(buf):
                    try:
                        data = (yield)
                    except GeneratorExit:
                        data = ''
                    if not data:
                        break
                    buf += data
                    match = LEXEME_RE.search(buf, pos)
                    lexeme = match.group()
                send((discarded + match.start(), lexeme))
                pos = match.end()
        else:
            # Don't ask data from an already exhausted source
            if data:
                try:
                    data = (yield)
                except GeneratorExit:
                    data = ''
            if not data:
                # Normally should raise StopIteration, but can raise
                # IncompleteJSONError too, which is the point of sending EOF
                try:
                    target.send(EOF)
                except StopIteration:
                    pass
                break
            discarded += len(buf)
            buf = data
            pos = 0

123 

124 

# Parsing states, kept on the state stack of parse_value
(
    _PARSE_VALUE,
    _PARSE_ARRAY_ELEMENT_END,
    _PARSE_OBJECT_KEY,
    _PARSE_OBJECT_END,
) = range(4)

# Positive infinity, compared against parsed numbers to detect overflow
inf = float("inf")

133 

@utils.coroutine
def parse_value(target, multivalue, use_float):
    """
    Parses results coming out of the Lexer into ijson events, which are sent to
    `target`. A stack keeps track of the type of object being parsed at the time
    (a value, an object or an array -- the last two being values themselves).

    A special EOF result coming from the Lexer indicates that no more content is
    expected. This is used to check for incomplete content and raise the
    appropriate exception, which wouldn't be possible if the Lexer simply closed
    this co-routine (either explicitly via .close(), or implicitly by itself
    finishing and decreasing the only reference to the co-routine) since that
    causes a GeneratorExit exception that cannot be replaced with a custom one.

    Parameters:

    - target: coroutine receiving the (event, value) pairs
    - multivalue: if true, a new top-level value may start after the previous
      one has finished; otherwise any further symbol raises JSONError
    - use_float: if true, numbers are converted with common.integer_or_float,
      otherwise with common.integer_or_decimal

    Raises common.IncompleteJSONError on truncated content, UnexpectedSymbol
    on symbols that are not valid at the current position, and
    common.JSONError on invalid numbers, float overflow and additional data.
    """

    state_stack = [_PARSE_VALUE]
    pop = state_stack.pop
    push = state_stack.append
    send = target.send
    # Symbol read ahead while looking for an empty array/object; it is
    # processed on the next iteration instead of reading a new one.
    prev_pos, prev_symbol = None, None
    to_number = common.integer_or_float if use_float else common.integer_or_decimal
    while True:

        if prev_pos is None:
            pos, symbol = (yield)
            if (pos, symbol) == EOF:
                if state_stack:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                break
        else:
            pos, symbol = prev_pos, prev_symbol
            prev_pos, prev_symbol = None, None
        try:
            state = state_stack[-1]
        except IndexError:
            # The previous top-level value is complete
            if multivalue:
                state = _PARSE_VALUE
                push(state)
            else:
                raise common.JSONError('Additional data found')
        assert state_stack

        if state == _PARSE_VALUE:
            # Simple, common cases
            if symbol == 'null':
                send(('null', None))
                pop()
            elif symbol == 'true':
                send(('boolean', True))
                pop()
            elif symbol == 'false':
                send(('boolean', False))
                pop()
            elif symbol[0] == '"':
                send(('string', parse_string(symbol)))
                pop()
            # Array start
            elif symbol == '[':
                send(('start_array', None))
                pos, symbol = (yield)
                if (pos, symbol) == EOF:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                if symbol == ']':
                    send(('end_array', None))
                    pop()
                else:
                    prev_pos, prev_symbol = pos, symbol
                    push(_PARSE_ARRAY_ELEMENT_END)
                    push(_PARSE_VALUE)
            # Object start
            elif symbol == '{':
                send(('start_map', None))
                pos, symbol = (yield)
                if (pos, symbol) == EOF:
                    raise common.IncompleteJSONError('Incomplete JSON content')
                if symbol == '}':
                    send(('end_map', None))
                    pop()
                else:
                    prev_pos, prev_symbol = pos, symbol
                    push(_PARSE_OBJECT_KEY)
            # A number
            else:
                # JSON numbers can't contain leading zeros
                if ((len(symbol) > 1 and symbol[0] == '0' and symbol[1] not in ('e', 'E', '.')) or
                    (len(symbol) > 2 and symbol[0:2] == '-0' and symbol[2] not in ('e', 'E', '.'))):
                    raise common.JSONError('Invalid JSON number: %s' % (symbol,))
                # Fractions need a leading digit and must be followed by a digit
                if symbol[0] == '.' or symbol[-1] == '.':
                    raise common.JSONError('Invalid JSON number: %s' % (symbol,))
                try:
                    number = to_number(symbol)
                except Exception:
                    # Not a number at all: either a literal cut short by the
                    # end of the input, or a symbol that doesn't belong here
                    if 'true'.startswith(symbol) or 'false'.startswith(symbol) or 'null'.startswith(symbol):
                        raise common.IncompleteJSONError('Incomplete JSON content')
                    raise UnexpectedSymbol(symbol, pos)
                # Checked outside the try block so that the overflow error is
                # not caught and replaced by UnexpectedSymbol. Overflow in
                # either direction is rejected.
                if number == inf or number == -inf:
                    raise common.JSONError("float overflow: %s" % (symbol,))
                send(('number', number))
                pop()

        elif state == _PARSE_OBJECT_KEY:
            if symbol[0] != '"':
                raise UnexpectedSymbol(symbol, pos)
            send(('map_key', parse_string(symbol)))
            pos, symbol = (yield)
            if (pos, symbol) == EOF:
                raise common.IncompleteJSONError('Incomplete JSON content')
            if symbol != ':':
                raise UnexpectedSymbol(symbol, pos)
            state_stack[-1] = _PARSE_OBJECT_END
            push(_PARSE_VALUE)

        elif state == _PARSE_OBJECT_END:
            if symbol == ',':
                state_stack[-1] = _PARSE_OBJECT_KEY
            elif symbol != '}':
                raise UnexpectedSymbol(symbol, pos)
            else:
                send(('end_map', None))
                # Drop both the object state and the value it belongs to
                pop()
                pop()

        elif state == _PARSE_ARRAY_ELEMENT_END:
            if symbol == ',':
                state_stack[-1] = _PARSE_ARRAY_ELEMENT_END
                push(_PARSE_VALUE)
            elif symbol != ']':
                raise UnexpectedSymbol(symbol, pos)
            else:
                send(('end_array', None))
                # Drop both the array state and the value it belongs to
                pop()
                pop()

268 

269 

def parse_string(symbol):
    """
    Decode a raw JSON string lexeme into its text value.

    `symbol` is the lexeme including its surrounding double quotes; scanning
    starts at index 1, right after the opening quote.
    """
    value, _end = scanstring(symbol, 1)
    return value

272 

273 

def basic_parse_basecoro(target, multiple_values=False, allow_comments=False,
                         use_float=False):
    '''
    Build the coroutine chain that produces unprefixed events.

    The returned coroutine is utf8_encoder feeding Lexer feeding parse_value,
    which in turn sends its events to `target`.

    Parameters:

    - target: coroutine receiving the events
    - multiple_values: passed to parse_value as `multivalue`
    - allow_comments: not supported by this backend; a true value raises
      ValueError
    - use_float: passed to parse_value as `use_float`
    '''
    if allow_comments:
        raise ValueError("Comments are not supported by the python backend")
    parser = parse_value(target, multiple_values, use_float)
    lexer = Lexer(parser)
    return utf8_encoder(lexer)

286 

287 

# Hand this module's namespace to ijson.common. Presumably this adds the rest
# of the backend's public functions on top of basic_parse_basecoro -- confirm
# against common.enrich_backend.
common.enrich_backend(globals())