Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/ijson/common.py: 43%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

250 statements  

1''' 

2Backend independent higher level interfaces, common exceptions. 

3''' 

4import decimal 

5import inspect 

6import io 

7import warnings 

8 

9from ijson import compat, utils, utils35 

10 

11 

class JSONError(Exception):
    '''Root of the exception hierarchy used for every parsing error.'''

17 

18 

class IncompleteJSONError(JSONError):
    '''Signals that a stream ended before the parser got the data it expected.'''

24 

25 

@utils.coroutine
def parse_basecoro(target):
    '''
    Coroutine that tags each incoming ``(type, value)`` event with its
    location inside the JSON object tree and sends the resulting
    ``(prefix, type, value)`` tuple to ``target``.

    The possible types and values are:

      ('null', None)
      ('boolean', <True or False>)
      ('number', <int or Decimal>)
      ('string', <unicode>)
      ('map_key', <str>)
      ('start_map', None)
      ('end_map', None)
      ('start_array', None)
      ('end_array', None)

    A prefix is the dot-separated path from the root of the document to the
    element: map members contribute their key, array members contribute the
    literal ``item``. For example, given this document::

      {
        "array": [1, 2],
        "map": {
          "key": "value"
        }
      }

    the following events are sent:

      ('', 'start_map', None)
      ('', 'map_key', 'array')
      ('array', 'start_array', None)
      ('array.item', 'number', 1)
      ('array.item', 'number', 2)
      ('array', 'end_array', None)
      ('', 'map_key', 'map')
      ('map', 'start_map', None)
      ('map', 'map_key', 'key')
      ('map.key', 'string', u'value')
      ('map', 'end_map', None)
      ('', 'end_map', None)

    '''
    path = []
    while True:
        event, value = yield
        if event in ('end_map', 'end_array'):
            # Closing a container: leave it first, then report its own prefix
            path.pop()
            prefix = '.'.join(path)
        elif event == 'map_key':
            # The key is reported at the level of its map, then takes over
            # the last path slot for the value that follows
            prefix = '.'.join(path[:-1])
            path[-1] = value
        else:
            # Container starts and scalars are reported at the current path
            prefix = '.'.join(path)
            if event == 'start_map':
                # Placeholder, overwritten by the first map_key
                path.append(None)
            elif event == 'start_array':
                path.append('item')
        target.send((prefix, event, value))

92 

93 

class ObjectBuilder:
    '''
    Incrementally builds an object from JSON parser events. Events are passed
    into the `event` function that accepts two parameters: event type and
    value. The object being built is available at any time from the `value`
    attribute; it is ``None`` until the first event has been processed.

    `map_type` is the callable used to create JSON objects (``dict`` when
    not given or falsy).

    Example::

        >>> from io import BytesIO
        >>> from ijson import basic_parse
        >>> from ijson.common import ObjectBuilder

        >>> builder = ObjectBuilder()
        >>> f = BytesIO(b'{"key": "value"}')
        >>> for event, value in basic_parse(f):
        ...     builder.event(event, value)
        >>> builder.value == {'key': 'value'}
        True

    '''
    def __init__(self, map_type=None):
        # Create both attributes up-front so that reading them before any
        # event has arrived yields None instead of raising AttributeError.
        self.value = None
        self.key = None

        def initial_set(value):
            self.value = value
        # Stack of "setter" callables; the top one stores the next value
        # into the container currently being filled.
        self.containers = [initial_set]
        self.map_type = map_type or dict

    def event(self, event, value):
        '''
        Feeds one parser event into the builder.

        `event` is the event type and `value` its associated value. Any
        event type other than 'map_key', 'start_map', 'start_array',
        'end_map' and 'end_array' is treated as a scalar and stored as is.
        '''
        if event == 'map_key':
            # Remembered until the corresponding value is stored
            self.key = value
        elif event == 'start_map':
            mappable = self.map_type()
            self.containers[-1](mappable)

            def setter(value):
                # self.key is read at call time, i.e. the latest map_key
                mappable[self.key] = value
            self.containers.append(setter)
        elif event == 'start_array':
            array = []
            self.containers[-1](array)
            self.containers.append(array.append)
        elif event == 'end_array' or event == 'end_map':
            self.containers.pop()
        else:
            self.containers[-1](value)

138 

139 

@utils.coroutine
def items_basecoro(target, prefix, map_type=None):
    '''
    Coroutine that receives ``(prefix, event, value)`` triples and sends to
    ``target`` the native Python objects found under the given prefix.

    Containers are assembled with an ObjectBuilder (created with the given
    ``map_type``) and sent once complete; any other event at the prefix has
    its value sent directly.
    '''
    container_starts = ('start_map', 'start_array')
    container_ends = ('end_map', 'end_array')
    while True:
        current, event, value = (yield)
        if current != prefix:
            continue
        if event not in container_starts:
            target.send(value)
            continue
        # Consume events until the container opened here is closed again;
        # the closing event itself is not given to the builder
        builder = ObjectBuilder(map_type=map_type)
        depth = 1
        while depth:
            builder.event(event, value)
            current, event, value = (yield)
            if event in container_starts:
                depth += 1
            elif event in container_ends:
                depth -= 1
        del builder.containers[:]
        target.send(builder.value)

163 

164 

@utils.coroutine
def kvitems_basecoro(target, prefix, map_type=None):
    '''
    A coroutine dispatching (key, value) pairs constructed from the events
    under a given prefix. The prefix should point to JSON objects.

    It receives ``(path, event, value)`` triples and sends ``(key, value)``
    tuples to ``target``. ``map_type`` is handed to the ObjectBuilder that
    assembles each value.
    '''
    builder = None
    while True:
        path, event, value = (yield)
        # A 'map_key' at the requested prefix starts a new (key, value) pair.
        # This is a `while` rather than an `if` because the event that ends
        # one value may itself be the 'map_key' starting the next pair.
        while path == prefix and event == 'map_key':
            # Number of maps opened minus maps closed since the key was seen
            object_depth = 0
            key = value
            builder = ObjectBuilder(map_type=map_type)
            path, event, value = (yield)
            if event == 'start_map':
                object_depth += 1
            # Keep feeding the builder until either a 'map_key' arrives while
            # no map is open (depth 0), or an 'end_map' brings the depth down
            # to -1. The terminating event is not passed to the builder.
            while (
                (event != 'map_key' or object_depth != 0) and
                (event != 'end_map' or object_depth != -1)):
                builder.event(event, value)
                path, event, value = (yield)
                if event == 'start_map':
                    object_depth += 1
                elif event == 'end_map':
                    object_depth -= 1
            # Empty the builder's setter stack before handing out the value
            # (presumably to release the closures it holds -- TODO confirm)
            del builder.containers[:]
            target.send((key, builder.value))

192 

193 

def integer_or_decimal(str_value):
    '''
    Turns the textual form of a number into an int, or into a Decimal when
    the text contains a '.', 'e' or 'E'.
    Shared by the backends so that numbers are represented consistently.
    '''
    looks_fractional = any(marker in str_value for marker in '.eE')
    if looks_fractional:
        return decimal.Decimal(str_value)
    return int(str_value)

202 

def integer_or_float(str_value):
    '''
    Turns the textual form of a number into an int, or into a float when
    the text contains a '.', 'e' or 'E'.
    Shared by the backends so that numbers are represented consistently.
    '''
    for marker in ('.', 'e', 'E'):
        if marker in str_value:
            return float(str_value)
    return int(str_value)

211 

def number(str_value):
    '''
    Deprecated entry point: issues a DeprecationWarning and returns the
    result of integer_or_decimal for the same input.
    '''
    removal_notice = "number() function will be removed in a later release"
    warnings.warn(removal_notice, DeprecationWarning)
    return integer_or_decimal(str_value)

215 

def file_source(f, buf_size=64*1024):
    '''
    Generator yielding the data read from a file-like object in pieces of at
    most `buf_size`; the final, empty read is yielded as well before the
    generator finishes.
    '''
    reader = compat.bytes_reader(f)
    chunk = reader.read(buf_size)
    while chunk:
        yield chunk
        chunk = reader.read(buf_size)
    # The empty result that ended the loop is still passed on to the consumer
    yield chunk

224 

225 

226def _basic_parse_pipeline(backend, config): 

227 return ( 

228 (backend['basic_parse_basecoro'], [], config), 

229 ) 

230 

231 

232def _parse_pipeline(backend, config): 

233 return ( 

234 (backend['parse_basecoro'], [], {}), 

235 (backend['basic_parse_basecoro'], [], config) 

236 ) 

237 

238 

239def _items_pipeline(backend, prefix, map_type, config): 

240 return ( 

241 (backend['items_basecoro'], (prefix,), {'map_type': map_type}), 

242 (backend['parse_basecoro'], [], {}), 

243 (backend['basic_parse_basecoro'], [], config) 

244 ) 

245 

246 

247def _kvitems_pipeline(backend, prefix, map_type, config): 

248 return ( 

249 (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type}), 

250 (backend['parse_basecoro'], [], {}), 

251 (backend['basic_parse_basecoro'], [], config) 

252 ) 

253 

254 

255def _make_basic_parse_coro(backend): 

256 def basic_parse_coro(target, **config): 

257 return utils.chain( 

258 target, 

259 *_basic_parse_pipeline(backend, config) 

260 ) 

261 return basic_parse_coro 

262 

263 

264def _make_parse_coro(backend): 

265 def parse_coro(target, **config): 

266 return utils.chain( 

267 target, 

268 *_parse_pipeline(backend, config) 

269 ) 

270 return parse_coro 

271 

272 

273def _make_items_coro(backend): 

274 def items_coro(target, prefix, map_type=None, **config): 

275 return utils.chain( 

276 target, 

277 *_items_pipeline(backend, prefix, map_type, config) 

278 ) 

279 return items_coro 

280 

281 

282def _make_kvitems_coro(backend): 

283 def kvitems_coro(target, prefix, map_type=None, **config): 

284 return utils.chain( 

285 target, 

286 *_kvitems_pipeline(backend, prefix, map_type, config) 

287 ) 

288 return kvitems_coro 

289 

290 

def is_awaitablefunction(func):
    """Truthy if `func` is a coroutine function, or a generator function
    whose code object carries the CO_ITERABLE_COROUTINE flag"""
    if inspect.iscoroutinefunction(func):
        return True
    # For generator functions the masked flag value itself is returned, so
    # the result is truthy/falsy rather than strictly a bool
    return (
        inspect.isgeneratorfunction(func) and
        (func.__code__.co_flags & inspect.CO_ITERABLE_COROUTINE)
    )

299 

def is_async_file(f):
    """Truthy if `f` has a `read` attribute that is an awaitable function"""
    if not hasattr(f, 'read'):
        return False
    return is_awaitablefunction(f.read)

306 

def is_file(x):
    """Tells whether x exposes a `read` attribute"""
    try:
        x.read
    except AttributeError:
        return False
    return True

310 

311 

def is_iterable(x):
    """Tells whether x exposes an `__iter__` attribute"""
    missing = object()
    return getattr(x, '__iter__', missing) is not missing

315 

316 

317def _get_source(source): 

318 if isinstance(source, bytes): 

319 return io.BytesIO(source) 

320 elif isinstance(source, str): 

321 return io.StringIO(source) 

322 return source 

323 

324 

325def _make_basic_parse_gen(backend): 

326 def basic_parse_gen(file_obj, buf_size=64*1024, **config): 

327 return utils.coros2gen( 

328 file_source(file_obj, buf_size=buf_size), 

329 *_basic_parse_pipeline(backend, config) 

330 ) 

331 return basic_parse_gen 

332 

333 

334def _make_parse_gen(backend): 

335 def parse_gen(file_obj, buf_size=64*1024, **config): 

336 return utils.coros2gen( 

337 file_source(file_obj, buf_size=buf_size), 

338 *_parse_pipeline(backend, config) 

339 ) 

340 return parse_gen 

341 

342 

343def _make_items_gen(backend): 

344 def items_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config): 

345 return utils.coros2gen( 

346 file_source(file_obj, buf_size=buf_size), 

347 *_items_pipeline(backend, prefix, map_type, config) 

348 ) 

349 return items_gen 

350 

351 

352def _make_kvitems_gen(backend): 

353 def kvitems_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config): 

354 return utils.coros2gen( 

355 file_source(file_obj, buf_size=buf_size), 

356 *_kvitems_pipeline(backend, prefix, map_type, config) 

357 ) 

358 return kvitems_gen 

359 

360 

361def _make_basic_parse(backend): 

362 def basic_parse(source, buf_size=64*1024, **config): 

363 source = _get_source(source) 

364 if is_async_file(source): 

365 return backend['basic_parse_async']( 

366 source, buf_size=buf_size, **config 

367 ) 

368 elif is_file(source): 

369 return backend['basic_parse_gen']( 

370 source, buf_size=buf_size, **config 

371 ) 

372 raise ValueError("Unknown source type: %r" % type(source)) 

373 return basic_parse 

374 

375 

376def _make_parse(backend): 

377 def parse(source, buf_size=64*1024, **config): 

378 source = _get_source(source) 

379 if is_async_file(source): 

380 return backend['parse_async']( 

381 source, buf_size=buf_size, **config 

382 ) 

383 elif is_file(source): 

384 return backend['parse_gen']( 

385 source, buf_size=buf_size, **config 

386 ) 

387 elif is_iterable(source): 

388 return utils.coros2gen(source, 

389 (backend['parse_basecoro'], (), {}) 

390 ) 

391 raise ValueError("Unknown source type: %r" % type(source)) 

392 return parse 

393 

394 

395def _make_items(backend): 

396 def items(source, prefix, map_type=None, buf_size=64*1024, **config): 

397 source = _get_source(source) 

398 if is_async_file(source): 

399 return backend['items_async']( 

400 source, prefix, map_type=map_type, buf_size=buf_size, **config 

401 ) 

402 elif is_file(source): 

403 return backend['items_gen']( 

404 source, prefix, map_type=map_type, buf_size=buf_size, **config 

405 ) 

406 elif is_iterable(source): 

407 return utils.coros2gen(source, 

408 (backend['items_basecoro'], (prefix,), {'map_type': map_type}) 

409 ) 

410 raise ValueError("Unknown source type: %r" % type(source)) 

411 return items 

412 

413 

414def _make_kvitems(backend): 

415 def kvitems(source, prefix, map_type=None, buf_size=64*1024, **config): 

416 source = _get_source(source) 

417 if is_async_file(source): 

418 return backend['kvitems_async']( 

419 source, prefix, map_type=map_type, buf_size=buf_size, **config 

420 ) 

421 elif is_file(source): 

422 return backend['kvitems_gen']( 

423 source, prefix, map_type=map_type, buf_size=buf_size, **config 

424 ) 

425 elif is_iterable(source): 

426 return utils.coros2gen(source, 

427 (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type}) 

428 ) 

429 raise ValueError("Unknown source type: %r" % type(source)) 

430 return kvitems 

431 

432 

# Text of the DeprecationWarning issued by the module-level parse(),
# kvitems() and items() functions of this module.
_common_functions_warn = '''
Don't use the ijson.common.* functions; instead go directly with the ijson.* ones.
See the documentation for more information.
'''

437 

def parse(events):
    """Deprecated. Like ijson.parse, except that the input is an iterable of
    events generated via ijson.basic_parse rather than a file"""
    warnings.warn(_common_functions_warn, DeprecationWarning)
    prefixing_stage = (parse_basecoro, (), {})
    return utils.coros2gen(events, prefixing_stage)

445 

446 

def kvitems(events, prefix, map_type=None):
    """Deprecated. Like ijson.kvitems, except that the input is an iterable
    of events generated via ijson.parse rather than a file"""
    warnings.warn(_common_functions_warn, DeprecationWarning)
    kvitems_stage = (kvitems_basecoro, (prefix,), {'map_type': map_type})
    return utils.coros2gen(events, kvitems_stage)

454 

455 

def items(events, prefix, map_type=None):
    """Deprecated. Like ijson.items, except that the input is an iterable of
    events generated via ijson.parse rather than a file"""
    warnings.warn(_common_functions_warn, DeprecationWarning)
    items_stage = (items_basecoro, (prefix,), {'map_type': map_type})
    return utils.coros2gen(events, items_stage)

463 

464 

class BackendCapabilities:
    '''
    Capabilities supported by a backend.

    Each capability is a slot whose description is given in ``__slots__``;
    all of them are initialised to ``True``. Because the class uses
    ``__slots__``, assigning an attribute that is not listed there raises
    AttributeError.
    '''

    # Slot name -> human-readable description of the capability
    __slots__ = {
        'c_comments': 'C-style comments (non-standard in JSON)',
        'multiple_values': 'Multiple top-level values (non-standard in JSON)',
        'invalid_leading_zeros_detection': 'Detection of leading zeros in numbers, marking them as invalid',
        'incomplete_json_tokens_detection': 'Documents with incomplete JSON tokens',
        'int64': '64 bit integers supported when running with ``use_float=True``',
    }

    def __init__(self):
        self.c_comments = True
        self.multiple_values = True
        self.invalid_leading_zeros_detection = True
        self.incomplete_json_tokens_detection = True
        self.int64 = True

484 

485 

def enrich_backend(backend, **capabilities_overrides):
    '''
    Completes a backend namespace in place: records its capabilities and
    short name, and fills in whichever coroutines, generators and
    async-iterables it does not define itself with the generic ones written
    in python.

    `backend` is the mapping holding the backend's names (it must contain
    `__name__`); `capabilities_overrides` are BackendCapabilities attributes
    to set to a non-default value.
    '''
    # Every capability starts enabled; backends unset some of these
    caps = BackendCapabilities()
    for cap_name, cap_value in capabilities_overrides.items():
        setattr(caps, cap_name, cap_value)
    backend['capabilities'] = caps

    # The last component of the dotted module name identifies the backend
    short_name = backend['__name__'].split('.')[-1]
    backend['backend'] = short_name
    backend['backend_name'] = short_name

    module_scope = globals()
    for name in ('basic_parse', 'parse', 'items', 'kvitems'):
        basecoro_name = name + '_basecoro'
        if basecoro_name not in backend:
            backend[basecoro_name] = module_scope[basecoro_name]
        # Coroutine and generator flavours come from this module's factories
        for suffix in ('_coro', '_gen'):
            entry = name + suffix
            if entry not in backend:
                backend[entry] = module_scope['_make_' + entry](backend)
        # The async flavour comes from the factories in utils35
        async_entry = name + '_async'
        if async_entry not in backend:
            backend[async_entry] = getattr(utils35, '_make_' + async_entry)(backend)
        # The dispatching entry point is always (re)created
        backend[name] = module_scope['_make_' + name](backend)