Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/ijson/common.py: 43%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
'''
Backend independent higher level interfaces, common exceptions.
'''
4import decimal
5import inspect
6import io
7import warnings
9from ijson import compat, utils, utils35
class JSONError(Exception):
    '''
    Root of the exception hierarchy used for every JSON parsing error.
    '''
class IncompleteJSONError(JSONError):
    '''
    Signals that the stream ended (or could not be read) before the parser
    obtained the data it was expecting.
    '''
@utils.coroutine
def parse_basecoro(target):
    '''
    Coroutine that annotates basic parsing events with their location in the
    JSON object tree and forwards them to ``target``.

    It receives ``(event, value)`` pairs and sends ``(prefix, type, value)``
    tuples. The types and values handled are:

        ('null', None)
        ('boolean', <True or False>)
        ('number', <int or Decimal>)
        ('string', <unicode>)
        ('map_key', <str>)
        ('start_map', None)
        ('end_map', None)
        ('start_array', None)
        ('end_array', None)

    The prefix is the dot-separated path leading from the document root to the
    element the event refers to; elements of an array contribute an ``item``
    component. Given this document::

        {
          "array": [1, 2],
          "map": {
            "key": "value"
          }
        }

    the following events are sent:

        ('', 'start_map', None)
        ('', 'map_key', 'array')
        ('array', 'start_array', None)
        ('array.item', 'number', 1)
        ('array.item', 'number', 2)
        ('array', 'end_array', None)
        ('', 'map_key', 'map')
        ('map', 'start_map', None)
        ('map', 'map_key', 'key')
        ('map.key', 'string', u'value')
        ('map', 'end_map', None)
        ('', 'end_map', None)

    '''
    # One entry per open container: the current key for a map (None until the
    # first key arrives) or the literal 'item' for an array.
    stack = []
    while True:
        event, value = yield
        if event in ('end_map', 'end_array'):
            # Leaving a container: its prefix no longer includes its own slot.
            stack.pop()
            prefix = '.'.join(stack)
        elif event == 'map_key':
            # A key is reported under the prefix of the map that owns it, and
            # then becomes the path component for the value that follows.
            prefix = '.'.join(stack[:-1])
            stack[-1] = value
        else:
            prefix = '.'.join(stack)
            if event == 'start_map':
                stack.append(None)
            elif event == 'start_array':
                stack.append('item')
        target.send((prefix, event, value))
class ObjectBuilder:
    '''
    Assembles a Python object step by step from JSON parser events. Each event
    is fed through the `event` method as an (event type, value) pair, and the
    object built so far can be read at any point from the `value` attribute
    (which only exists once the first value-producing event has been seen).

    Example::

        >>> from io import BytesIO
        >>> from ijson import basic_parse
        >>> from ijson.common import ObjectBuilder

        >>> builder = ObjectBuilder()
        >>> f = BytesIO(b'{"key": "value"}')
        >>> for event, value in basic_parse(f):
        ...     builder.event(event, value)
        >>> builder.value == {'key': 'value'}
        True

    '''

    def __init__(self, map_type=None):
        self.map_type = map_type or dict

        def store_root(root):
            self.value = root

        # Stack of one-argument callables; the top one knows how to place the
        # next value into the container currently being filled.
        self.containers = [store_root]

    def event(self, event, value):
        stack = self.containers
        if event in ('end_array', 'end_map'):
            stack.pop()
        elif event == 'map_key':
            self.key = value
        elif event == 'start_array':
            items = []
            stack[-1](items)
            stack.append(items.append)
        elif event == 'start_map':
            mapping = self.map_type()
            stack[-1](mapping)

            def store_entry(item):
                # self.key is read when the value arrives, so it is always the
                # most recently seen key.
                mapping[self.key] = item

            stack.append(store_entry)
        else:
            # Any scalar value.
            stack[-1](value)
@utils.coroutine
def items_basecoro(target, prefix, map_type=None):
    '''
    A coroutine that sends to ``target`` the native Python objects built from
    the events found under the given prefix.
    '''
    container_starts = ('start_map', 'start_array')
    container_ends = ('end_map', 'end_array')
    while True:
        current, event, value = yield
        if current != prefix:
            continue
        if event not in container_starts:
            # Anything that does not open a container is forwarded as is.
            target.send(value)
            continue
        # A container opens at the prefix: feed events to a builder until the
        # matching end event brings the nesting depth back to zero.
        builder = ObjectBuilder(map_type=map_type)
        depth = 1
        while depth:
            builder.event(event, value)
            current, event, value = yield
            if event in container_starts:
                depth += 1
            elif event in container_ends:
                depth -= 1
        # The closing event is never given to the builder, so its setter stack
        # is emptied by hand.
        del builder.containers[:]
        target.send(builder.value)
@utils.coroutine
def kvitems_basecoro(target, prefix, map_type=None):
    '''
    A coroutine that sends to ``target`` the (key, value) pairs built from the
    events under the given prefix. The prefix should point to JSON objects.
    '''
    while True:
        path, event, value = yield
        # Inner loop: one iteration per member of the object at `prefix`. It is
        # re-entered directly when the event that ended the previous value is
        # itself the next key of that object.
        while path == prefix and event == 'map_key':
            key = value
            builder = ObjectBuilder(map_type=map_type)
            path, event, value = yield
            # Map nesting relative to the member's value; -1 means the object
            # holding the member has just been closed.
            depth = 1 if event == 'start_map' else 0
            while True:
                next_member = event == 'map_key' and depth == 0
                owner_closed = event == 'end_map' and depth == -1
                if next_member or owner_closed:
                    break
                builder.event(event, value)
                path, event, value = yield
                if event == 'start_map':
                    depth += 1
                elif event == 'end_map':
                    depth -= 1
            # Drop the builder's pending setters before handing out the value.
            del builder.containers[:]
            target.send((key, builder.value))
def integer_or_decimal(str_value):
    '''
    Turns the textual form of a number into an int, or into a Decimal when the
    text contains a fraction or an exponent.
    Shared by the backends so that numbers are represented consistently.
    '''
    if any(marker in str_value for marker in ('.', 'e', 'E')):
        return decimal.Decimal(str_value)
    return int(str_value)
def integer_or_float(str_value):
    '''
    Turns the textual form of a number into an int, or into a float when the
    text contains a fraction or an exponent.
    Shared by the backends so that numbers are represented consistently.
    '''
    for marker in ('.', 'e', 'E'):
        if marker in str_value:
            return float(str_value)
    return int(str_value)
def number(str_value):
    '''
    Deprecated alias of `integer_or_decimal`.

    Emits a DeprecationWarning and returns whatever `integer_or_decimal`
    returns for ``str_value``.
    '''
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module, so the default warning filters can actually show it to the
    # code that still uses this function.
    warnings.warn(
        "number() function will be removed in a later release",
        DeprecationWarning,
        stacklevel=2,
    )
    return integer_or_decimal(str_value)
def file_source(f, buf_size=64*1024):
    '''
    A generator yielding the data read from a file-like object in pieces of at
    most ``buf_size``; the last thing it yields is the empty read that marks
    the end of the data.
    '''
    reader = compat.bytes_reader(f)
    chunk = reader.read(buf_size)
    while chunk:
        yield chunk
        chunk = reader.read(buf_size)
    # The final, empty chunk is yielded too before the generator finishes.
    yield chunk
def _basic_parse_pipeline(backend, config):
    '''
    Describes the coroutine chain behind ``basic_parse`` as a tuple of
    (coroutine, args, kwargs) stages.
    '''
    basic_parse_stage = (backend['basic_parse_basecoro'], [], config)
    return (basic_parse_stage,)
def _parse_pipeline(backend, config):
    '''
    Describes the coroutine chain behind ``parse`` as a tuple of
    (coroutine, args, kwargs) stages; only the basic parsing stage receives
    ``config``.
    '''
    parse_stage = (backend['parse_basecoro'], [], {})
    basic_parse_stage = (backend['basic_parse_basecoro'], [], config)
    return (parse_stage, basic_parse_stage)
def _items_pipeline(backend, prefix, map_type, config):
    '''
    Describes the coroutine chain behind ``items`` as a tuple of
    (coroutine, args, kwargs) stages; only the basic parsing stage receives
    ``config``.
    '''
    items_stage = (backend['items_basecoro'], (prefix,), {'map_type': map_type})
    parse_stage = (backend['parse_basecoro'], [], {})
    basic_parse_stage = (backend['basic_parse_basecoro'], [], config)
    return (items_stage, parse_stage, basic_parse_stage)
def _kvitems_pipeline(backend, prefix, map_type, config):
    '''
    Describes the coroutine chain behind ``kvitems`` as a tuple of
    (coroutine, args, kwargs) stages; only the basic parsing stage receives
    ``config``.
    '''
    kvitems_stage = (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type})
    parse_stage = (backend['parse_basecoro'], [], {})
    basic_parse_stage = (backend['basic_parse_basecoro'], [], config)
    return (kvitems_stage, parse_stage, basic_parse_stage)
def _make_basic_parse_coro(backend):
    '''Creates the ``basic_parse_coro`` function bound to ``backend``.'''
    def basic_parse_coro(target, **config):
        stages = _basic_parse_pipeline(backend, config)
        return utils.chain(target, *stages)
    return basic_parse_coro
def _make_parse_coro(backend):
    '''Creates the ``parse_coro`` function bound to ``backend``.'''
    def parse_coro(target, **config):
        stages = _parse_pipeline(backend, config)
        return utils.chain(target, *stages)
    return parse_coro
def _make_items_coro(backend):
    '''Creates the ``items_coro`` function bound to ``backend``.'''
    def items_coro(target, prefix, map_type=None, **config):
        stages = _items_pipeline(backend, prefix, map_type, config)
        return utils.chain(target, *stages)
    return items_coro
def _make_kvitems_coro(backend):
    '''Creates the ``kvitems_coro`` function bound to ``backend``.'''
    def kvitems_coro(target, prefix, map_type=None, **config):
        stages = _kvitems_pipeline(backend, prefix, map_type, config)
        return utils.chain(target, *stages)
    return kvitems_coro
def is_awaitablefunction(func):
    """True if `func` is an awaitable function

    That is, either a coroutine function according to
    ``inspect.iscoroutinefunction``, or a generator function whose code object
    carries the ``CO_ITERABLE_COROUTINE`` flag. The result is always a real
    ``bool``.
    """
    if inspect.iscoroutinefunction(func):
        return True
    if not inspect.isgeneratorfunction(func):
        return False
    # The masked flags are an int (0 or the flag value); convert explicitly so
    # callers never receive the raw bitmask.
    return bool(func.__code__.co_flags & inspect.CO_ITERABLE_COROUTINE)
def is_async_file(f):
    """True if `f` has a `read` method that is an awaitable function"""
    if not hasattr(f, 'read'):
        return False
    return is_awaitablefunction(f.read)
def is_file(x):
    """True if x exposes a `read` attribute"""
    try:
        x.read
    except AttributeError:
        return False
    return True
def is_iterable(x):
    """True if x exposes an `__iter__` attribute"""
    missing = object()
    return getattr(x, '__iter__', missing) is not missing
def _get_source(source):
    '''
    Wraps in-memory ``bytes``/``str`` data in the matching ``io`` file object;
    any other kind of source is returned untouched.
    '''
    wrappers = ((bytes, io.BytesIO), (str, io.StringIO))
    for raw_type, wrap in wrappers:
        if isinstance(source, raw_type):
            return wrap(source)
    return source
def _make_basic_parse_gen(backend):
    '''Creates the ``basic_parse_gen`` function bound to ``backend``.'''
    def basic_parse_gen(file_obj, buf_size=64*1024, **config):
        chunks = file_source(file_obj, buf_size=buf_size)
        return utils.coros2gen(chunks, *_basic_parse_pipeline(backend, config))
    return basic_parse_gen
def _make_parse_gen(backend):
    '''Creates the ``parse_gen`` function bound to ``backend``.'''
    def parse_gen(file_obj, buf_size=64*1024, **config):
        chunks = file_source(file_obj, buf_size=buf_size)
        return utils.coros2gen(chunks, *_parse_pipeline(backend, config))
    return parse_gen
def _make_items_gen(backend):
    '''Creates the ``items_gen`` function bound to ``backend``.'''
    def items_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config):
        chunks = file_source(file_obj, buf_size=buf_size)
        stages = _items_pipeline(backend, prefix, map_type, config)
        return utils.coros2gen(chunks, *stages)
    return items_gen
def _make_kvitems_gen(backend):
    '''Creates the ``kvitems_gen`` function bound to ``backend``.'''
    def kvitems_gen(file_obj, prefix, map_type=None, buf_size=64*1024, **config):
        chunks = file_source(file_obj, buf_size=buf_size)
        stages = _kvitems_pipeline(backend, prefix, map_type, config)
        return utils.coros2gen(chunks, *stages)
    return kvitems_gen
def _make_basic_parse(backend):
    '''
    Creates the ``basic_parse`` function bound to ``backend``. The function
    picks the backend's async or generator implementation depending on the
    kind of source, and raises ValueError for anything that is not a file.
    '''
    def basic_parse(source, buf_size=64*1024, **config):
        source = _get_source(source)
        if is_async_file(source):
            implementation = backend['basic_parse_async']
        elif is_file(source):
            implementation = backend['basic_parse_gen']
        else:
            raise ValueError("Unknown source type: %r" % type(source))
        return implementation(source, buf_size=buf_size, **config)
    return basic_parse
def _make_parse(backend):
    '''
    Creates the ``parse`` function bound to ``backend``. The function picks
    the backend's async or generator implementation for file sources; any
    other iterable is fed straight into the backend's ``parse_basecoro``
    (``buf_size`` and ``config`` are not used in that case). Everything else
    raises ValueError.
    '''
    def parse(source, buf_size=64*1024, **config):
        source = _get_source(source)
        if is_async_file(source):
            implementation = backend['parse_async']
        elif is_file(source):
            implementation = backend['parse_gen']
        elif is_iterable(source):
            stage = (backend['parse_basecoro'], (), {})
            return utils.coros2gen(source, stage)
        else:
            raise ValueError("Unknown source type: %r" % type(source))
        return implementation(source, buf_size=buf_size, **config)
    return parse
def _make_items(backend):
    '''
    Creates the ``items`` function bound to ``backend``. The function picks
    the backend's async or generator implementation for file sources; any
    other iterable is fed straight into the backend's ``items_basecoro``
    (``buf_size`` and ``config`` are not used in that case). Everything else
    raises ValueError.
    '''
    def items(source, prefix, map_type=None, buf_size=64*1024, **config):
        source = _get_source(source)
        if is_async_file(source):
            implementation = backend['items_async']
        elif is_file(source):
            implementation = backend['items_gen']
        elif is_iterable(source):
            stage = (backend['items_basecoro'], (prefix,), {'map_type': map_type})
            return utils.coros2gen(source, stage)
        else:
            raise ValueError("Unknown source type: %r" % type(source))
        return implementation(
            source, prefix, map_type=map_type, buf_size=buf_size, **config
        )
    return items
def _make_kvitems(backend):
    '''
    Creates the ``kvitems`` function bound to ``backend``. The function picks
    the backend's async or generator implementation for file sources; any
    other iterable is fed straight into the backend's ``kvitems_basecoro``
    (``buf_size`` and ``config`` are not used in that case). Everything else
    raises ValueError.
    '''
    def kvitems(source, prefix, map_type=None, buf_size=64*1024, **config):
        source = _get_source(source)
        if is_async_file(source):
            implementation = backend['kvitems_async']
        elif is_file(source):
            implementation = backend['kvitems_gen']
        elif is_iterable(source):
            stage = (backend['kvitems_basecoro'], (prefix,), {'map_type': map_type})
            return utils.coros2gen(source, stage)
        else:
            raise ValueError("Unknown source type: %r" % type(source))
        return implementation(
            source, prefix, map_type=map_type, buf_size=buf_size, **config
        )
    return kvitems
433_common_functions_warn = '''
434Don't use the ijson.common.* functions; instead go directly with the ijson.* ones.
435See the documentation for more information.
436'''
def parse(events):
    """Like ijson.parse, but takes events generated via ijson.basic_parse instead
    of a file

    Deprecated: a DeprecationWarning is emitted on every call.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module, so the default warning filters can actually show it to the
    # code that still uses this function.
    warnings.warn(_common_functions_warn, DeprecationWarning, stacklevel=2)
    return utils.coros2gen(events,
        (parse_basecoro, (), {})
    )
def kvitems(events, prefix, map_type=None):
    """Like ijson.kvitems, but takes events generated via ijson.parse instead of
    a file

    Deprecated: a DeprecationWarning is emitted on every call.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module, so the default warning filters can actually show it to the
    # code that still uses this function.
    warnings.warn(_common_functions_warn, DeprecationWarning, stacklevel=2)
    return utils.coros2gen(events,
        (kvitems_basecoro, (prefix,), {'map_type': map_type})
    )
def items(events, prefix, map_type=None):
    """Like ijson.items, but takes events generated via ijson.parse instead of
    a file

    Deprecated: a DeprecationWarning is emitted on every call.
    """
    # stacklevel=2 attributes the warning to the caller's line rather than to
    # this module, so the default warning filters can actually show it to the
    # code that still uses this function.
    warnings.warn(_common_functions_warn, DeprecationWarning, stacklevel=2)
    return utils.coros2gen(events,
        (items_basecoro, (prefix,), {'map_type': map_type})
    )
class BackendCapabilities:
    '''
    Capabilities supported by a backend.

    Every capability is a boolean attribute that starts out as ``True``. The
    set of attributes is fixed by ``__slots__``, so assigning a name that is
    not listed there raises AttributeError.
    '''

    # Mapping of attribute name -> description of the capability.
    __slots__ = {
        'c_comments': 'C-style comments (non-standard in JSON)',
        'multiple_values': 'Multiple top-level values (non-standard in JSON)',
        'invalid_leading_zeros_detection': 'Detection of leading zeros in numbers, marking them as invalid',
        'incomplete_json_tokens_detection': 'Documents with incomplete JSON tokens',
        'int64': '64 bit integers supported when running with ``use_float=True``',
    }

    def __init__(self):
        self.c_comments = True
        self.multiple_values = True
        self.invalid_leading_zeros_detection = True
        self.incomplete_json_tokens_detection = True
        self.int64 = True
def enrich_backend(backend, **capabilities_overrides):
    '''
    Completes a backend namespace in place: records its capabilities and name,
    and fills in whichever coroutines/generators/async-iterables it lacks with
    the generic ones written in python.
    '''
    # All capabilities start enabled; the overrides switch individual ones.
    capabilities = BackendCapabilities()
    for capability, enabled in capabilities_overrides.items():
        setattr(capabilities, capability, enabled)
    backend['capabilities'] = capabilities

    # The backend is known by the last component of its module name.
    backend['backend'] = backend['__name__'].rsplit('.', 1)[-1]
    backend['backend_name'] = backend['backend']

    module_scope = globals()
    for name in ('basic_parse', 'parse', 'items', 'kvitems'):
        basecoro_name = name + '_basecoro'
        if basecoro_name not in backend:
            # Looked up only when missing: not every base coroutine has a
            # generic implementation in this module.
            backend[basecoro_name] = module_scope[basecoro_name]
        # Coroutine and generator flavours come from this module's factories.
        for suffix in ('_coro', '_gen'):
            flavour = name + suffix
            if flavour not in backend:
                backend[flavour] = module_scope['_make_' + flavour](backend)
        # The async flavour's factory lives in utils35.
        async_name = name + '_async'
        if async_name not in backend:
            backend[async_name] = getattr(utils35, '_make_' + async_name)(backend)
        # The user-facing entry point is always (re)created.
        backend[name] = module_scope['_make_' + name](backend)