Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/core.py: 58%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""*glom gets results.*
3The ``glom`` package has one central entrypoint,
4:func:`glom.glom`. Everything else in the package revolves around that
5one function. Sometimes, big things come in small packages.
7A couple of conventional terms you'll see repeated many times below:
9* **target** - glom is built to work on any data, so we simply
10 refer to the object being accessed as the *"target"*
11* **spec** - *(aka "glomspec", short for specification)* The
12 accompanying template used to specify the structure of the return
13 value.
15Now that you know the terms, let's take a look around glom's powerful
16semantics.
18"""
21import os
22import sys
23import pdb
24import copy
25import warnings
26import weakref
27import operator
28from abc import ABCMeta
29from pprint import pprint
30import string
31from collections import OrderedDict
32import traceback
34from face.helpers import get_wrap_width
35from boltons.typeutils import make_sentinel
36from boltons.iterutils import is_iterable
37#from boltons.funcutils import format_invocation
# Python 2 era aliases kept so the rest of the module can keep using them.
basestring = str
_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})
from collections import ChainMap
from reprlib import Repr, recursive_repr

# Debug mode is on unless the GLOM_DEBUG environment variable is unset,
# empty, "0" or "false" (compared case-insensitively, whitespace stripped).
GLOM_DEBUG = os.environ.get('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')
# Width used when formatting target-spec traces: whatever get_wrap_width()
# reports (capped at 110 by its max_width argument), but never below 50.
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50)   # min width

PATH_STAR = True
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# Changed to True in 23.1, this option to disable will go away soon

# Alias of the builtin ``type``.
# NOTE(review): presumably kept so code that shadows ``type`` locally can
# still reach the builtin -- confirm against the rest of the file.
_type_type = type

# Sentinel for "no value supplied"; used below as a default for optional
# arguments where None is a legitimate value.
_MISSING = make_sentinel('_MISSING')
SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.

>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}

Mostly used to drop keys from dicts (as above) or filter objects from
lists.

.. note::

   SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
   will remove the OMIT alias entirely.
"""
OMIT = SKIP  # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.

>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

# Scope key; _unpack_stack() follows it from one scope to the next.
LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed.  Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""

NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

# NOTE(review): MODE and MIN_MODE are not used in the visible part of the
# file; presumably scope keys selecting the evaluation mode -- confirm.
MODE = make_sentinel('MODE')

MIN_MODE = make_sentinel('MIN_MODE')

# Scope key; _unpack_stack() reads the list of failed branches stored under it.
CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""

# Scope key; _unpack_stack() reads the scope's current exception from it.
CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Absolute directory of this module; GlomError._finalize() looks for it in
# traceback lines to find where glom's own frames end.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))
class GlomError(Exception):
    """Root of the glom exception hierarchy.

    Every error raised from :func:`glom` processing logic derives from
    this class.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Return a GlomError-flavoured stand-in for *exc*.

        A throwaway subclass combining ``type(exc)`` and GlomError is
        created and instantiated with ``exc.args``. If that instantiation
        fails, *exc* itself is returned unchanged.
        """
        # TODO: need to test this against a wide array of exception types
        # this approach to wrapping errors works for exceptions
        # defined in pure-python as well as C
        original_type = type(exc)
        if issubclass(GlomError, original_type):
            bases = (GlomError,)
        else:
            bases = (original_type, GlomError)
        wrapper_type = type(f"GlomError.wrap({original_type.__name__})", bases, {})
        try:
            wrapped = wrapper_type(*exc.args)
            wrapped.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapped

    def _set_wrapped(self, exc):
        """Record *exc* as the exception this error stands in for."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the tail of the active traceback and remember *scope*.

        The stored lines and scope are what ``__str__`` later uses to
        build the full message.
        """
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        etype, evalue, _ = sys.exc_info()
        tb_lines = traceback.format_exc().strip().splitlines()
        # Walk backwards to the nearest line mentioning this package; the
        # number of lines kept is one less than the distance to it.
        for distance, tb_line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in tb_line:
                limit = distance - 1
                break
        else:
            limit = len(tb_lines)
        self._tb_lines = tb_lines[-limit:]
        # if the first line is trying to put a caret at a byte-code location on a line that
        # isn't being displayed, skip it
        if set(self._tb_lines[0]) <= {' ', '^', '~'}:
            self._tb_lines = self._tb_lines[1:]
        self._scope = scope

    def __str__(self):
        """Render the message, preferring the finalized trace when present."""
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return cached
        if getattr(self, '_scope', None) is not None:
            self._target_spec_trace = format_target_spec_trace(self._scope, self.__wrapped)
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     self._target_spec_trace]
            lines.extend(self._tb_lines)
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # else, not finalized: use the subclass's get_message() when it
        # exists, otherwise the plain Exception text
        try:
            render = self.get_message
        except AttributeError:
            render = super().__str__
        return render()
def _unpack_stack(scope, only_errors=True):
    """
    convert scope to [[scope, spec, target, error, [children]]]

    this is a convenience method for printing stacks

    Starting from the first map of *scope*, the chain of
    ``LAST_CHILD_SCOPE`` links is followed and one entry is recorded
    per scope visited.

    only_errors=True means ignore branches which may still be hanging around
    which were not involved in the stack trace of the error

    only_errors=False could be useful for debugger / introspection (similar
    to traceback.print_stack())

    Returns a list with at least one entry whenever ``only_errors`` is
    true (the trimming loop never pops the last remaining entry).
    """
    stack = []
    # only the innermost mapping of the ChainMap-like scope is inspected
    scope = scope.maps[0]
    while LAST_CHILD_SCOPE in scope:
        child = scope[LAST_CHILD_SCOPE]
        branches = scope[CHILD_ERRORS]
        if branches == [child]:
            branches = []  # if there's only one branch, count it as linear
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), branches])

        # NB: this id() business is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if id(child) in [id(b) for b in branches]:
            break  # if child already covered by branches, stop the linear descent

        scope = child.maps[0]
    else:  # if break executed above, cur scope was already added
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), []])
    # push errors "down" to where they were first raised / first observed
    # (an entry whose error equals the next entry's error is blanked out)
    for i in range(len(stack) - 1):
        cur, nxt = stack[i], stack[i + 1]
        if cur[3] == nxt[3]:
            cur[3] = None
    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(stack) > 1 and stack[-1][3] is None:
            stack.pop()
    return stack
def _format_trace_value(value, maxlen):
    """Return a repr of *value* for a trace line, shortened to fit *maxlen*.

    The repr comes from ``bbrepr`` with escaped single quotes unescaped.
    If it is longer than *maxlen* it is cut and given a ``'...'`` suffix
    that also reports ``len(value)`` when the value has a length.

    When *maxlen* is smaller than the suffix itself the result is just
    the suffix (and therefore longer than *maxlen*).
    """
    s = bbrepr(value).replace("\\'", "'")
    if len(s) > maxlen:
        try:
            suffix = '... (len=%s)' % len(value)
        except Exception:
            suffix = '...'
        # Clamp at zero: a negative bound would slice relative to the END
        # of the string and keep most of the text we meant to drop.
        s = s[:max(maxlen - len(suffix), 0)] + suffix
    return s
def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    unpack a scope into a multi-line but short summary

    Each entry produced by ``_unpack_stack(scope)`` contributes a
    "Target" line (only when the target object changed), a "Spec" line,
    and an error line when the entry carries an error other than
    *root_error*. Branching entries recurse with ``depth + 1``. Values
    are truncated so each line fits within *width*.
    """
    indent = " " + "|" * depth
    tick = "| " if depth else "- "

    def make_formatter(label, marker=None):
        # Build a one-argument formatter for lines starting with *label*.
        prefix = indent + (marker or tick) + label + ": "
        room = width - len(prefix)

        def render(value):
            return prefix + _format_trace_value(value, room)
        return render

    fmt_target = make_formatter("Target")
    fmt_spec = make_formatter("Spec")
    fmt_branch = make_formatter("Spec", "+ ")

    def recurse(branch_scope, last=False):
        # prev_target is read at call time, so branches see the latest one
        return format_target_spec_trace(branch_scope, root_error, width, depth + 1, prev_target, last)

    def fmt_error(exc):
        # [:-1] drops the trailing newline from format_exception_only()
        exc_text = "".join(traceback.format_exception_only(type(exc), exc))[:-1]
        return indent + tick + exc_text

    segments = []
    for _entry_scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            segments.append(fmt_target(target))
        prev_target = target
        if not branches:
            segments.append(fmt_spec(spec))
        else:
            segments.append(fmt_branch(spec))
            segments.extend([recurse(branch) for branch in branches[:-1]])
            segments.append(recurse(branches[-1], last_branch))
        last_line_error = error is not None and error is not root_error
        if last_line_error:
            segments.append(fmt_error(error))
    if depth:  # \ on first line, X on last line
        def remark(line, mark):
            return line[:depth + 1] + mark + line[depth + 2:]

        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)
282# TODO: not used (yet)
def format_oneline_trace(scope):
    """
    unpack a scope into a single line summary
    (shortest summary possible)

    Every entry of ``_unpack_stack(scope, only_errors=False)`` adds
    ``/`` plus the spec (its repr for T/Path specs, otherwise its type
    name), ``!`` plus the target's type name when the target changed,
    and ``<...>`` with the ``->``-joined path when the entry's scope has
    a ``Path`` key.
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    segments = []
    prev_target = _MISSING
    for entry_scope, spec, target, error, branches in _unpack_stack(scope, only_errors=False):
        segments.append('/')
        if type(spec) in (TType, Path):
            segments.append(bbrepr(spec))
        else:
            segments.append(type(spec).__name__)
        # Identity, not equality: "!=" would invoke arbitrary target
        # __ne__ implementations, which can be slow or raise for
        # array-like targets. This matches format_target_spec_trace().
        if target is not prev_target:
            segments.append('!')
            segments.append(type(target).__name__)
        if Path in entry_scope:
            segments.append('<')
            segments.append('->'.join([str(p) for p in entry_scope[Path]]))
            segments.append('>')
        prev_target = target

    return "".join(segments)
class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """Raised when glom cannot follow a path into the target.

    This is the :exc:`GlomError` most often seen in practice. It keeps
    the original exception and builds a readable message naming the
    path part that failed.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc, self.path, self.part_idx = exc, path, part_idx

    def get_message(self):
        """Build the message naming the path part that could not be accessed."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return ('could not access %r, part %r of %r, got error: %r'
                % details)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}({self.exc!r}, {self.path!r}, {self.part_idx!r})'
class PathAssignError(GlomError):
    """Raised when an assignment into the target fails.

    This :exc:`GlomError` subtype stems from an :func:`~glom.assign`
    call or other :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc, self.path, self.dest_name = exc, path, dest_name

    def get_message(self):
        """Build the message naming the destination that could not be assigned."""
        details = (self.dest_name, self.path, self.exc)
        return ('could not assign %r on object at %r, got error: %r'
                % details)

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}({self.exc!r}, {self.path!r}, {self.dest_name!r})'
class CoalesceError(GlomError):
    """Raised by :class:`Coalesce` when no subspec yields a usable value.

    This :exc:`GlomError` subtype is raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj, self.skipped, self.path = coal_obj, skipped, path

    def __repr__(self):
        cls_name = self.__class__.__name__
        return f'{cls_name}({self.coal_obj!r}, {self.skipped!r}, {self.path!r})'

    def get_message(self):
        """Describe the subspecs tried and what each attempt produced."""
        coal = self.coal_obj
        missed_specs = tuple(coal.subspecs)
        # Caught exceptions are shown by type name; skipped values are
        # additionally marked with a "<skipped ...>" wrapper.
        skipped_vals = []
        for item in self.skipped:
            type_name = item.__class__.__name__
            if isinstance(item, coal.skip_exc):
                skipped_vals.append(type_name)
            else:
                skipped_vals.append('<skipped %s>' % type_name)
        msg = ('no valid values found. Tried %r and got (%s)'
               % (missed_specs, ', '.join(skipped_vals)))
        if coal.skip is not _MISSING:
            msg += f', skip set to {coal.skip!r}'
        if coal.skip_exc is not GlomError:
            msg += f', skip_exc set to {coal.skip_exc!r}'
        if self.path is not None:
            msg += f' (at path {self.path!r})'
        return msg
class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Inherits from both :exc:`GlomError` and :exc:`TypeError`, so it can
    be caught as either.
    """
class UnregisteredTarget(GlomError):
    """Raised when a spec needs an operation the target's type lacks.

    This :class:`GlomError` subtype is raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op, self.target_type = op, target_type
        self.type_map, self.path = type_map, path
        super().__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (self.__class__.__name__, self.op, self.target_type.__name__,
                  self.type_map, self.path)
        return ('%s(%r, <type %r>, %r, %r)'
                % fields)

    def get_message(self):
        """Explain which operation failed and which types do support it."""
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types with a truthy handler count as registered
        reg_types = sorted(t.__name__ for t, handler in self.type_map.items() if handler)
        # an empty join already renders as '()'
        reg_types_str = '(%s)' % ', '.join(reg_types)
        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, reg_types_str))
        if self.path:
            msg += f' (at {self.path!r})'
        return msg
# pypy's __builtins__ is a module, as is CPython's REPL, but at
# normal execution time it's a dict? Normalize to the dict form.
if not (getattr(__builtins__, '__dict__', None) is None):
    __builtins__ = vars(__builtins__)


# Maps id(builtin object) -> its name, so reprs of builtins can be
# replaced by the builtin's name.
_BUILTIN_ID_NAME_MAP = dict(
    (id(obj), name) for name, obj in __builtins__.items())
class _BBRepr(Repr):
    """A better repr for builtins, when the built-in repr isn't
    roundtrippable.

    Works like :class:`reprlib.Repr`, except every integer size limit
    is raised to 1024 and objects whose repr starts with ``<`` are
    looked up by identity in ``_BUILTIN_ID_NAME_MAP``.
    """
    def __init__(self):
        super().__init__()
        # turn up all the length limits very high; non-integer settings
        # are left alone
        for attr_name, current in list(vars(self).items()):
            if isinstance(current, int):
                setattr(self, attr_name, 1024)

    def repr1(self, x, level):
        text = Repr.repr1(self, x, level)
        if text.startswith('<'):
            # e.g. "<built-in function len>" -> "len"; unknown objects
            # keep their original repr
            return _BUILTIN_ID_NAME_MAP.get(id(x), text)
        return text


bbrepr = recursive_repr()(_BBRepr().repr)
class _BBReprFormatter(string.Formatter):
    """
    allow format strings to be evaluated where {!r} will use bbrepr
    instead of repr

    All other conversions are handled by :class:`string.Formatter`.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        # same post-processing as trace values: unescape single quotes
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format
545# TODO: push this back up to boltons with repr kwarg
def format_invocation(name='', args=(), kwargs=None, **kw):
    """Render a Python-style call expression as text.

    *name* is the callable's name, *args* its positional arguments and
    *kwargs* its keyword arguments (a dict, rendered in sorted key
    order, or an iterable of ``(key, value)`` pairs, rendered as given).
    The only extra keyword accepted is ``repr``, the function used to
    render each value (defaults to ``bbrepr``); any other keyword
    raises :exc:`TypeError`.

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)

    """
    _repr = kw.pop('repr', bbrepr)
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))
    kwargs = kwargs or {}
    positional_text = ', '.join([_repr(arg) for arg in args])
    if isinstance(kwargs, dict):
        pairs = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        pairs = kwargs
    keyword_text = ', '.join([f'{key}={_repr(val)}' for key, val in pairs])

    # join only the non-empty halves so no stray separator appears
    inner = ', '.join([text for text in (positional_text, keyword_text) if text])

    return f'{name}({inner})'
class Path:
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Integer indexes outside the path raise :exc:`IndexError`; slice
    bounds outside the path are clamped.

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        # A leading T-object becomes the root that later parts extend.
        if isinstance(path_parts[0], TType):
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # __ops__ is (root, op1, arg1, op2, arg2, ...); replay
                # each (op, arg) pair onto the path being built
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # anything else is a literal path part ('P' op)
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                # cache is full: build the Path without storing it
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # ops are (root, op, arg, op, arg, ...): one (op, arg) pair per part
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path's ops begin with the ops of *other*.

        *other* may be a string (wrapped as a single-part Path), a Path
        or a T-object; anything else raises :exc:`TypeError`.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path for the part at index *i* or the slice *i*.

        Raises:
            IndexError: if an integer index lies outside the path.
        """
        cur_t_path = self.path_t.__ops__
        n_ops = len(cur_t_path)
        # Part k lives at ops[2k + 1] (op) and ops[2k + 2] (arg);
        # ops[0] is the root and is never part of the selection.
        try:
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            # Negative bounds count from the end. Clamp them to 1 so an
            # out-of-range bound can neither reach the root slot nor
            # split an (op, arg) pair.
            start = (start * 2) + 1 if start >= 0 else max((start * 2) + n_ops, 1)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else max((stop * 2) + n_ops, 1)
        except AttributeError:
            # not a slice: treat *i* as an integer index
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + n_ops
            # ">=": start == n_ops means i == len(self), which is one
            # past the last part and must fail like any sequence index
            if start < 0 or start >= n_ops:
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + n_ops

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # apply the step to whole (op, arg) pairs, then flatten again
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        new_t.__ops__ = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])
def _format_path(t_path):
    """Render a flat ``(op, arg, op, arg, ...)`` sequence as a Path repr.

    Literal parts (op ``'P'``) are shown with ``repr``; runs of other
    ops are grouped and rendered through ``_format_t``. A sequence with
    no literal parts is rendered entirely by ``_format_t``, and an
    empty sequence gives ``'Path()'``.
    """
    # Each segment is (is_t_ops, value). The explicit flag is needed so a
    # literal part that happens to be a list is not mistaken for a
    # collected run of T ops.
    segments, pending_ops = [], []
    for idx in range(0, len(t_path), 2):
        op, arg = t_path[idx], t_path[idx + 1]
        if op == 'P':
            if pending_ops:
                segments.append((True, pending_ops))
                pending_ops = []
            segments.append((False, arg))
        else:
            pending_ops.append(op)
            pending_ops.append(arg)

    if pending_ops and not segments:
        # pure T-style path: no Path(...) wrapper
        return _format_t(pending_ops)
    if pending_ops:
        segments.append((True, pending_ops))
    return 'Path(%s)' % ', '.join([_format_t(value) if is_t_ops else repr(value)
                                   for is_t_ops, value in segments])
class Spec:
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope if scope else {}

    def glom(self, target, **kw):
        """Run this spec against *target*.

        This Spec's scope values are merged with any ``scope`` keyword
        (the keyword wins on conflicts) before the call is delegated.
        """
        merged_scope = dict(self.scope)
        merged_scope.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(merged_scope)
        # the merged scope may carry a replacement keyed by the glom
        # function itself; otherwise the module-level glom() is used
        runner = merged_scope.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        # add this Spec's scope values, then evaluate the wrapped spec
        scope.update(self.scope)
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        cls_name = self.__class__.__name__
        if not self.scope:
            return f'{cls_name}({bbrepr(self.spec)})'
        return f'{cls_name}({bbrepr(self.spec)}, scope={self.scope!r})'
class Coalesce:
    """Coalesce objects specify fallback behavior for a list of
    subspecs.

    Subspecs are passed as positional arguments, and keyword arguments
    control defaults. Each subspec is evaluated in turn, and if none
    match, a :exc:`CoalesceError` is raised, or a default is returned,
    depending on the options used.

    .. note::

       This operation may seem very familiar if you have experience with
       `SQL`_ or even `C# and others`_.


    In practice, this fallback behavior's simplicity is only surpassed
    by its utility:

    >>> target = {'c': 'd'}
    >>> glom(target, Coalesce('a', 'b', 'c'))
    'd'

    glom tries to get ``'a'`` from ``target``, but gets a
    KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,
    glom *coalesces* into the next subspec, ``'b'``. The process
    repeats until it gets to ``'c'``, which returns our value,
    ``'d'``. If our value weren't present, we'd see:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    Same process, but because ``target`` is empty, we get a
    :exc:`CoalesceError`.

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.


    If we want to avoid an exception, and we know which value we want
    by default, we can set *default*:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')
    'd-fault'

    ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

    Args:

       subspecs: One or more glommable subspecs
       default: A value to return if no subspec results in a valid value
       default_factory: A callable whose result will be returned as a default
       skip: A value, tuple of values, or predicate function
         representing values to ignore
       skip_exc: An exception or tuple of exception types to catch and
         move on to the next subspec. Defaults to :exc:`GlomError`, the
         parent type of all glom runtime exceptions.

    Raises:
       ValueError: if both *default* and *default_factory* are given.
       TypeError: if an unknown keyword argument is given.

    If all subspecs produce skipped values or exceptions, a
    :exc:`CoalesceError` will be raised. For more examples, check out
    the :doc:`tutorial`, which makes extensive use of Coalesce.

    .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE
    .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

    """
    def __init__(self, *subspecs, **kwargs):
        self.subspecs = subspecs
        # kept untouched for __repr__, since the pops below consume kwargs
        self._orig_kwargs = dict(kwargs)
        self.default = kwargs.pop('default', _MISSING)
        self.default_factory = kwargs.pop('default_factory', _MISSING)
        # Compare against the sentinel rather than by truthiness: a falsy
        # default (None, 0, '') given together with a factory is still
        # "both", and glomit() would silently ignore the factory.
        if self.default is not _MISSING and self.default_factory is not _MISSING:
            raise ValueError('expected one of "default" or "default_factory", not both')
        self.skip = kwargs.pop('skip', _MISSING)
        # normalize every accepted form of *skip* into a predicate
        if self.skip is _MISSING:
            self.skip_func = lambda v: False
        elif callable(self.skip):
            self.skip_func = self.skip
        elif isinstance(self.skip, tuple):
            self.skip_func = lambda v: v in self.skip
        else:
            self.skip_func = lambda v: v == self.skip
        self.skip_exc = kwargs.pop('skip_exc', GlomError)
        if kwargs:
            raise TypeError(f'unexpected keyword args: {sorted(kwargs.keys())!r}')

    def glomit(self, target, scope):
        skipped = []
        for subspec in self.subspecs:
            try:
                ret = scope[glom](target, subspec, scope)
                if not self.skip_func(ret):
                    break
                skipped.append(ret)
            except self.skip_exc as e:
                skipped.append(e)
                continue
        else:
            # no subspec produced an acceptable value
            if self.default is not _MISSING:
                ret = arg_val(target, self.default, scope)
            elif self.default_factory is not _MISSING:
                ret = self.default_factory()
            else:
                raise CoalesceError(self, skipped, scope[Path])
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        return format_invocation(cn, self.subspecs, self._orig_kwargs, repr=bbrepr)
class Inspect:
    """The :class:`~glom.Inspect` specifier type provides a way to get
    visibility into glom's evaluation of a specification, enabling
    debugging of those tricky problems that may arise with unexpected
    data.

    :class:`~glom.Inspect` can be inserted into an existing spec in one of two
    ways. First, as a wrapper around the spec in question, or second,
    as an argument-less placeholder wherever a spec could be.

    :class:`~glom.Inspect` supports several modes, controlled by
    keyword arguments. Its default, no-argument mode, simply echos the
    state of the glom at the point where it appears:

    >>> target = {'a': {'b': {}}}
    >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
    ---
    path:   ['a.b']
    target: {'a': {'b': {}}}
    output: {}
    ---

    Debugging behavior aside, :class:`~glom.Inspect` has no effect on
    values in the target, spec, or result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether or not the Inspect should be applied
         at every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): This flag controls whether a debugging prompt
         should appear before evaluating each inspected spec. Can also
         take a callable. Defaults to False.
       post_mortem (bool): This flag controls whether exceptions
         should be caught and interactively debugged with :mod:`pdb` on
         inspected specs. Can also take a callable. Defaults to False.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    Raises:
       TypeError: if *breakpoint* or *post_mortem* is truthy but is
         neither ``True`` nor a callable.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # only the first positional argument is used as the wrapped spec
        self.wrapped = a[0] if a else Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)
        self.breakpoint = self._resolve_hook(
            'breakpoint', kw.pop('breakpoint', False), pdb.set_trace)
        self.post_mortem = self._resolve_hook(
            'post_mortem', kw.pop('post_mortem', False), pdb.post_mortem)

    @staticmethod
    def _resolve_hook(name, value, default_hook):
        """Normalize a bool-or-callable option into a falsy value or a callable.

        ``True`` selects *default_hook*; any other truthy value must be
        callable, otherwise a ``TypeError`` naming the option is raised.
        """
        if value is True:
            return default_hook
        if value and not callable(value):
            # f-string instead of ``'%r' % value`` so that a tuple value is
            # reported verbatim instead of breaking the formatting itself
            raise TypeError(f'{name} expected bool or callable, not: {value!r}')
        return value

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        # stash the real handler under Inspect,
        # and replace the child handler with a trace callback
        scope[Inspect] = scope[glom]
        scope[glom] = self._trace
        return scope[glom](target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Trace callback installed in place of the real glom handler."""
        if not self.recursive:
            # restore the real handler so that only this level is traced
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path:  ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            ret = scope[Inspect](target, spec, scope)
        except Exception:
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', ret)
            print('---')
        return ret
class Call:
    """:class:`Call` specifies when a target should be passed to a function,
    *func*.

    :class:`Call` is similar to :func:`~functools.partial` in that
    it is no more powerful than ``lambda`` or other functions, but
    it is designed to be more readable, with a better ``repr``.

    Args:
       func (callable): a function or other callable to be called with
          the target

    :class:`Call` combines well with :attr:`~glom.T` to construct objects. For
    instance, to generate a dict and then pass it to a constructor:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    This does the same as ``glom(target, lambda target:
    ExampleClass(**target))``, but it's easy to see which one reads
    better.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        if func is None:
            func = T
        if not callable(func) and not isinstance(func, (Spec, TType)):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        'run against the current target'
        # func, args and kwargs are each resolved against the target first
        resolved_func = arg_val(target, self.func, scope)
        resolved_args = arg_val(target, self.args, scope)
        resolved_kwargs = arg_val(target, self.kwargs, scope)
        return resolved_func(*resolved_args, **resolved_kwargs)

    def __repr__(self):
        return '%s(%s, args=%r, kwargs=%r)' % (
            self.__class__.__name__, bbrepr(self.func), self.args, self.kwargs)
def _is_spec(obj, strict=False):
    """Codify glom's spec type checking.

    ``TType`` instances always count as specs. In *strict* mode the only
    other accepted objects are exact ``Spec`` instances (subclasses do not
    count); otherwise the answer is delegated to ``_has_callable_glomit``.
    """
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec
class Invoke:
    """Specifier type designed for easy invocation of callables from glom.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` is similar to :func:`functools.partial`, but with the
    ability to set up a "templated" call which interleaves constants and
    glom specs.

    For example, the following creates a spec which can be used to
    check if targets are integers:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    And this composes like any other glom spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Another example, mixing positional and keyword arguments:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Invoke also helps with evaluating zero-argument functions:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not (callable(func) or _is_spec(func, strict=True)):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat sequence of (op, args, kwargs) triples, in call order
        self._args = ()
        # a registry of every known kwarg to its freshest value as set
        # by the methods below. the **kw dict is used as a unique marker.
        self._cur_kwargs = {}

    @classmethod
    def specfunc(cls, spec):
        """Creates an :class:`Invoke` instance where the function is
        indicated by a spec.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def _derive(self, op, positional, keyword, track):
        """Return a copy of this spec with one more (op, args, kwargs) triple.

        When *track* is true, every key of *keyword* is marked in the
        copy's kwarg registry as most recently set by *keyword* itself
        (compared by identity later on).
        """
        derived = self.__class__(self.func)
        derived._args = self._args + (op, positional, keyword)
        derived._cur_kwargs = dict(self._cur_kwargs)
        if track:
            derived._cur_kwargs.update(dict.fromkeys(keyword, keyword))
        return derived

    def constants(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword argument values stored for passing to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.
        """
        return self._derive('C', a, kw, track=True)

    def specs(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword arguments stored to be interpreted as specs, with
        the results passed to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.

        """
        return self._derive('S', a, kw, track=True)

    def star(self, args=None, kwargs=None):
        """Returns a new :class:`Invoke` spec, with *args* and/or *kwargs*
        specs set to be "starred" or "star-starred" (respectively)

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
             underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
             the underlying function.

        One or both of the above arguments should be set.

        The :meth:`~Invoke.star()`, like other :class:`Invoke`
        methods, may be called multiple times. The *args* and *kwargs*
        will be stacked in the order in which they are provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        return self._derive('*', args, kwargs, track=False)

    def _triples(self):
        """Iterate over the stored (op, args, kwargs) triples in order."""
        flat = self._args
        return zip(flat[0::3], flat[1::3], flat[2::3])

    def __repr__(self):
        method_names = {'C': 'constants', 'S': 'specs', '*': 'star'}
        head_name = self.__class__.__name__
        if type(self.func) is Spec:
            head_name += '.specfunc'
            head_args = (self.func.spec,)
        else:
            head_args = (self.func,)
        pieces = [format_invocation(head_name, head_args, repr=bbrepr)]

        for op, call_args, call_kwargs in self._triples():
            if op in ('C', 'S'):
                # only show kwargs that were not overridden by a later call
                shown_kwargs = [(key, val) for key, val in call_kwargs.items()
                                if self._cur_kwargs[key] is call_kwargs]
                shown_args = call_args
            else:
                shown_kwargs = {}
                if call_args:
                    shown_kwargs['args'] = call_args
                if call_kwargs:
                    shown_kwargs['kwargs'] = call_kwargs
                shown_args = ()
            pieces.append('.' + format_invocation(
                method_names[op], shown_args, shown_kwargs, repr=bbrepr))

        return ''.join(pieces)

    def glomit(self, target, scope):
        def recurse(spec):
            return scope[glom](target, spec, scope)

        if _is_spec(self.func, strict=True):
            func = recurse(self.func)
        else:
            func = self.func

        positional = []
        keyword = {}
        for op, call_args, call_kwargs in self._triples():
            if op == 'C':
                positional.extend(call_args)
                for key, val in call_kwargs.items():
                    # skip kwargs superseded by a later constants()/specs()
                    if self._cur_kwargs[key] is call_kwargs:
                        keyword[key] = val
            elif op == 'S':
                positional.extend([recurse(sub) for sub in call_args])
                for key, sub in call_kwargs.items():
                    if self._cur_kwargs[key] is call_kwargs:
                        keyword[key] = recurse(sub)
            elif op == '*':
                if call_args is not None:
                    positional.extend(recurse(call_args))
                if call_kwargs is not None:
                    keyword.update(recurse(call_kwargs))

        return func(*positional, **keyword)
class Ref:
    """Name a part of a spec and refer to it elsewhere in the same spec,
    useful for trees and other self-similar data structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
         to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        key = (Ref, self.name)
        if self.subspec is _MISSING:
            # a bare reference: look up the spec registered under this name
            resolved = scope[key]
        else:
            # a definition: register the spec under this name, then run it
            resolved = self.subspec
            scope[key] = resolved
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr of the pair, minus the surrounding tuple parentheses
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"
class TType:
    """``T``, short for "target". A singleton object that enables
    object-oriented expression of a glom specification.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    Basically, think of ``T`` as your data's stunt double. Everything
    that you do to ``T`` will be recorded and executed during the
    :func:`glom` call. Take this example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    So far, we've relied on the ``'a.b.c'``-style shorthand for
    access, or used the :class:`~glom.Path` objects, but if you want
    to explicitly do attribute and key lookups, look no further than
    ``T``.

    But T doesn't stop with unambiguous access. You can also call
    methods and perform almost any action you would with a normal
    object:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object can go anywhere in the spec. As seen in the example
    above, we access ``'a'``, use a ``T`` to get ``'b'`` and iterate
    over its ``items``, turning them into a ``list``.

    You can even use ``T`` with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    On a further note, while ``lambda`` works great in glom specs, and
    can be very handy at times, ``T`` and :class:`~glom.Call`
    eliminate the need for the vast majority of ``lambda`` usage with
    glom.

    Unlike ``lambda`` and other functions, ``T`` roundtrips
    beautifully and transparently:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # the only per-instance state: a flat tuple holding the root object
    # followed by alternating (operation, argument) entries
    __slots__ = ('__ops__',)

    def __getattr__(self, name):
        if not name.startswith('__'):
            return _t_child(self, '.', name)
        raise AttributeError(
            'T instances reserve dunder attributes.'
            f' To access the "{name}" attribute, use'
            f' T.__("{name[2:]}")')

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        if self is S:
            # S(...) is scope assignment, which is keyword-only
            if args:
                raise TypeError(f'S() takes no positional arguments, got: {args!r}')
            if not kwargs:
                raise TypeError('S() expected at least one kwarg, got none')
            # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        return sum(1 for op in self.__ops__[1::2] if op in ('x', 'X'))

    # -- arithmetic: each operator records a one-character op code --

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    def __(self, name):
        # escape hatch for the dunder attributes rejected by __getattr__
        return _t_child(self, '.', '__' + name)

    def __repr__(self):
        ops = self.__ops__
        return _format_t(ops[1:], ops[0])

    def __getstate__(self):
        # the root singleton is stored by name rather than by object
        ops = self.__ops__
        root_name = {T: 'T', S: 'S', A: 'A'}[ops[0]]
        return (root_name,) + ops[1:]

    def __setstate__(self, state):
        root = {'T': T, 'S': S, 'A': A}[state[0]]
        self.__ops__ = (root,) + state[1:]
def _t_child(parent, operation, arg):
    """Return a new ``TType`` extending *parent*'s ops by (operation, arg).

    Raises ``BadSpec`` when *parent* is rooted at ``A`` and *operation*
    is not one of the assignment-friendly ones (``'.'``, ``'['``, ``'P'``).
    """
    parent_ops = parent.__ops__
    # whitelist rather than blacklist assignment friendly operations
    if parent_ops[0] is A and operation not in ('.', '[', 'P'):
        # TODO: error type?
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = parent_ops + (operation, arg)
    return child
1523def _s_first_magic(scope, key, _t):
1524 """
1525 enable S.a to do S['a'] or S['a'].val as a special
1526 case for accessing user defined string variables
1527 """
1528 err = None
1529 try:
1530 cur = scope[key]
1531 except KeyError as e:
1532 err = PathAccessError(e, Path(_t), 0) # always only one level depth, hence 0
1533 if err:
1534 raise err
1535 return cur
def _t_eval(target, _t, scope):
    """Evaluate the recorded operations of the ``TType`` *_t*.

    ``_t.__ops__`` is a flat tuple: the root (``T``, ``S`` or ``A``)
    followed by alternating (operation, argument) entries. Evaluation
    starts from *target* for ``T`` and from *scope* for ``S``/``A``, and
    applies each operation in turn to the running value.

    Returns:
        The final value for ``T``/``S`` expressions. For ``A`` and for the
        ``S(var=spec)`` form, *target* is returned unchanged after the
        assignment into the scope/destination has been done.

    Raises:
        PathAccessError: when an attribute/item/path access or an
            arithmetic operation fails.
        BadSpec: when an ``A`` expression has no destination.
        ValueError: when the root is not one of ``T``, ``S``, ``A``.
    """
    t_path = _t.__ops__
    i = 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                # nxt grows while it is being iterated: this is a
                # breadth-first walk, with ids guarding against cycles
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # recorded by TType.__floordiv__; previously this op
                    # was silently ignored, leaving cur unchanged
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            # raised here, outside the except blocks above
            raise pae
        i += 2
    if root is A:
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur
1659def _assign_op(dest, op, arg, val, path, scope):
1660 """helper method for doing the assignment on a T operation"""
1661 if op == '[':
1662 dest[arg] = val
1663 elif op == '.':
1664 setattr(dest, arg, val)
1665 elif op == 'P':
1666 _assign = scope[TargetRegistry].get_handler('assign', dest)
1667 try:
1668 _assign(dest, arg, val)
1669 except Exception as e:
1670 raise PathAssignError(e, path, arg)
1671 else: # pragma: no cover
1672 raise ValueError('unsupported T operation for assignment')
1675def _extend_children(children, item, get_handler):
1676 try: # dict or obj-like
1677 keys = get_handler('keys', item)
1678 get = get_handler('get', item)
1679 except UnregisteredTarget:
1680 try:
1681 iterate = get_handler('iterate', item)
1682 except UnregisteredTarget:
1683 pass
1684 else:
1685 try: # list-like
1686 children.extend(iterate(item))
1687 except Exception:
1688 pass
1689 else:
1690 try:
1691 for key in keys(item):
1692 try:
1693 children.append(get(item, key))
1694 except Exception:
1695 pass
1696 except Exception:
1697 pass
# The three root singletons. Each one is created empty first because its
# own ops tuple, assigned just below, has to refer to the object itself.
T = TType()  # target aka Mr. T aka "this"
S = TType()  # like T, but means grab stuff from Scope, not Target
A = TType()  # like S, but shorthand to assign target to scope

# a root's ops tuple holds only the root itself: no operations recorded yet
T.__ops__ = (T,)
S.__ops__ = (S,)
A.__ops__ = (A,)

_T_STAR = T.__star__()  # helper constant for Path.from_text
_T_STARSTAR = T.__starstar__()  # helper constant for Path.from_text

# module-level sentinels created with boltons' make_sentinel
UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')
1715def _format_slice(x):
1716 if type(x) is not slice:
1717 return bbrepr(x)
1718 fmt = lambda v: "" if v is None else bbrepr(v)
1719 if x.step is None:
1720 return fmt(x.start) + ":" + fmt(x.stop)
1721 return fmt(x.start) + ":" + fmt(x.stop) + ":" + fmt(x.step)
def _format_t(path, root=T):
    """Render a ``TType`` ops sequence as the Python expression that built it.

    Args:
        path: flat tuple of alternating (operation, argument) entries,
            i.e. a ``TType``'s ``__ops__`` without its leading root.
        root: the root singleton (``T``, ``S`` or ``A``) the path starts at.

    Returns:
        str: the formatted expression. As soon as a ``'P'`` (Path)
        operation is met, formatting of the whole path is delegated to
        ``_format_path``.
    """
    # op codes whose source-level operator differs from the code itself
    symbols = {':': '**', '#': '//'}
    # ops that force parentheses around an operand.
    # NOTE(review): '*' is absent from the original list; it is left out
    # here too so that existing reprs do not change -- confirm intent.
    needs_parens = '+-/#%:&|^~_'
    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append(f"[{index}]")
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            if any([o in path[:i] for o in needs_parens]):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_path = arg.__ops__
                if any([o in arg_path for o in needs_parens]):
                    formatted_arg = '(' + formatted_arg + ')'
            # floor division is stored as '#', which used to be printed
            # literally and turned the rest of the repr into a comment
            prepr.append(' ' + symbols.get(op, op) + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)
class Val:
    """Val objects are specs which evaluate to the wrapped *value*.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Instead of accessing ``'counts'`` as a key like it did with
    ``'a.b'``, :func:`~glom.glom` just unwrapped the Val and
    included the value.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # neither the target nor the scope is consulted
        return self.value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, bbrepr(self.value))
# ``Literal`` is the same class object as ``Val``, not a subclass of it
Literal = Val  # backwards compat for pre-20.7.0
class ScopeVars:
    """This is the runtime partner of :class:`Vars` -- this is what
    actually lives in the scope and stores runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # the values live directly in the instance dict; entries from
        # *defaults* win over same-named entries from *base*
        values = dict(base)
        values.update(defaults)
        self.__dict__ = values

    def __iter__(self):
        # yields (name, value) pairs, which is what makes dict(obj) work
        return iter(self.__dict__.items())

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, bbrepr(self.__dict__))
class Vars:
    """
    :class:`Vars` is a helper that can be used with **S** in order to
    store shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Arguments here should be thought of the same way as default arguments
    to a function. Each time the spec is evaluated, the same arguments
    will be referenced; so, think carefully about mutable data structures.
    """
    def __init__(self, base=(), **kw):
        # the conversion result is discarded: this is only a check that
        # the first argument is dict-compatible
        dict(base)
        self.base, self.defaults = base, kw

    def glomit(self, target, spec):
        # a fresh ScopeVars is built from the stored arguments on every call
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        shown_args = (self.base,) if self.base else ()
        return format_invocation(self.__class__.__name__,
                                 args=shown_args,
                                 kwargs=self.defaults,
                                 repr=bbrepr)
class Let:
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if len(kw) == 0:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        # every binding is evaluated against the target before any of
        # them is stored in the scope; the target passes through as-is
        evaluated = {}
        for name, subspec in self._binding.items():
            evaluated[name] = scope[glom](target, subspec, scope)
        scope.update(evaluated)
        return target

    def __repr__(self):
        return format_invocation(
            self.__class__.__name__, kwargs=self._binding, repr=bbrepr)
class Auto:
    """
    Switch to Auto mode (the default)

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # set the mode in the scope, then evaluate the wrapped spec in it
        scope[MODE] = AUTO
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def __repr__(self):
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)
class _AbstractIterable(_AbstractIterableBase):
    """Abstract type matching every class with a callable ``__iter__``,
    except ``str`` and ``bytes``."""
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        # strings and bytes are iterable, but are deliberately excluded
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))
1900class _ObjStyleKeysMeta(type):
1901 def __instancecheck__(cls, C):
1902 return hasattr(C, "__dict__") and hasattr(C.__dict__, "keys")
class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Target type for objects whose keys are their ``__dict__`` entries."""
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        """Return the keys view of *obj*'s instance dict."""
        return obj.__dict__.keys()
1914def _get_sequence_item(target, index):
1915 return target[int(index)]
1918# handlers are 3-arg callables, with args (spec, target, scope)
1919# spec is the first argument for convenience in the case
1920# that the handler is a method of the spec type
def _handle_dict(target, spec, scope):
    """Evaluate a dict spec into a new mapping of the same type as *spec*.

    Each value is the result of evaluating the corresponding subspec
    against *target*; entries whose result is ``SKIP`` are left out.
    Keys that are exactly ``Spec`` or ``TType`` instances are themselves
    evaluated against *target*.
    """
    result = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for key, subspec in spec.items():
        value = scope[glom](target, subspec, scope)
        if value is not SKIP:
            if type(key) in (Spec, TType):
                key = scope[glom](target, key, scope)
            result[key] = value
    return result
def _handle_list(target, spec, scope):
    """Evaluate a list spec: apply ``spec[0]`` to each item of *target*.

    Items are obtained through the ``'iterate'`` handler registered for
    *target*. A ``SKIP`` result omits that item and a ``STOP`` result ends
    the iteration. While an item is processed, ``scope[Path]`` is the
    list's own path extended by the item's index.

    Raises:
        TypeError: when calling the iterate handler on *target* fails.
    """
    item_spec = spec[0]
    iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])
    try:
        items = iterate(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))
    collected = []
    parent_path = scope[Path]
    for index, item in enumerate(items):
        scope[Path] = parent_path + [index]
        value = scope[glom](item, item_spec, scope)
        if value is STOP:
            break
        if value is not SKIP:
            collected.append(value)
    return collected
def _handle_tuple(target, spec, scope):
    """Evaluate a tuple spec as a pipeline.

    Each subspec receives the previous step's result as its target, and
    each step runs in a scope chained off the previous step's scope. A
    ``SKIP`` result leaves the running value untouched and a ``STOP``
    result ends the pipeline early.
    """
    current = target
    for step in spec:
        scope = chain_child(scope)
        outcome = scope[glom](current, step, scope)
        if outcome is STOP:
            break
        if outcome is SKIP:
            continue
        current = outcome
        if not isinstance(step, list):
            # record the step (by __name__ when it has one) in the path
            scope[Path] += [getattr(step, '__name__', step)]
    return current
class Pipe:
    """Evaluate specs one after the other, passing the result of
    the previous evaluation in as the target of the next spec:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        self.steps = steps

    def glomit(self, target, scope):
        # the steps tuple is evaluated exactly like a tuple spec
        return _handle_tuple(target, self.steps, scope)

    def __repr__(self):
        # the repr of the steps tuple supplies the parentheses
        class_name = self.__class__.__name__
        return ''.join((class_name, bbrepr(self.steps)))
1989class TargetRegistry:
1990 '''
1991 responsible for registration of target types for iteration
1992 and attribute walking
1993 '''
def __init__(self, register_default_types=True):
    """Create the empty lookup tables and register the built-in ops.

    The default target types are registered as well unless
    *register_default_types* is false.
    """
    self._op_type_map = {}
    # see _register_fuzzy_type for details
    self._op_type_tree = {}
    self._type_cache = {}

    # op name to function that returns handler function
    self._op_auto_map = OrderedDict()

    self._register_builtin_ops()

    if register_default_types:
        self._register_default_types()
def get_handler(self, op, obj, path=None, raise_exc=True):
    """for an operation and object **instance**, obj, return the
    closest-matching handler function, raising UnregisteredTarget
    if no handler can be found for *obj* (or False if
    raise_exc=False)

    Results are cached per ``(type(obj), op)``. A miss is cached as
    ``False`` only when it was looked up with ``raise_exc=False``; such
    a cached miss still raises on a later ``raise_exc=True`` call.

    Args:
        op: name of the operation to find a handler for.
        obj: the instance whose type selects the handler.
        path: passed through to ``UnregisteredTarget`` on failure.
        raise_exc (bool): raise on a miss instead of returning False.
    """
    obj_type = type(obj)
    cache_key = (obj_type, op)
    type_map = None
    if cache_key in self._type_cache:
        ret = self._type_cache[cache_key]
    else:
        ret = False
        type_map = self.get_type_map(op)
        if type_map:
            try:
                ret = type_map[obj_type]
            except KeyError:
                # no exact match: fall back to the closest registered type
                type_tree = self._op_type_tree.get(op, {})
                closest = self._get_closest_type(obj, type_tree=type_tree)
                if closest is not None:
                    ret = type_map[closest]
        if ret is not False or not raise_exc:
            self._type_cache[cache_key] = ret

    if ret is False and raise_exc:
        # this check used to happen only on a cache miss, so a miss cached
        # by a raise_exc=False call was later returned instead of raising
        if type_map is None:
            type_map = self.get_type_map(op)
        raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)
    return ret
2036 def get_type_map(self, op):
2037 try:
2038 return self._op_type_map[op]
2039 except KeyError:
2040 return OrderedDict()
2042 def _get_closest_type(self, obj, type_tree):
2043 default = None
2044 for cur_type, sub_tree in type_tree.items():
2045 if isinstance(obj, cur_type):
2046 sub_type = self._get_closest_type(obj, type_tree=sub_tree)
2047 ret = cur_type if sub_type is None else sub_type
2048 return ret
2049 return default
2051 def _register_default_types(self):
2052 self.register(object)
2053 self.register(dict, get=operator.getitem)
2054 self.register(dict, keys=dict.keys)
2055 self.register(list, get=_get_sequence_item)
2056 self.register(tuple, get=_get_sequence_item)
2057 self.register(OrderedDict, get=operator.getitem)
2058 self.register(OrderedDict, keys=OrderedDict.keys)
2059 self.register(_AbstractIterable, iterate=iter)
2060 self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)
2062 def _register_fuzzy_type(self, op, new_type, _type_tree=None):
2063 """Build a "type tree", an OrderedDict mapping registered types to
2064 their subtypes
2066 The type tree's invariant is that a key in the mapping is a
2067 valid parent type of all its children.
2069 Order is preserved such that non-overlapping parts of the
2070 subtree take precedence by which was most recently added.
2071 """
2072 if _type_tree is None:
2073 try:
2074 _type_tree = self._op_type_tree[op]
2075 except KeyError:
2076 _type_tree = self._op_type_tree[op] = OrderedDict()
2078 registered = False
2079 for cur_type, sub_tree in list(_type_tree.items()):
2080 if issubclass(cur_type, new_type):
2081 sub_tree = _type_tree.pop(cur_type) # mutation for recursion brevity
2082 try:
2083 _type_tree[new_type][cur_type] = sub_tree
2084 except KeyError:
2085 _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
2086 registered = True
2087 elif issubclass(new_type, cur_type):
2088 _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
2089 registered = True
2090 if not registered:
2091 _type_tree[new_type] = OrderedDict()
2092 return _type_tree
2094 def register(self, target_type, **kwargs):
2095 if not isinstance(target_type, type):
2096 raise TypeError(f'register expected a type, not an instance: {target_type!r}')
2097 exact = kwargs.pop('exact', None)
2098 new_op_map = dict(kwargs)
2100 for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
2101 cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())
2103 if op_name in new_op_map:
2104 handler = new_op_map[op_name]
2105 elif target_type in cur_type_map:
2106 handler = cur_type_map[target_type]
2107 else:
2108 try:
2109 handler = self._op_auto_map[op_name](target_type)
2110 except Exception as e:
2111 raise TypeError('error while determining support for operation'
2112 ' "%s" on target type: %s (got %r)'
2113 % (op_name, target_type.__name__, e))
2114 if handler is not False and not callable(handler):
2115 raise TypeError('expected handler for op "%s" to be'
2116 ' callable or False, not: %r' % (op_name, handler))
2117 new_op_map[op_name] = handler
2119 for op_name, handler in new_op_map.items():
2120 self._op_type_map[op_name][target_type] = handler
2122 if not exact:
2123 for op_name in new_op_map:
2124 self._register_fuzzy_type(op_name, target_type)
2126 self._type_cache = {} # reset type cache
2128 return
2130 def register_op(self, op_name, auto_func=None, exact=False):
2131 """add operations beyond the builtins ('get' and 'iterate' at the time
2132 of writing).
2134 auto_func is a function that when passed a type, returns a
2135 handler associated with op_name if it's supported, or False if
2136 it's not.
2138 See glom.core.register_op() for the global version used by
2139 extensions.
2140 """
2141 if not isinstance(op_name, basestring):
2142 raise TypeError(f'expected op_name to be a text name, not: {op_name!r}')
2143 if auto_func is None:
2144 auto_func = lambda t: False
2145 elif not callable(auto_func):
2146 raise TypeError(f'expected auto_func to be callable, not: {auto_func!r}')
2148 # determine support for any previously known types
2149 known_types = set(sum([list(m.keys()) for m
2150 in self._op_type_map.values()], []))
2151 type_map = self._op_type_map.get(op_name, OrderedDict())
2152 type_tree = self._op_type_tree.get(op_name, OrderedDict())
2153 for t in sorted(known_types, key=lambda t: t.__name__):
2154 if t in type_map:
2155 continue
2156 try:
2157 handler = auto_func(t)
2158 except Exception as e:
2159 raise TypeError('error while determining support for operation'
2160 ' "%s" on target type: %s (got %r)'
2161 % (op_name, t.__name__, e))
2162 if handler is not False and not callable(handler):
2163 raise TypeError('expected handler for op "%s" to be'
2164 ' callable or False, not: %r' % (op_name, handler))
2165 type_map[t] = handler
2167 if not exact:
2168 for t in known_types:
2169 self._register_fuzzy_type(op_name, t, _type_tree=type_tree)
2171 self._op_type_map[op_name] = type_map
2172 self._op_type_tree[op_name] = type_tree
2173 self._op_auto_map[op_name] = auto_func
2175 def _register_builtin_ops(self):
2176 def _get_iterable_handler(type_obj):
2177 return iter if callable(getattr(type_obj, '__iter__', None)) else False
2179 self.register_op('iterate', _get_iterable_handler)
2180 self.register_op('get', lambda _: getattr)
# Module-wide base scope. glom() layers a fresh child scope on top of this
# for every call, and Glommer copies it at construction time by default.
_DEFAULT_SCOPE = ChainMap({})
def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.

    Any keyword argument other than *default*, *skip_exc*, *scope*,
    *glom_debug*, *path* and *inspector* raises :exc:`TypeError`.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # Defaults are interdependent:
    #   * skip_exc given, default omitted  -> default is None
    #   * default given, skip_exc omitted  -> skip_exc is GlomError
    #   * neither given                    -> nothing is skipped
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    # root scope for this call, layered over the module default scope
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    # the root scope is its own parent and its own root
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # caller-supplied scope entries are applied last, so they can override
    # anything set above
    scope.update(kwargs.pop('scope', {}))
    err = None
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        # with glom_debug set, the original exception propagates untouched
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised outside the except block, see the note on stack truncation above
    if err:
        raise err
    return ret
def chain_child(scope):
    """
    Helper for specs that evaluate several child specs in sequence
    (Auto(tuple), Switch(), etc) and want the child scopes chained
    together rather than each hanging off the same parent.

    Returns the scope to hand to the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))
    """
    own_map = scope.maps[0]
    if LAST_CHILD_SCOPE not in own_map:
        # no children yet, nothing to do
        return scope

    # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
    # this would have some interesting consequences for scoping
    # of tuples
    successor = scope[LAST_CHILD_SCOPE]
    successor_map = successor.maps[0]
    successor_map[NO_PYFRAME] = True
    # previous failed branches are forgiven as the
    # scope is re-wired into a new stack
    successor_map[CHILD_ERRORS].clear()
    return successor
# Set of types regarded as "unbound methods"; currently holds only the type
# of ``str.__len__``. NOTE(review): not referenced in this part of the file;
# confirm whether it is still used elsewhere.
unbound_methods = {type(str.__len__)}  #, type(Ref.glomit)])
2325def _has_callable_glomit(obj):
2326 glomit = getattr(obj, 'glomit', None)
2327 return callable(glomit) and not isinstance(obj, type)
def _glom(target, spec, scope):
    """Evaluate *spec* against *target* in a new child scope of *scope*.

    This is the recursive evaluator that specs reach via ``scope[glom]``.
    On any exception, the failing scope and the error are recorded for
    later error reporting and the exception is re-raised.
    """
    parent = scope
    pmap = parent.maps[0]
    # MODE and MIN_MODE are read from the parent's own (first) map only
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    # the parent remembers its most recent child (see chain_child)
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        # plain spec: MIN_MODE, when set, takes precedence over MODE
        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # record this scope in the parent's error list, and the error on
        # this scope's own map
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # the parent was chained (see chain_child); walk up through every
            # chained scope and record the same error on each
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise
def AUTO(target, spec, scope):
    """Default evaluation mode: interpret *spec* according to its
    builtin type.

    Strings are parsed as paths; dicts, lists and tuples go to their
    respective handlers; any other callable is called with *target*.
    Anything else raises TypeError.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        return _t_eval(target, Path.from_text(spec).path_t, scope)

    if isinstance(spec, dict):
        return _handle_dict(target, spec, scope)
    if isinstance(spec, list):
        return _handle_list(target, spec, scope)
    if isinstance(spec, tuple):
        return _handle_tuple(target, spec, scope)
    if isinstance(spec, basestring):
        # plain str already returned via the shortcut above
        return Path.from_text(spec).glomit(target, scope)
    if callable(spec):
        return spec(target)

    raise TypeError('expected spec to be dict, list, tuple, callable, string,'
                    ' or other Spec-like type, not: %r' % (spec,))
# Seed the default scope. The public ``glom`` function is used as the key
# under which the recursive evaluator ``_glom`` is stored (specs call it as
# ``scope[glom]``), and the TargetRegistry class keys the shared default
# registry instance.
_DEFAULT_SCOPE.update({
    glom: _glom,
    TargetRegistry: TargetRegistry(register_default_types=True),
})
def register(target_type, **kwargs):
    """Teach the module-level :func:`glom()` how to handle instances of
    *target_type* when they appear as targets.

    For example, adding basic iterable support for Django's ORM:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())

    Args:
       target_type (type): A type expected to appear in a glom()
         call target
       get (callable): A function which takes a target object and
         a name, acting as a default accessor. Defaults to
         :func:`getattr`.
       iterate (callable): A function which takes a target object
         and returns an iterator. Defaults to :func:`iter` if
         *target_type* appears to be iterable.
       exact (bool): When true, *target_type* is not added to the
         registry's subtype-matching tree; when false (the default),
         instances of subtypes can match as well.

    .. note::

       This function changes the behavior of the module-level
       :func:`glom()` for the whole process. If that global effect
       is undesirable for your application, or you're implementing
       a library, consider instantiating a :class:`Glommer` and
       using its :meth:`~Glommer.register()` and
       :meth:`Glommer.glom()` methods instead.

    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)
def register_op(op_name, **kwargs):
    """Add an operation to the default scope's registry. Intended for
    extension authors who need something beyond the builtin 'get',
    'iterate', 'keys', 'assign', and 'delete' operations.
    See TargetRegistry for more details.
    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)
class Glommer:
    """A :class:`Glommer` bundles its own type-registration context, so
    that advanced uses of glom don't need to worry about stepping on
    each other.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

    >>> glommer = Glommer()
    >>> glommer.glom({}, 'a.b.c', default='d')
    'd'
    >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
    3

    Instances also provide :meth:`~Glommer.register()` method for
    localized control over type handling.

    Args:
       register_default_types (bool): Whether or not to enable the
         handling behaviors of the default :func:`glom()`. These
         default actions include dict access, list and iterable
         iteration, and generic object attribute access. Defaults to
         True.
       scope (mapping): Base scope to copy at construction time.
         Defaults to the module's default scope.

    """
    def __init__(self, **kwargs):
        use_default_types = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # copying into a plain dict "freezes" the scope as it is at the
        # time of construction
        frozen = dict(base_scope)
        # each Glommer gets its own registry, replacing any copied one
        frozen[TargetRegistry] = TargetRegistry(register_default_types=use_default_types)
        self.scope = ChainMap(frozen)

    def register(self, target_type, **kwargs):
        """Register *target_type* so :meth:`~Glommer.glom()` will
        know how to handle instances of that type as targets. The
        registration affects only this Glommer's own registry.

        Args:
           target_type (type): A type expected to appear in a glom()
             call target
           get (callable): A function which takes a target object and
             a name, acting as a default accessor. Defaults to
             :func:`getattr`.
           iterate (callable): A function which takes a target object
             and returns an iterator. Defaults to :func:`iter` if
             *target_type* appears to be iterable.
           exact (bool): When true, *target_type* is not added to the
             registry's subtype-matching tree. Defaults to False.

        """
        kwargs.setdefault('exact', False)
        own_registry = self.scope[TargetRegistry]
        own_registry.register(target_type, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Run the module-level :func:`glom` with this instance's scope."""
        return glom(target, spec, scope=self.scope, **kwargs)
class Fill:
    """A specifier type which switches glom into "fill-mode" for the
    spec it wraps. In fill-mode, glom only interprets explicit
    specifier types (including T objects). Whereas the default mode
    has special interpretations for each of the builtin containers,
    fill-mode takes a lighter touch, making Fill great for "filling
    out" Python literals, like tuples, dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    As you can see, glom's usual built-in tuple item chaining behavior
    has switched into a simple tuple constructor.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # child evaluations inherit the mode set on this scope
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shorthand for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        class_name = self.__class__.__name__
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return f'{class_name}({inner})'
def FILL(target, spec, scope):
    """Fill-mode evaluation: rebuild plain dict/list/tuple/set/frozenset
    literals with each element evaluated recursively, call any other
    callable with *target*, and return everything else unchanged.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def fill_in(item):
        return scope[glom](target, item, scope)

    spec_type = type(spec)
    # exact type checks: subclasses of the builtins are not rebuilt
    if spec_type is dict:
        return {fill_in(k): fill_in(v) for k, v in spec.items()}
    if spec_type in (list, tuple, set, frozenset):
        filled = [fill_in(item) for item in spec]
        return filled if spec_type is list else spec_type(filled)
    if callable(spec):
        return spec(target)
    return spec
2559class _ArgValuator:
2560 def __init__(self):
2561 self.cache = {}
2563 def mode(self, target, spec, scope):
2564 """
2565 similar to FILL, but without function calling;
2566 useful for default, scope assignment, call/invoke, etc
2567 """
2568 recur = lambda val: scope[glom](target, val, scope)
2569 result = spec
2570 if type(spec) in (list, dict): # can contain themselves
2571 if id(spec) in self.cache:
2572 return self.cache[id(spec)]
2573 result = self.cache[id(spec)] = type(spec)()
2574 if type(spec) is dict:
2575 result.update({recur(key): recur(val) for key, val in spec.items()})
2576 else:
2577 result.extend([recur(val) for val in spec])
2578 if type(spec) in (tuple, set, frozenset): # cannot contain themselves
2579 result = type(spec)([recur(val) for val in spec])
2580 return result
def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    MIN_MODE on *scope* is switched to a fresh _ArgValuator for the
    duration of the evaluation and put back afterwards, including when
    the evaluation raises, so a caller that catches the error and keeps
    using *scope* does not stay stuck in argument-evaluation mode.
    """
    mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        scope[MIN_MODE] = mode