Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/core.py: 57%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""*glom gets results.*
3The ``glom`` package has one central entrypoint,
4:func:`glom.glom`. Everything else in the package revolves around that
5one function. Sometimes, big things come in small packages.
7A couple of conventional terms you'll see repeated many times below:
9* **target** - glom is built to work on any data, so we simply
10 refer to the object being accessed as the *"target"*
11* **spec** - *(aka "glomspec", short for specification)* The
12 accompanying template used to specify the structure of the return
13 value.
15Now that you know the terms, let's take a look around glom's powerful
16semantics.
18"""
20from __future__ import print_function
22import os
23import sys
24import pdb
25import copy
26import warnings
27import weakref
28import operator
29from abc import ABCMeta
30from pprint import pprint
31import string
32from collections import OrderedDict
33import traceback
35from face.helpers import get_wrap_width
36from boltons.typeutils import make_sentinel
37from boltons.iterutils import is_iterable
38#from boltons.funcutils import format_invocation
40basestring = str
41_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})
42from collections import ChainMap
43from reprlib import Repr, recursive_repr
# Debug mode is switched on by any GLOM_DEBUG environment value other than
# '', '0' or 'false' (compared case-insensitively, surrounding whitespace
# ignored).
GLOM_DEBUG = os.getenv('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')
# Width handed to format_target_spec_trace() as its default ``width``:
# whatever face's get_wrap_width(max_width=110) reports, but never less
# than 50 characters.
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50)  # min width

PATH_STAR = True
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# Changed to True in 23.1, this option to disable will go away soon

# Alias for the builtin ``type``.
# NOTE(review): presumably kept so code that uses ``type`` as a local or
# parameter name can still reach the builtin -- confirm against callers.
_type_type = type
# Internal "no value supplied" marker; used in this module as the default
# for optional arguments where None is a legitimate value (e.g. the
# ``default`` / ``default_factory`` / ``skip`` options of Coalesce).
_MISSING = make_sentinel('_MISSING')
SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.

>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}

Mostly used to drop keys from dicts (as above) or filter objects from
lists.

.. note::

   SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
   will remove the OMIT alias entirely.
"""
OMIT = SKIP  # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.

>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

# Scope key: _unpack_stack() follows scope[LAST_CHILD_SCOPE] to walk from a
# scope down to the most recently executed child scope.
LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed. Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""

NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

# NOTE(review): MODE and MIN_MODE are not used in the part of the file
# visible here; presumably scope keys selecting the evaluation mode --
# confirm against the rest of the module.
MODE = make_sentinel('MODE')

MIN_MODE = make_sentinel('MIN_MODE')

# Scope key: _unpack_stack() reads scope[CHILD_ERRORS] as the list of
# failed child branches of a scope.
CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""

# Scope key: _unpack_stack() reads scope.get(CUR_ERROR) as the error
# recorded on a scope, if any.
CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Directory containing this module; GlomError._finalize() looks for this
# path in traceback lines to find the last frame that belongs to glom.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))
class GlomError(Exception):
    """Root of the exception hierarchy raised by :func:`glom` processing
    logic.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Return a copy of *exc* whose type also derives from GlomError.

        The copy is built from ``exc.args`` and remembers the original
        exception.  If the copy cannot be constructed, *exc* itself is
        returned unchanged.
        """
        # TODO: need to test this against a wide array of exception types
        # Creating a throwaway subclass works for exception types defined
        # in pure Python as well as in C.
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_type = type("GlomError.wrap({})".format(orig_type.__name__), bases, {})
        try:
            new_exc = wrapper_type(*exc.args)
            new_exc.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return new_exc

    def _set_wrapped(self, exc):
        """Record *exc* as the exception this error stands in for."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the current traceback text and *scope* so that
        ``__str__`` can render the full target-spec trace later.
        """
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        tb_lines = traceback.format_exc().strip().splitlines()
        # Walk backwards to the last line mentioning the glom package
        # directory; everything after that line and the one following it
        # is kept.  With no such line, the whole traceback is kept.
        for dist_from_end, line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in line:
                limit = dist_from_end - 1
                break
        else:
            limit = len(tb_lines)
        self._tb_lines = tb_lines[-limit:]
        # if the first line is trying to put a caret at a byte-code location on a line that
        # isn't being displayed, skip it
        if set(self._tb_lines[0]) <= {' ', '^', '~'}:
            self._tb_lines = self._tb_lines[1:]
        self._scope = scope

    def __str__(self):
        # already rendered once: reuse the cached text
        if getattr(self, '_finalized_str', None):
            return self._finalized_str

        # finalized: render the target-spec trace followed by the traceback tail
        if getattr(self, '_scope', None) is not None:
            self._target_spec_trace = format_target_spec_trace(self._scope, self.__wrapped)
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     self._target_spec_trace]
            lines += self._tb_lines
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # not finalized: prefer the subclass's get_message(), else the
        # plain Exception text
        try:
            message_func = self.get_message
        except AttributeError:
            message_func = super().__str__
        return message_func()
def _unpack_stack(scope, only_errors=True):
    """
    Flatten a scope chain into ``[[scope, spec, target, error, [children]]]``.

    This is a convenience helper for printing stacks.

    only_errors=True trims trailing entries that carry no error, i.e.
    branches which may still be hanging around but were not involved in
    the stack trace of the error.

    only_errors=False keeps every entry, which could be useful for
    debugger / introspection (similar to traceback.print_stack()).
    """
    frames = []
    cur_scope = scope.maps[0]
    while LAST_CHILD_SCOPE in cur_scope:
        child = cur_scope[LAST_CHILD_SCOPE]
        branches = cur_scope[CHILD_ERRORS]
        if branches == [child]:
            branches = []  # if there's only one branch, count it as linear
        frames.append([cur_scope, cur_scope[Spec], cur_scope[T],
                       cur_scope.get(CUR_ERROR), branches])

        # NB: compare by identity, not equality, to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if any(branch is child for branch in branches):
            break  # if child already covered by branches, stop the linear descent

        cur_scope = child.maps[0]
    else:  # if break executed above, cur scope was already added
        frames.append([cur_scope, cur_scope[Spec], cur_scope[T],
                       cur_scope.get(CUR_ERROR), []])

    # push errors "down" to where they were first raised / first observed
    for upper, lower in zip(frames, frames[1:]):
        if upper[3] == lower[3]:
            upper[3] = None

    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(frames) > 1 and frames[-1][3] is None:
            frames.pop()
    return frames
def _format_trace_value(value, maxlen):
    """Render *value* with bbrepr for a trace line.

    When the text is longer than *maxlen* it is cut and finished with a
    ``'... (len=N)'`` suffix (just ``'...'`` if ``len(value)`` fails).
    """
    text = bbrepr(value).replace("\\'", "'")
    if len(text) <= maxlen:
        return text
    try:
        suffix = '... (len=%s)' % len(value)
    except Exception:
        suffix = '...'
    return text[:maxlen - len(suffix)] + suffix
def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    Render a scope as a short, multi-line target/spec summary.

    Branching specs are rendered recursively, one nesting level (*depth*)
    deeper per branch.  Errors other than *root_error* get a line of
    their own.
    """
    indent = " " + "|" * depth
    tick = "| " if depth else "- "

    def mk_fmt(label, t=None):
        # build a formatter that prefixes a value with "<indent><tick><label>: "
        prefix = indent + (t or tick) + label + ": "
        room = width - len(prefix)

        def fmt(value):
            return prefix + _format_trace_value(value, room)
        return fmt

    fmt_t = mk_fmt("Target")
    fmt_s = mk_fmt("Spec")
    fmt_b = mk_fmt("Spec", "+ ")

    def recurse(sub_scope, last=False):
        # prev_target is looked up when called, so each branch sees the
        # most recently printed target
        return format_target_spec_trace(sub_scope, root_error, width, depth + 1, prev_target, last)

    def fmt_e(err):
        # exception-only traceback text, minus its trailing newline
        exc_text = "".join(traceback.format_exception_only(type(err), err))[:-1]
        return indent + tick + exc_text

    segments = []
    for _scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            segments.append(fmt_t(target))
            prev_target = target
        if branches:
            segments.append(fmt_b(spec))
            segments.extend([recurse(branch) for branch in branches[:-1]])
            segments.append(recurse(branches[-1], last_branch))
        else:
            segments.append(fmt_s(spec))
        last_line_error = error is not None and error is not root_error
        if last_line_error:
            segments.append(fmt_e(error))

    if depth:  # \ on first line, X on last line
        def remark(line, mark):
            return line[:depth + 1] + mark + line[depth + 2:]

        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)
283# TODO: not used (yet)
def format_oneline_trace(scope):
    """
    Render a scope as a single-line summary (shortest summary possible).
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    segments = []
    prev_target = _MISSING
    for frame_scope, spec, target, _error, _branches in _unpack_stack(scope, only_errors=False):
        segments.append('/')
        if type(spec) in (TType, Path):
            segments.append(bbrepr(spec))
        else:
            segments.append(type(spec).__name__)
        if target != prev_target:
            segments.extend(('!', type(target).__name__))
        if Path in frame_scope:
            joined = '->'.join([str(p) for p in frame_scope[Path]])
            segments.extend(('<', joined, '>'))
        prev_target = target

    return "".join(segments)
class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """This :exc:`GlomError` subtype represents a failure to access an
    attribute as dictated by the spec. The most commonly-seen error
    when using glom, it maintains a copy of the original exception and
    produces a readable error message for easy debugging.

    If you see this error, you may want to:

    * Check the target data is accurate using :class:`~glom.Inspect`
    * Catch the exception and return a semantically meaningful error message
    * Use :class:`glom.Coalesce` to specify a default
    * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc = exc
        self.path = path
        self.part_idx = part_idx

    def get_message(self):
        """Build the message naming the path part that could not be accessed."""
        failed_part = Path(self.path).values()[self.part_idx]
        template = 'could not access {!r}, part {!r} of {!r}, got error: {!r}'
        return template.format(failed_part, self.part_idx, self.path, self.exc)

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.exc, self.path, self.part_idx)
class PathAssignError(GlomError):
    """This :exc:`GlomError` subtype is raised when an assignment fails,
    stemming from an :func:`~glom.assign` call or other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc = exc
        self.path = path
        self.dest_name = dest_name

    def get_message(self):
        """Build the message naming the destination that could not be assigned."""
        template = 'could not assign {!r} on object at {!r}, got error: {!r}'
        return template.format(self.dest_name, self.path, self.exc)

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.exc, self.path, self.dest_name)
class CoalesceError(GlomError):
    """This :exc:`GlomError` subtype is raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj = coal_obj
        self.skipped = skipped
        self.path = path

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.coal_obj, self.skipped, self.path)

    def get_message(self):
        """Summarize which subspecs were tried and what each produced."""
        coal = self.coal_obj
        outcomes = []
        for val in self.skipped:
            type_name = val.__class__.__name__
            if isinstance(val, coal.skip_exc):
                # an exception caught via skip_exc: show its type name as-is
                outcomes.append(type_name)
            else:
                # a value rejected by the skip option
                outcomes.append('<skipped %s>' % type_name)
        msg = ('no valid values found. Tried %r and got (%s)'
               % (tuple(coal.subspecs), ', '.join(outcomes)))
        # only mention options that differ from their defaults
        if coal.skip is not _MISSING:
            msg += ', skip set to %r' % (coal.skip,)
        if coal.skip_exc is not GlomError:
            msg += ', skip_exc set to %r' % (coal.skip_exc,)
        if self.path is not None:
            msg += ' (at path %r)' % (self.path,)
        return msg
class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Derives from both :exc:`GlomError` and :exc:`TypeError`, so a handler
    for either type catches it.
    """
class UnregisteredTarget(GlomError):
    """This :class:`GlomError` subtype is raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on a non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op = op
        self.target_type = target_type
        self.type_map = type_map
        self.path = path
        super().__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        return '%s(%r, <type %r>, %r, %r)' % (
            self.__class__.__name__, self.op, self.target_type.__name__,
            self.type_map, self.path)

    def get_message(self):
        """Explain which operation is unsupported for the target type."""
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types with a truthy handler count as registered
        registered = sorted(
            typ.__name__ for typ, handler in self.type_map.items() if handler)
        registered_text = '(%s)' % ', '.join(registered)
        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, registered_text))
        if self.path:
            msg += ' (at %r)' % (self.path,)
        return msg
500if getattr(__builtins__, '__dict__', None) is not None:
501 # pypy's __builtins__ is a module, as is CPython's REPL, but at
502 # normal execution time it's a dict?
503 __builtins__ = __builtins__.__dict__
506_BUILTIN_ID_NAME_MAP = dict([(id(v), k)
507 for k, v in __builtins__.items()])
510class _BBRepr(Repr):
511 """A better repr for builtins, when the built-in repr isn't
512 roundtrippable.
513 """
514 def __init__(self):
515 super().__init__()
516 # turn up all the length limits very high
517 for name in self.__dict__:
518 if not isinstance(getattr(self, name), int):
519 continue
520 setattr(self, name, 1024)
522 def repr1(self, x, level):
523 ret = Repr.repr1(self, x, level)
524 if not ret.startswith('<'):
525 return ret
526 return _BUILTIN_ID_NAME_MAP.get(id(x), ret)
529bbrepr = recursive_repr()(_BBRepr().repr)
class _BBReprFormatter(string.Formatter):
    """
    Formatter whose ``!r`` conversion goes through bbrepr instead of
    the builtin repr; all other conversions are unchanged.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format
546# TODO: push this back up to boltons with repr kwarg
def format_invocation(name='', args=(), kwargs=None, **kw):
    """Format a basic Python-style function call from a name,
    positional arguments, and keyword arguments.

    *kwargs* may be a dict (rendered in sorted key order) or an iterable
    of ``(key, value)`` pairs (rendered in the order given).  The only
    extra keyword accepted is ``repr``, the callable used to render
    values; it defaults to bbrepr.

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)

    """
    _repr = kw.pop('repr', bbrepr)
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))
    kwargs = kwargs or {}

    positional = ', '.join([_repr(arg) for arg in args])

    if isinstance(kwargs, dict):
        kwarg_items = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        kwarg_items = kwargs
    keyword = ', '.join(['%s=%s' % (key, _repr(val)) for key, val in kwarg_items])

    # separate the two groups only when both are non-empty
    separator = ', ' if positional and keyword else ''
    return '%s(%s)' % (name, positional + separator + keyword)
class Path(object):
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Integer indexes outside ``-len(path) <= i < len(path)`` raise
    :exc:`IndexError`.

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        # a leading T-object is used as the root to extend; otherwise
        # start from T itself
        if isinstance(path_parts[0], TType):
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # splice the (op, arg) pairs of the sub-path onto ours
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # any other value is a plain path ('P') segment
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            # once the cache is over its limit, build without storing
            if len(cache) > cls._MAX_CACHE:
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # __ops__ is (root, op, arg, op, arg, ...): one segment per pair
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path begins with *other* (a string, Path or T).

        Raises:
            TypeError: if *other* is none of those types.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the selected segment(s).

        *i* may be an integer or a slice (anything with ``start``,
        ``stop`` and ``step`` attributes).

        Raises:
            IndexError: if an integer index is out of range.
        """
        cur_t_path = self.path_t.__ops__
        # Segment k occupies __ops__[2k + 1:2k + 3], so segment indexes
        # are converted to offsets into the flat ops tuple.
        try:
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            start = (start * 2) + 1 if start >= 0 else (start * 2) + len(cur_t_path)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else (stop * 2) + len(cur_t_path)
        except AttributeError:
            # not a slice: treat i as a single integer index
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + len(cur_t_path)
            # ``>=`` rather than ``>``: an index equal to len(self) maps to
            # start == len(cur_t_path), which is already past the last
            # segment.  Letting it through returned an empty Path instead
            # of raising, and made __getitem__-based iteration yield a
            # spurious trailing empty Path.
            if start < 0 or start >= len(cur_t_path):
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # apply the step to whole (op, arg) pairs, then flatten again
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        new_t.__ops__ = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])
758def _format_path(t_path):
759 path_parts, cur_t_path = [], []
760 i = 0
761 while i < len(t_path):
762 op, arg = t_path[i], t_path[i + 1]
763 i += 2
764 if op == 'P':
765 if cur_t_path:
766 path_parts.append(cur_t_path)
767 cur_t_path = []
768 path_parts.append(arg)
769 else:
770 cur_t_path.append(op)
771 cur_t_path.append(arg)
772 if path_parts and cur_t_path:
773 path_parts.append(cur_t_path)
775 if path_parts or not cur_t_path:
776 return 'Path(%s)' % ', '.join([_format_t(part)
777 if type(part) is list else repr(part)
778 for part in path_parts])
779 return _format_t(cur_t_path)
class Spec(object):
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope or {}

    def glom(self, target, **kw):
        """Run this spec against *target*.

        A ``scope`` keyword, if given, is layered over this Spec's own
        scope; remaining keywords are passed through to the glom function.
        """
        merged = dict(self.scope)
        merged.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(merged)
        # a glom function stored in the scope overrides the module-level one
        glom_func = merged.get(glom, glom)
        return glom_func(target, self.spec, **kw)

    def glomit(self, target, scope):
        scope.update(self.scope)
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        cn = self.__class__.__name__
        spec_text = bbrepr(self.spec)
        if not self.scope:
            return '%s(%s)' % (cn, spec_text)
        return '%s(%s, scope=%r)' % (cn, spec_text, self.scope)
class Coalesce(object):
    """Coalesce objects specify fallback behavior for a list of
    subspecs.

    Subspecs are passed as positional arguments, and keyword arguments
    control defaults. Each subspec is evaluated in turn, and if none
    match, a :exc:`CoalesceError` is raised, or a default is returned,
    depending on the options used.

    .. note::

       This operation may seem very familiar if you have experience with
       `SQL`_ or even `C# and others`_.


    In practice, this fallback behavior's simplicity is only surpassed
    by its utility:

    >>> target = {'c': 'd'}
    >>> glom(target, Coalesce('a', 'b', 'c'))
    'd'

    glom tries to get ``'a'`` from ``target``, but gets a
    KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,
    glom *coalesces* into the next subspec, ``'b'``. The process
    repeats until it gets to ``'c'``, which returns our value,
    ``'d'``. If our value weren't present, we'd see:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    Same process, but because ``target`` is empty, we get a
    :exc:`CoalesceError`.

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.


    If we want to avoid an exception, and we know which value we want
    by default, we can set *default*:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')
    'd-fault'

    ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

    Args:

       subspecs: One or more glommable subspecs
       default: A value to return if no subspec results in a valid value
       default_factory: A callable whose result will be returned as a default
       skip: A value, tuple of values, or predicate function
         representing values to ignore
       skip_exc: An exception or tuple of exception types to catch and
         move on to the next subspec. Defaults to :exc:`GlomError`, the
         parent type of all glom runtime exceptions.

    Passing both *default* and *default_factory* raises
    :exc:`ValueError`; any other unexpected keyword raises
    :exc:`TypeError`.

    If all subspecs produce skipped values or exceptions, a
    :exc:`CoalesceError` will be raised. For more examples, check out
    the :doc:`tutorial`, which makes extensive use of Coalesce.

    .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE
    .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

    """
    def __init__(self, *subspecs, **kwargs):
        self.subspecs = subspecs
        self._orig_kwargs = dict(kwargs)  # kept verbatim for __repr__
        self.default = kwargs.pop('default', _MISSING)
        self.default_factory = kwargs.pop('default_factory', _MISSING)
        # Compare against the sentinel by identity, as glomit() does.  A
        # truthiness test would let a falsy default (None, 0, '', [])
        # slip through alongside a default_factory, and would raise for
        # defaults whose truth value is ambiguous.
        if self.default is not _MISSING and self.default_factory is not _MISSING:
            raise ValueError('expected one of "default" or "default_factory", not both')
        self.skip = kwargs.pop('skip', _MISSING)
        # normalize ``skip`` into a predicate: value -> should it be skipped?
        if self.skip is _MISSING:
            self.skip_func = lambda v: False
        elif callable(self.skip):
            self.skip_func = self.skip
        elif isinstance(self.skip, tuple):
            self.skip_func = lambda v: v in self.skip
        else:
            self.skip_func = lambda v: v == self.skip
        self.skip_exc = kwargs.pop('skip_exc', GlomError)
        if kwargs:
            raise TypeError('unexpected keyword args: %r' % (sorted(kwargs.keys()),))

    def glomit(self, target, scope):
        skipped = []
        for subspec in self.subspecs:
            try:
                ret = scope[glom](target, subspec, scope)
                if not self.skip_func(ret):
                    break  # first acceptable value wins
                skipped.append(ret)
            except self.skip_exc as e:
                skipped.append(e)
                continue
        else:
            # every subspec was skipped or raised: fall back to a default
            if self.default is not _MISSING:
                ret = arg_val(target, self.default, scope)
            elif self.default_factory is not _MISSING:
                ret = self.default_factory()
            else:
                raise CoalesceError(self, skipped, scope[Path])
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        return format_invocation(cn, self.subspecs, self._orig_kwargs, repr=bbrepr)
class Inspect(object):
    """The :class:`~glom.Inspect` specifier type gives visibility into
    how glom evaluates a specification, which helps when debugging
    surprising results on unexpected data.

    There are two ways to place an :class:`~glom.Inspect` in a spec:
    wrap the spec of interest with it, or drop it in, without
    arguments, anywhere a spec is accepted.

    Keyword arguments select the mode. With no arguments at all it
    simply echoes the state of the glom at the point where it appears:

      >>> target = {'a': {'b': {}}}
      >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
      ---
      path: ['a.b']
      target: {'a': {'b': {}}}
      output: {}
      ---

    Apart from the debugging output, :class:`~glom.Inspect` does not
    alter the target, the spec, or the result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether the Inspect should be applied at
         every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): Whether a debugging prompt should appear
         before evaluating each inspected spec. Can also take a
         callable. Defaults to False.
       post_mortem (bool): Whether exceptions raised by inspected
         specs should be caught and interactively debugged with
         :mod:`pdb`. Can also take a callable. Defaults to False.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # No positional argument means "inspect right here".
        self.wrapped = a[0] if a else Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)

        # True selects the pdb default; any other truthy value must be
        # callable so that _trace can invoke it.
        enter_hook = kw.pop('breakpoint', False)
        if enter_hook is True:
            enter_hook = pdb.set_trace
        elif enter_hook and not callable(enter_hook):
            raise TypeError('breakpoint expected bool or callable, not: %r' % enter_hook)
        self.breakpoint = enter_hook

        error_hook = kw.pop('post_mortem', False)
        if error_hook is True:
            error_hook = pdb.post_mortem
        elif error_hook and not callable(error_hook):
            raise TypeError('post_mortem expected bool or callable, not: %r' % error_hook)
        self.post_mortem = error_hook

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        # Keep the real handler reachable under the Inspect key, then
        # route child evaluation through the tracing callback.
        scope[Inspect] = scope[glom]
        scope[glom] = self._trace
        return scope[glom](target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Evaluate *spec* with the stashed handler, echoing and
        triggering the debugging hooks according to the configuration.
        """
        if not self.recursive:
            # only trace this level: restore the real handler for children
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path: ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            output = scope[Inspect](target, spec, scope)
        except Exception:
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', output)
            print('---')
        return output
class Call(object):
    """:class:`Call` describes a call of *func* that is performed when
    the spec is evaluated against a target.

    Like :func:`~functools.partial`, :class:`Call` offers nothing that
    a ``lambda`` or other function could not do, but it is meant to
    read better and it has a more useful ``repr``.

    Args:
       func (callable): a function or other callable to be called with
          the target

    :class:`Call` combines well with :attr:`~glom.T` to construct objects. For
    instance, to generate a dict and then pass it to a constructor:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    This does the same as ``glom(target, lambda target:
    ExampleClass(**target))``, but it's easy to see which one reads
    better.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        # With no func given, the target itself is what gets called.
        func = T if func is None else func
        if not callable(func) and not isinstance(func, (Spec, TType)):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        'run against the current target'
        # func, args and kwargs are each passed through arg_val, in
        # that order, before the call is made.
        func = arg_val(target, self.func, scope)
        positional = arg_val(target, self.args, scope)
        keyword = arg_val(target, self.kwargs, scope)
        return func(*positional, **keyword)

    def __repr__(self):
        pieces = (self.__class__.__name__, bbrepr(self.func), self.args, self.kwargs)
        return '%s(%s, args=%r, kwargs=%r)' % pieces
def _is_spec(obj, strict=False):
    """Report whether *obj* counts as a glom spec.

    ``TType`` instances always qualify. In strict mode the only other
    accepted objects are exact ``Spec`` instances (subclasses do not
    count); otherwise anything accepted by ``_has_callable_glomit``
    qualifies.
    """
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec
class Invoke(object):
    """Specifier type designed for easy invocation of callables from glom.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` resembles :func:`functools.partial`, except that the
    call it sets up is "templated": constants and glom specs can be
    interleaved freely among its arguments.

    For example, the following creates a spec which can be used to
    check if targets are integers:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    And this composes like any other glom spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Another example, mixing positional and keyword arguments:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Invoke also helps with evaluating zero-argument functions:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not callable(func) and not _is_spec(func, strict=True):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat tuple of (op, args, kwargs) triples, in call order
        self._args = ()
        # maps each known kwarg name to the **kw dict that most recently
        # set it; the dict object itself serves as the unique marker.
        self._cur_kwargs = {}

    @classmethod
    def specfunc(cls, spec):
        """Creates an :class:`Invoke` instance where the function is
        indicated by a spec.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def _derive(self, op, args, kwargs, claimed=()):
        # Build the new Invoke with one more (op, args, kwargs) triple
        # appended. Every name in *claimed* is recorded as most
        # recently set by *kwargs*.
        clone = self.__class__(self.func)
        clone._args = self._args + (op, args, kwargs)
        clone._cur_kwargs = dict(self._cur_kwargs)
        clone._cur_kwargs.update(dict.fromkeys(claimed, kwargs))
        return clone

    def constants(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword argument values stored for passing to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.
        """
        return self._derive('C', a, kw, claimed=kw)

    def specs(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword arguments stored to be interpreted as specs, with
        the results passed to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.

        """
        return self._derive('S', a, kw, claimed=kw)

    def star(self, args=None, kwargs=None):
        """Returns a new :class:`Invoke` spec, with *args* and/or *kwargs*
        specs set to be "starred" or "star-starred" (respectively)

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
              underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
              the underlying function.

        One or both of the above arguments should be set.

        The :meth:`~Invoke.star()`, like other :class:`Invoke`
        methods, may be called multiple times. The *args* and *kwargs*
        will be stacked in the order in which they are provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        return self._derive('*', args, kwargs)

    def _triples(self):
        # The stored arguments regrouped as (op, args, kwargs) triples.
        flat = self._args
        return zip(flat[0::3], flat[1::3], flat[2::3])

    def __repr__(self):
        method_names = {'C': 'constants', 'S': 'specs', '*': 'star'}
        head = self.__class__.__name__
        if type(self.func) is Spec:
            head += '.specfunc'
            head_args = (self.func.spec,)
        else:
            head_args = (self.func,)
        chunks = [format_invocation(head, head_args, repr=bbrepr)]

        for op, op_args, op_kwargs in self._triples():
            if op in ('C', 'S'):
                # only show the kwargs that this call still "owns"
                shown_args = op_args
                shown_kwargs = [(k, v) for k, v in op_kwargs.items()
                                if self._cur_kwargs[k] is op_kwargs]
            else:
                # star() takes its specs through the args=/kwargs= keywords
                shown_args = ()
                shown_kwargs = {}
                if op_args:
                    shown_kwargs['args'] = op_args
                if op_kwargs:
                    shown_kwargs['kwargs'] = op_kwargs

            chunks.append('.' + format_invocation(method_names[op], shown_args,
                                                  shown_kwargs, repr=bbrepr))

        return ''.join(chunks)

    def glomit(self, target, scope):
        def evaluate(spec):
            return scope[glom](target, spec, scope)

        func = self.func
        if _is_spec(func, strict=True):
            func = evaluate(func)

        call_args = []
        call_kwargs = {}
        for op, op_args, op_kwargs in self._triples():
            if op == 'C':
                call_args.extend(op_args)
                for name, value in op_kwargs.items():
                    # skip kwargs that a later call has overridden
                    if self._cur_kwargs[name] is op_kwargs:
                        call_kwargs[name] = value
            elif op == 'S':
                call_args.extend([evaluate(sub) for sub in op_args])
                for name, sub in op_kwargs.items():
                    if self._cur_kwargs[name] is op_kwargs:
                        call_kwargs[name] = evaluate(sub)
            elif op == '*':
                if op_args is not None:
                    call_args.extend(evaluate(op_args))
                if op_kwargs is not None:
                    call_kwargs.update(evaluate(op_kwargs))

        return func(*call_args, **call_kwargs)
class Ref(object):
    """Give part of a spec a name so that it can be referred to from
    elsewhere in the same spec. Useful for trees and other
    self-similar data structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
          to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        # Named specs are stored in the scope under a (Ref, name) key.
        key = (Ref, self.name)
        if self.subspec is _MISSING:
            resolved = scope[key]  # reference: look the named spec up
        else:
            resolved = scope[key] = self.subspec  # definition: record it
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr of the pair, minus the surrounding parentheses
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"
class TType(object):
    """``T``, short for "target". A singleton object that enables
    object-oriented expression of a glom specification.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    A good way to think of ``T`` is as a stand-in for your data:
    whatever you do to ``T`` is recorded, and then replayed against
    the real target during the :func:`glom` call. Take this example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    The ``'a.b.c'``-style shorthand and :class:`~glom.Path` objects
    also provide access, but when attribute and key lookups need to
    be spelled out explicitly, ``T`` is the tool for the job.

    ``T`` is not limited to unambiguous access. Methods can be called
    and most other things one would do with a normal object are
    available too:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object can go anywhere in the spec. As seen in the example
    above, we access ``'a'``, use a ``T`` to get ``'b'`` and iterate
    over its ``items``, turning them into a ``list``.

    You can even use ``T`` with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    On a further note, while ``lambda`` works great in glom specs, and
    can be very handy at times, ``T`` and :class:`~glom.Call`
    eliminate the need for the vast majority of ``lambda`` usage with
    glom.

    Unlike ``lambda`` and other functions, ``T`` roundtrips
    beautifully and transparently:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # __ops__ is a flat tuple: the root object first, followed by
    # alternating (operation code, argument) entries.
    __slots__ = ('__ops__',)

    def __getattr__(self, name):
        if not name.startswith('__'):
            return _t_child(self, '.', name)
        raise AttributeError('T instances reserve dunder attributes.'
                             ' To access the "{name}" attribute, use'
                             ' T.__("{d_name}")'.format(name=name, d_name=name[2:]))

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        # The S root only supports the S(name=spec) assignment form.
        if self is S and args:
            raise TypeError('S() takes no positional arguments, got: %r' % (args,))
        if self is S and not kwargs:
            raise TypeError('S() expected at least one kwarg, got none')
        # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        # operation codes sit at the odd positions of __ops__
        return sum(1 for op in self.__ops__[1::2] if op in ('x', 'X'))

    # Arithmetic operators are recorded with single-character codes;
    # some differ from the Python operator: '#' is //, ':' is **,
    # '_' is unary minus.

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    def __(self, name):
        # escape hatch for the dunder attributes that __getattr__ refuses
        return _t_child(self, '.', '__' + name)

    def __repr__(self):
        root, operations = self.__ops__[0], self.__ops__[1:]
        return _format_t(operations, root)

    def __getstate__(self):
        # The root singleton is replaced by its one-letter name in the
        # state; __setstate__ maps the letter back to the singleton.
        root, operations = self.__ops__[0], self.__ops__[1:]
        return ({T: 'T', S: 'S', A: 'A'}[root],) + operations

    def __setstate__(self, state):
        root = {'T': T, 'S': S, 'A': A}[state[0]]
        self.__ops__ = (root,) + state[1:]
def _t_child(parent, operation, arg):
    """Return a new ``TType`` whose ops are *parent*'s ops followed by
    the ``(operation, arg)`` pair.

    Raises:
        BadSpec: if *parent* is rooted at ``A`` and *operation* is not
            one of the access operations ``'.'``, ``'['`` or ``'P'``.
    """
    parent_ops = parent.__ops__
    if parent_ops[0] is A and operation not in ('.', '[', 'P'):
        # whitelist rather than blacklist assignment friendly operations
        # TODO: error type?
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = parent_ops + (operation, arg)
    return child
def _s_first_magic(scope, key, _t):
    """
    enable S.a to do S['a'] or S['a'].val as a special
    case for accessing user defined string variables

    Returns ``scope[key]``; a ``KeyError`` from that lookup is
    reported as a ``PathAccessError`` instead.
    """
    try:
        found = scope[key]
    except KeyError as exc:
        # always only one level depth, hence 0
        failure = PathAccessError(exc, Path(_t), 0)
    else:
        return found
    # raised after the except block has been left, as the original did
    raise failure
def _t_eval(target, _t, scope):
    """Evaluate the recorded operations of the ``TType`` *_t*.

    ``_t.__ops__`` is a flat tuple holding the root (``T``, ``S`` or
    ``A``) followed by alternating operation codes and arguments. The
    operations are applied one after another to a current value which
    starts out as *target* (root ``T``) or as *scope* (roots ``S`` and
    ``A``).

    Returns:
        The value left after the final operation. For an ``A`` root
        and for the ``S(name=spec)`` form, *target* is returned
        unchanged; those forms exist for their effect on the scope or
        on the assignment destination.

    Raises:
        BadSpec: for an ``A`` path without a destination.
        PathAccessError: when an attribute/item/path access or an
            arithmetic operation fails on the current value.
    """
    t_path = _t.__ops__
    i = 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            # the final (op, arg) pair is the assignment, not a fetch
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                # nxt grows while it is being iterated, so the loop
                # also visits children appended along the way; the id
                # set keeps an object from being expanded twice.
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # TType.__floordiv__ records '#'; without this
                    # branch ``T // n`` left cur silently unchanged.
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            # raised here, outside of the except blocks above
            raise pae
        i += 2
    if root is A:
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur
def _assign_op(dest, op, arg, val, path, scope):
    """helper method for doing the assignment on a T operation

    ``'['`` sets an item and ``'.'`` sets an attribute on *dest*;
    ``'P'`` delegates to the 'assign' handler that the scope's target
    registry provides for *dest*, reporting any failure of that
    handler as a ``PathAssignError``.
    """
    if op == '[':
        dest[arg] = val
        return
    if op == '.':
        setattr(dest, arg, val)
        return
    if op != 'P':  # pragma: no cover
        raise ValueError('unsupported T operation for assignment')
    assign = scope[TargetRegistry].get_handler('assign', dest)
    try:
        assign(dest, arg, val)
    except Exception as exc:
        raise PathAssignError(exc, path, arg)
def _extend_children(children, item, get_handler):
    """Append the direct children of *item* to the *children* list.

    When both 'keys' and 'get' handlers are available for *item*, the
    children are the values fetched for each of its keys. Otherwise
    an 'iterate' handler is used if there is one. This is best
    effort: a missing handler or an error while reading children is
    swallowed, and whatever was collected up to that point is kept.
    """
    try:  # dict or obj-like
        keys = get_handler('keys', item)
        get = get_handler('get', item)
    except UnregisteredTarget:
        keyed = False
    else:
        keyed = True

    if keyed:
        try:
            for key in keys(item):
                try:
                    children.append(get(item, key))
                except Exception:
                    pass  # skip just this key
        except Exception:
            pass  # listing or iterating the keys failed
        return

    try:
        iterate = get_handler('iterate', item)
    except UnregisteredTarget:
        return  # neither keyed nor iterable: no children
    try:  # list-like
        children.extend(iterate(item))
    except Exception:
        pass
# The three root singletons. The op tuple of each one starts with the
# singleton itself.
T = TType()  # target aka Mr. T aka "this"
T.__ops__ = (T,)

S = TType()  # like T, but means grab stuff from Scope, not Target
S.__ops__ = (S,)

A = TType()  # like S, but shorthand to assign target to scope
A.__ops__ = (A,)

# helper constants for Path.from_text
_T_STAR = T.__star__()
_T_STARSTAR = T.__starstar__()

UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')
def _format_slice(x):
    """Format an index for display inside square brackets.

    Slices are rendered in ``start:stop[:step]`` notation, with
    ``None`` fields left empty and the step omitted when it is
    ``None``; anything else is passed through ``bbrepr``.
    """
    if type(x) is not slice:
        return bbrepr(x)
    fields = [x.start, x.stop]
    if x.step is not None:
        fields.append(x.step)
    return ":".join(["" if field is None else bbrepr(field) for field in fields])
def _format_t(path, root=T):
    """Render the op sequence *path* as the Python expression that
    would record it, starting from *root* (``T``, ``S`` or ``A``).

    *path* is the flat tuple of alternating operation codes and
    arguments, without the root. A path containing a ``'P'``
    operation is handed over to ``_format_path`` as a whole.
    """
    # Operation codes of the arithmetic operators. '*' and '#' (floor
    # division) belong here too: an operand using them needs the same
    # parentheses as the others, e.g. ``~(T * 2)`` or ``-(T // 2)``.
    arith_ops = '+-*#/%:&|^~_'
    # codes that differ from the Python operator they stand for
    symbols = {':': '**', '#': '//'}

    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append("[%s]" % (index,))
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            # parenthesize when arithmetic already appears to the left
            if any([o in path[:i] for o in arith_ops]):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_path = arg.__ops__
                if any([o in arg_path for o in arith_ops]):
                    formatted_arg = '(' + formatted_arg + ')'
            prepr.append(' ' + symbols.get(op, op) + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)
class Val(object):
    """Val objects are specs which evaluate to the wrapped *value*.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Here ``'counts'`` is not looked up as a key the way ``'a.b'``
    is; :func:`~glom.glom` unwraps the Val and puts its value into
    the result as-is.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # target and scope play no part: the wrapped value is the result
        return self.value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, bbrepr(self.value))


Literal = Val  # backwards compat for pre-20.7.0
class ScopeVars(object):
    """This is the runtime partner of :class:`Vars` -- this is what
    actually lives in the scope and stores runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # The instance __dict__ is the storage: *base* goes in first,
        # then *defaults* is layered on top of it.
        values = dict(base)
        values.update(defaults)
        self.__dict__ = values

    def __iter__(self):
        # yields (name, value) pairs, which makes dict(scope_vars) work
        return iter(self.__dict__.items())

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, bbrepr(self.__dict__))
class Vars(object):
    """
    :class:`Vars` is a helper that can be used with **S** in order to
    store shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Arguments here should be thought of the same way as default arguments
    to a function. Each time the spec is evaluated, the same arguments
    will be referenced; so, think carefully about mutable data structures.

    Raises:
        TypeError, ValueError: whatever :class:`dict()` raises when
            *base* is not a valid first argument for it.
    """
    def __init__(self, base=(), **kw):
        # A one-shot iterator (generator, zip(), iter(...)) would be
        # exhausted by the validation below, leaving nothing for
        # glomit() to read; take a reusable snapshot of it first.
        try:
            one_shot = iter(base) is base
        except TypeError:
            # not iterable: mappings without __iter__ are still fine for
            # dict(), anything else is rejected by dict() below
            one_shot = False
        if one_shot:
            base = list(base)
        dict(base)  # ensure it is a dict-compatible first arg
        self.base = base
        self.defaults = kw

    def glomit(self, target, spec):
        # a fresh ScopeVars per evaluation, built from the same arguments
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        ret = format_invocation(self.__class__.__name__,
                                args=(self.base,) if self.base else (),
                                kwargs=self.defaults,
                                repr=bbrepr)
        return ret
class Let(object):
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if not kw:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        # Evaluate every bound spec first and only then publish the
        # results to the scope, all at once. The target passes through.
        evaluated = {}
        for name, subspec in self._binding.items():
            evaluated[name] = scope[glom](target, subspec, scope)
        scope.update(evaluated)
        return target

    def __repr__(self):
        return format_invocation(self.__class__.__name__,
                                 kwargs=self._binding, repr=bbrepr)
class Auto(object):
    """
    Switch to Auto mode (the default)

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # set the mode in the scope, then evaluate the wrapped spec
        scope[MODE] = AUTO
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        # a missing spec is rendered as an empty argument list
        inner = bbrepr(self.spec) if self.spec is not None else ''
        return '%s(%s)' % (self.__class__.__name__, inner)
class _AbstractIterable(_AbstractIterableBase):
    """Abstract type matched by any class that has a callable
    ``__iter__``, with the exception of ``str`` and ``bytes``.
    """
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        # strings and bytes are iterable, but are deliberately excluded
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))
1901class _ObjStyleKeysMeta(type):
1902 def __instancecheck__(cls, C):
1903 return hasattr(C, "__dict__") and hasattr(C.__dict__, "keys")
class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Pairs the ``_ObjStyleKeysMeta`` instance check with a 'keys'
    handler that lists the names in an object's ``__dict__``.
    """
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        return obj.__dict__.keys()
1915def _get_sequence_item(target, index):
1916 return target[int(index)]
# The handlers below are 3-arg callables. As written, they take their
# arguments in the order (target, spec, scope).
def _handle_dict(target, spec, scope):
    """Evaluate a dict spec: a new mapping of the same type as *spec*
    is built by evaluating each value spec against *target*.

    Entries whose value evaluates to ``SKIP`` are left out. Keys that
    are ``Spec`` or ``TType`` objects are evaluated against *target*
    as well, after their value.
    """
    result = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for key, value_spec in spec.items():
        value = scope[glom](target, value_spec, scope)
        if value is not SKIP:
            if type(key) in (Spec, TType):
                key = scope[glom](target, key, scope)
            result[key] = value
    return result
def _handle_list(target, spec, scope):
    """Evaluate a list spec: the first element of *spec* is applied
    to every item that the 'iterate' handler yields for *target*.

    A result of ``SKIP`` omits that item; ``STOP`` ends the iteration
    early. While an item is evaluated, ``scope[Path]`` is the path on
    entry extended by the item's position.

    Raises:
        TypeError: if the 'iterate' handler fails on *target*.
    """
    item_spec = spec[0]
    iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])
    try:
        items = iterate(target)
    except Exception as exc:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), exc))
    collected = []
    parent_path = scope[Path]
    for position, item in enumerate(items):
        scope[Path] = parent_path + [position]
        result = scope[glom](item, item_spec, scope)
        if result is STOP:
            break
        if result is not SKIP:
            collected.append(result)
    return collected
def _handle_tuple(target, spec, scope):
    """Evaluate a tuple spec: the subspecs are applied in sequence,
    each one to the result of the one before it.

    A step that yields ``SKIP`` leaves the running result untouched;
    ``STOP`` ends the chain. Every step is evaluated in a new child
    scope chained onto the scope of the previous step.
    """
    current = target
    for step in spec:
        scope = chain_child(scope)
        outcome = scope[glom](current, step, scope)
        if outcome is STOP:
            break
        if outcome is SKIP:
            continue
        current = outcome
        if not isinstance(step, list):
            # record the step in the path (by name if it has one)
            scope[Path] += [getattr(step, '__name__', step)]
    return current
class Pipe(object):
    """Evaluate specs in sequence, where each spec receives the
    result of the previous one as its target:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        self.steps = steps

    def glomit(self, target, scope):
        # the steps are processed exactly like a tuple spec
        return _handle_tuple(target, self.steps, scope)

    def __repr__(self):
        # steps is a tuple, so its repr supplies the parentheses
        return '%s%s' % (self.__class__.__name__, bbrepr(self.steps))
1990class TargetRegistry(object):
1991 '''
1992 responsible for registration of target types for iteration
1993 and attribute walking
1994 '''
def __init__(self, register_default_types=True):
    """Create an empty registry, register the builtin operations and,
    unless *register_default_types* is false, the default types too.
    """
    self._op_type_map = {}
    self._op_type_tree = {}  # see _register_fuzzy_type for details
    self._type_cache = {}
    # op name to function that returns handler function
    self._op_auto_map = OrderedDict()

    self._register_builtin_ops()
    if register_default_types:
        self._register_default_types()
2008 def get_handler(self, op, obj, path=None, raise_exc=True):
2009 """for an operation and object **instance**, obj, return the
2010 closest-matching handler function, raising UnregisteredTarget
2011 if no handler can be found for *obj* (or False if
2012 raise_exc=False)
2014 """
2015 ret = False
2016 obj_type = type(obj)
2017 cache_key = (obj_type, op)
2018 if cache_key not in self._type_cache:
2019 type_map = self.get_type_map(op)
2020 if type_map:
2021 try:
2022 ret = type_map[obj_type]
2023 except KeyError:
2024 type_tree = self._op_type_tree.get(op, {})
2025 closest = self._get_closest_type(obj, type_tree=type_tree)
2026 if closest is None:
2027 ret = False
2028 else:
2029 ret = type_map[closest]
2031 if ret is False and raise_exc:
2032 raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)
2034 self._type_cache[cache_key] = ret
2035 return self._type_cache[cache_key]
2037 def get_type_map(self, op):
2038 try:
2039 return self._op_type_map[op]
2040 except KeyError:
2041 return OrderedDict()
2043 def _get_closest_type(self, obj, type_tree):
2044 default = None
2045 for cur_type, sub_tree in type_tree.items():
2046 if isinstance(obj, cur_type):
2047 sub_type = self._get_closest_type(obj, type_tree=sub_tree)
2048 ret = cur_type if sub_type is None else sub_type
2049 return ret
2050 return default
2052 def _register_default_types(self):
2053 self.register(object)
2054 self.register(dict, get=operator.getitem)
2055 self.register(dict, keys=dict.keys)
2056 self.register(list, get=_get_sequence_item)
2057 self.register(tuple, get=_get_sequence_item)
2058 self.register(OrderedDict, get=operator.getitem)
2059 self.register(OrderedDict, keys=OrderedDict.keys)
2060 self.register(_AbstractIterable, iterate=iter)
2061 self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)
2063 def _register_fuzzy_type(self, op, new_type, _type_tree=None):
2064 """Build a "type tree", an OrderedDict mapping registered types to
2065 their subtypes
2067 The type tree's invariant is that a key in the mapping is a
2068 valid parent type of all its children.
2070 Order is preserved such that non-overlapping parts of the
2071 subtree take precedence by which was most recently added.
2072 """
2073 if _type_tree is None:
2074 try:
2075 _type_tree = self._op_type_tree[op]
2076 except KeyError:
2077 _type_tree = self._op_type_tree[op] = OrderedDict()
2079 registered = False
2080 for cur_type, sub_tree in list(_type_tree.items()):
2081 if issubclass(cur_type, new_type):
2082 sub_tree = _type_tree.pop(cur_type) # mutation for recursion brevity
2083 try:
2084 _type_tree[new_type][cur_type] = sub_tree
2085 except KeyError:
2086 _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
2087 registered = True
2088 elif issubclass(new_type, cur_type):
2089 _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
2090 registered = True
2091 if not registered:
2092 _type_tree[new_type] = OrderedDict()
2093 return _type_tree
2095 def register(self, target_type, **kwargs):
2096 if not isinstance(target_type, type):
2097 raise TypeError('register expected a type, not an instance: %r' % (target_type,))
2098 exact = kwargs.pop('exact', None)
2099 new_op_map = dict(kwargs)
2101 for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
2102 cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())
2104 if op_name in new_op_map:
2105 handler = new_op_map[op_name]
2106 elif target_type in cur_type_map:
2107 handler = cur_type_map[target_type]
2108 else:
2109 try:
2110 handler = self._op_auto_map[op_name](target_type)
2111 except Exception as e:
2112 raise TypeError('error while determining support for operation'
2113 ' "%s" on target type: %s (got %r)'
2114 % (op_name, target_type.__name__, e))
2115 if handler is not False and not callable(handler):
2116 raise TypeError('expected handler for op "%s" to be'
2117 ' callable or False, not: %r' % (op_name, handler))
2118 new_op_map[op_name] = handler
2120 for op_name, handler in new_op_map.items():
2121 self._op_type_map[op_name][target_type] = handler
2123 if not exact:
2124 for op_name in new_op_map:
2125 self._register_fuzzy_type(op_name, target_type)
2127 self._type_cache = {} # reset type cache
2129 return
2131 def register_op(self, op_name, auto_func=None, exact=False):
2132 """add operations beyond the builtins ('get' and 'iterate' at the time
2133 of writing).
2135 auto_func is a function that when passed a type, returns a
2136 handler associated with op_name if it's supported, or False if
2137 it's not.
2139 See glom.core.register_op() for the global version used by
2140 extensions.
2141 """
2142 if not isinstance(op_name, basestring):
2143 raise TypeError('expected op_name to be a text name, not: %r' % (op_name,))
2144 if auto_func is None:
2145 auto_func = lambda t: False
2146 elif not callable(auto_func):
2147 raise TypeError('expected auto_func to be callable, not: %r' % (auto_func,))
2149 # determine support for any previously known types
2150 known_types = set(sum([list(m.keys()) for m
2151 in self._op_type_map.values()], []))
2152 type_map = self._op_type_map.get(op_name, OrderedDict())
2153 type_tree = self._op_type_tree.get(op_name, OrderedDict())
2154 for t in sorted(known_types, key=lambda t: t.__name__):
2155 if t in type_map:
2156 continue
2157 try:
2158 handler = auto_func(t)
2159 except Exception as e:
2160 raise TypeError('error while determining support for operation'
2161 ' "%s" on target type: %s (got %r)'
2162 % (op_name, t.__name__, e))
2163 if handler is not False and not callable(handler):
2164 raise TypeError('expected handler for op "%s" to be'
2165 ' callable or False, not: %r' % (op_name, handler))
2166 type_map[t] = handler
2168 if not exact:
2169 for t in known_types:
2170 self._register_fuzzy_type(op_name, t, _type_tree=type_tree)
2172 self._op_type_map[op_name] = type_map
2173 self._op_type_tree[op_name] = type_tree
2174 self._op_auto_map[op_name] = auto_func
2176 def _register_builtin_ops(self):
2177 def _get_iterable_handler(type_obj):
2178 return iter if callable(getattr(type_obj, '__iter__', None)) else False
2180 self.register_op('iterate', _get_iterable_handler)
2181 self.register_op('get', lambda _: getattr)
# Module-level base scope. glom() builds each call's scope as a child of
# this mapping; it is filled in further down, once _glom and TargetRegistry
# can be referenced.
_DEFAULT_SCOPE = ChainMap({})
def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # When only skip_exc is passed, default falls back to None; when neither
    # is passed, default stays _MISSING and skip_exc is the empty tuple, so
    # the inner "except skip_exc" below catches nothing.
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    # Also accepted: 'glom_debug' (defaults to the module-level GLOM_DEBUG),
    # plus 'path' and 'inspector', which seed the root scope below.
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    # the root scope is its own UP and ROOT
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # caller-supplied scope entries are applied last, over the values above
    scope.update(kwargs.pop('scope', {}))
    err = None
    # every recognized keyword has been popped by now; leftovers are errors
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        # with glom_debug set, the original exception propagates untouched
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised outside the except block on purpose (see the comment above
    # about truncating the stack trace)
    if err:
        raise err
    return ret
def chain_child(scope):
    """
    Helper for specs such as Auto(tuple), Switch(), etc. which want
    their child scopes chained one after another.

    Returns the scope to hand to the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))

    If *scope* has not spawned a child yet, *scope* itself is returned.
    Otherwise its most recent child is returned, after being flagged
    with NO_PYFRAME and having its recorded child errors emptied.
    """
    own_map = scope.maps[0]
    if LAST_CHILD_SCOPE in own_map:
        # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
        # this would have some interesting consequences for scoping
        # of tuples
        successor = own_map[LAST_CHILD_SCOPE]
        successor_map = successor.maps[0]
        successor_map[NO_PYFRAME] = True
        # previous failed branches are forgiven as the
        # scope is re-wired into a new stack
        child_errors = successor_map[CHILD_ERRORS]
        del child_errors[:]
        return successor
    return scope  # no children yet, nothing to do
# Set holding the type of an unbound slot method such as str.__len__.
# (type(Ref.glomit) was left commented out of this set in the original.)
unbound_methods = {type(str.__len__)}
2326def _has_callable_glomit(obj):
2327 glomit = getattr(obj, 'glomit', None)
2328 return callable(glomit) and not isinstance(obj, type)
def _glom(target, spec, scope):
    """Evaluate *spec* against *target* inside a fresh child of *scope*.

    The child scope records the target, the spec, and its parent, and
    inherits MODE and MIN_MODE from the parent's first map. Dispatch
    order: T objects go to _t_eval, objects with a callable ``glomit``
    get ``spec.glomit(target, scope)``, and everything else goes to the
    scope's MIN_MODE if set, or its MODE otherwise.

    Any exception is recorded on the scopes involved and re-raised.
    """
    parent = scope
    pmap = parent.maps[0]
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    # let the parent find its most recent child
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        # a truthy MIN_MODE takes precedence over MODE
        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # scope.maps[1] is the parent's own map (new_child puts the new
        # map in front of the parent's maps)
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # the parent carries the NO_PYFRAME flag: record the error on
            # it and on each flagged scope above it, following UP links
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise
def AUTO(target, spec, scope):
    """Interpret *spec* according to its Python type.

    Plain strings are parsed as paths; dicts, lists and tuples are
    passed to their respective handlers; string subclasses are parsed
    as paths and glommed; any other callable is called with *target*.

    Raises:
        TypeError: if *spec* is none of the above.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        path_t = Path.from_text(spec).path_t
        return _t_eval(target, path_t, scope)

    if isinstance(spec, dict):
        return _handle_dict(target, spec, scope)

    if isinstance(spec, list):
        return _handle_list(target, spec, scope)

    if isinstance(spec, tuple):
        return _handle_tuple(target, spec, scope)

    if isinstance(spec, basestring):
        # reached only by str subclasses; exact strs returned above
        return Path.from_text(spec).glomit(target, scope)

    if callable(spec):
        return spec(target)

    raise TypeError('expected spec to be dict, list, tuple, callable, string,'
                    ' or other Spec-like type, not: %r' % (spec,))
# Populate the base scope: the `glom` function object keys the recursive
# evaluator (_glom), and the TargetRegistry class keys a registry instance
# with the default types registered.
_DEFAULT_SCOPE.update({
    glom: _glom,
    TargetRegistry: TargetRegistry(register_default_types=True),
})
def register(target_type, **kwargs):
    """Teach the module-level :func:`glom()` how to treat instances of
    *target_type* when they show up as targets.

    For example, basic iterable support for Django's ORM can be added
    like so:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())

    Args:
       target_type (type): A type expected to appear in a glom()
          call target
       get (callable): A function which takes a target object and
          a name, acting as a default accessor. Defaults to
          :func:`getattr`.
       iterate (callable): A function which takes a target object
          and returns an iterator. Defaults to :func:`iter` if
          *target_type* appears to be iterable.
       exact (bool): Whether or not to match instances of subtypes
          of *target_type*.

    .. note::

       This function changes the registry stored in the default
       scope, and so affects every call to the module-level
       :func:`glom()`. If that global effect is undesirable for
       your application, or you're implementing a library, create a
       :class:`Glommer` instance and use its
       :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
       methods instead.

    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)
def register_op(op_name, **kwargs):
    """Add an operation to the default scope's registry. Intended for
    extension authors who need operations beyond the builtin
    'get', 'iterate', 'keys', 'assign', and 'delete'.
    See TargetRegistry for more details.
    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)
class Glommer(object):
    """A :class:`Glommer` bundles its own type-registration context, so
    that advanced users of glom can customize type handling without
    interfering with one another.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

    >>> glommer = Glommer()
    >>> glommer.glom({}, 'a.b.c', default='d')
    'd'
    >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
    3

    Instances also provide a :meth:`~Glommer.register()` method for
    localized control over type handling.

    Args:
       register_default_types (bool): Whether or not to enable the
          handling behaviors of the default :func:`glom()`. These
          default actions include dict access, list and iterable
          iteration, and generic object attribute access. Defaults to
          True.

    """
    def __init__(self, **kwargs):
        use_defaults = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # copying into a plain dict "freezes" the scope as it was at
        # construction time
        frozen = ChainMap(dict(base_scope))
        frozen[TargetRegistry] = TargetRegistry(register_default_types=use_defaults)
        self.scope = frozen

    def register(self, target_type, **kwargs):
        """Register *target_type* with this instance's own registry, so
        that :meth:`~Glommer.glom()` knows how to handle instances of
        that type as targets.

        Args:
           target_type (type): A type expected to appear in a glom()
              call target
           get (callable): A function which takes a target object and
              a name, acting as a default accessor. Defaults to
              :func:`getattr`.
           iterate (callable): A function which takes a target object
              and returns an iterator. Defaults to :func:`iter` if
              *target_type* appears to be iterable.
           exact (bool): Whether or not to match instances of subtypes
              of *target_type*. Passed on as False when omitted.

        .. note::

           The module-level :func:`register()` function affects the
           module-level :func:`glom()` function's behavior, whereas
           this method only changes the registry held in this
           instance's scope.

        """
        # forward an explicit exact=False when the caller gave none
        kwargs.setdefault('exact', False)
        registry = self.scope[TargetRegistry]
        registry.register(target_type, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Call the module-level :func:`glom()` with this instance's
        scope passed in as the *scope* argument."""
        return glom(target, spec, scope=self.scope, **kwargs)
class Fill(object):
    """A specifier type which puts glom into "fill-mode" for the spec
    it wraps. In fill-mode, glom only interprets explicit specifier
    types (including T objects). The default mode gives each builtin
    its own special interpretation; fill-mode is lighter-handed, which
    makes Fill a good fit for "filling out" Python literals such as
    tuples, dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    Here glom's usual tuple item chaining has been replaced by a
    simple tuple constructor.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # switch the mode on this scope, then evaluate the wrapped spec in it
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shorthand for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)
def FILL(target, spec, scope):
    """Mode function for fill-mode.

    Dicts, lists, tuples, sets and frozensets (exact types only) are
    rebuilt with every key/item evaluated against *target*; any other
    callable is called with *target*; everything else is returned
    unchanged.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def fill_child(val):
        return scope[glom](target, val, scope)

    spec_type = type(spec)
    if spec_type is dict:
        return {fill_child(key): fill_child(val) for key, val in spec.items()}
    if spec_type is list:
        return [fill_child(item) for item in spec]
    if spec_type in (tuple, set, frozenset):
        return spec_type([fill_child(item) for item in spec])
    if callable(spec):
        return spec(target)
    return spec
2560class _ArgValuator(object):
2561 def __init__(self):
2562 self.cache = {}
2564 def mode(self, target, spec, scope):
2565 """
2566 similar to FILL, but without function calling;
2567 useful for default, scope assignment, call/invoke, etc
2568 """
2569 recur = lambda val: scope[glom](target, val, scope)
2570 result = spec
2571 if type(spec) in (list, dict): # can contain themselves
2572 if id(spec) in self.cache:
2573 return self.cache[id(spec)]
2574 result = self.cache[id(spec)] = type(spec)()
2575 if type(spec) is dict:
2576 result.update({recur(key): recur(val) for key, val in spec.items()})
2577 else:
2578 result.extend([recur(val) for val in spec])
2579 if type(spec) in (tuple, set, frozenset): # cannot contain themselves
2580 result = type(spec)([recur(val) for val in spec])
2581 return result
def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    While *arg* is being evaluated, the scope's MIN_MODE is set to the
    mode of a fresh _ArgValuator. The previous MIN_MODE is put back
    afterwards, whether evaluation returns or raises.
    """
    prev_min_mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        # restore on the error path too, so a caller that catches the
        # exception does not keep evaluating under the temporary mode
        scope[MIN_MODE] = prev_min_mode