Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/core.py: 57%
1226 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:23 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-06-07 06:23 +0000
1"""*glom gets results.*
3The ``glom`` package has one central entrypoint,
4:func:`glom.glom`. Everything else in the package revolves around that
5one function. Sometimes, big things come in small packages.
7A couple of conventional terms you'll see repeated many times below:
9* **target** - glom is built to work on any data, so we simply
10 refer to the object being accessed as the *"target"*
11* **spec** - *(aka "glomspec", short for specification)* The
12 accompanying template used to specify the structure of the return
13 value.
15Now that you know the terms, let's take a look around glom's powerful
16semantics.
18"""
20from __future__ import print_function
22import os
23import sys
24import pdb
25import copy
26import warnings
27import weakref
28import operator
29from abc import ABCMeta
30from pprint import pprint
31import string
32from collections import OrderedDict
33import traceback
35from face.helpers import get_wrap_width
36from boltons.typeutils import make_sentinel
37from boltons.iterutils import is_iterable
38#from boltons.funcutils import format_invocation
40basestring = str
41_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})
42from collections import ChainMap
43from reprlib import Repr, recursive_repr
# Debug mode is enabled by any GLOM_DEBUG environment value other than
# empty, '0', or 'false' (compared case-insensitively, whitespace ignored).
GLOM_DEBUG = os.getenv('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')
# Default width for the target-spec trace formatter below (it is the default
# of format_target_spec_trace's ``width`` argument); never less than 50.
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50)  # min width

PATH_STAR = True
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# Changed to True in 23.1, this option to disable will go away soon

# Alias of the builtin ``type``.
# NOTE(review): presumably kept so the builtin stays reachable where ``type``
# is used as a local/parameter name elsewhere in the file -- confirm.
_type_type = type

# Internal "no value supplied" marker, used for optional arguments whose
# legitimate values may include None (e.g. Coalesce's default/default_factory).
_MISSING = make_sentinel('_MISSING')
SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.

>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}

Mostly used to drop keys from dicts (as above) or filter objects from
lists.

.. note::

   SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
   will remove the OMIT alias entirely.
"""
OMIT = SKIP  # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.

>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

# Scope key: _unpack_stack() follows scope[LAST_CHILD_SCOPE] to walk from a
# scope down to the most recently executed child scope.
LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed. Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""

NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

# NOTE(review): MODE and MIN_MODE carry no documentation in this part of the
# file; presumably they are scope keys selecting the evaluation mode -- confirm
# against their users further down.
MODE = make_sentinel('MODE')

MIN_MODE = make_sentinel('MIN_MODE')

# Scope key: _unpack_stack() reads scope[CHILD_ERRORS] as the list of
# branches hanging off a scope.
CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""

# Scope key: _unpack_stack() reads scope.get(CUR_ERROR) as the error recorded
# for a scope (None when absent).
CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Directory containing this module; GlomError._finalize() looks for it in
# formatted traceback lines to decide where to trim the traceback.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))
class GlomError(Exception):
    """Root of the exception hierarchy raised by :func:`glom` processing
    logic.

    Exceptions raised from within plain callables passed to glom (e.g.,
    ``len``, ``sum``, any ``lambda``) are not wrapped in a GlomError by
    default.
    """
    @classmethod
    def wrap(cls, exc):
        """Return an exception that is both a GlomError and an instance
        of *exc*'s own type, rebuilt from ``exc.args``.

        If the combined exception cannot be constructed, *exc* itself is
        returned unchanged.
        """
        # TODO: need to test this against a wide array of exception types
        # building a combined type on the fly works for exceptions
        # defined in pure-python as well as C
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            # orig_type is already an ancestor of GlomError (e.g. Exception)
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_type = type("GlomError.wrap({})".format(orig_type.__name__), bases, {})
        try:
            wrapped = wrapper_type(*exc.args)
            wrapped.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapped

    def _set_wrapped(self, exc):
        """Remember *exc* as the original exception behind this one."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the trimmed traceback lines of the exception currently
        being handled, plus *scope*, for later use by ``__str__``.
        """
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        etype, evalue, _ = sys.exc_info()  # values are not used below
        tb_lines = traceback.format_exc().strip().splitlines()
        # Walk the traceback from the bottom up to the first line that
        # mentions this package; with no such line, every line is kept.
        keep = len(tb_lines)
        for count, line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in line:
                keep = count - 1
                break
        self._tb_lines = tb_lines[-keep:]
        self._scope = scope

    def __str__(self):
        """Return the cached finalized message, else build it from the
        scope captured by ``_finalize``, else fall back to
        ``get_message()`` or the plain exception text.
        """
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return cached
        if getattr(self, '_scope', None) is not None:
            self._target_spec_trace = format_target_spec_trace(self._scope, self.__wrapped)
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     self._target_spec_trace] + list(self._tb_lines)
            self._finalized_str = "\n".join(lines)
            return self._finalized_str
        # not finalized: subclasses may supply get_message(), otherwise
        # use the standard exception string
        render = getattr(self, 'get_message', super(GlomError, self).__str__)
        return render()
def _unpack_stack(scope, only_errors=True):
    """
    Flatten a scope chain into ``[[scope, spec, target, error, [children]]]``.

    A convenience helper for printing stacks.

    With only_errors=True, trailing entries that carry no error (branches
    that may still be hanging around but were not involved in the stack
    trace of the error) are trimmed.

    only_errors=False could be useful for debugger / introspection (similar
    to traceback.print_stack())
    """
    def _row(cur_scope, branches):
        return [cur_scope, cur_scope[Spec], cur_scope[T],
                cur_scope.get(CUR_ERROR), branches]

    rows = []
    cur = scope.maps[0]
    while LAST_CHILD_SCOPE in cur:
        child = cur[LAST_CHILD_SCOPE]
        branches = cur[CHILD_ERRORS]
        if branches == [child]:
            # a single branch is treated as a linear continuation
            branches = []
        rows.append(_row(cur, branches))

        # NB: identity (not equality) comparison is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if any(branch is child for branch in branches):
            break  # child already covered by branches, stop the linear descent

        cur = child.maps[0]
    else:
        # the loop ended without break, so the innermost scope is not yet recorded
        rows.append(_row(cur, []))

    # push errors "down" to where they were first raised / first observed
    for upper, lower in zip(rows, rows[1:]):
        if upper[3] == lower[3]:
            upper[3] = None

    if only_errors:
        # trim the stack to the last error, but leave at least 1 entry to
        # not break the formatting function below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(rows) > 1 and rows[-1][3] is None:
            rows.pop()
    return rows
def _format_trace_value(value, maxlen):
    """Return the bbrepr of *value*, shortened to at most *maxlen*
    characters with a ``...`` suffix (including ``len(value)`` when the
    value supports it).
    """
    text = bbrepr(value).replace("\\'", "'")
    if len(text) <= maxlen:
        return text
    try:
        tail = '... (len=%s)' % len(value)
    except Exception:
        # value has no usable len()
        tail = '...'
    return text[:maxlen - len(tail)] + tail
def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    Render a scope as a short, multi-line target/spec summary.

    Branches are rendered by recursing with ``depth + 1``. Errors other
    than *root_error* get their own line.
    """
    indent = " " + "|" * depth
    tick = "| " if depth else "- "

    def mk_fmt(label, t=None):
        # build a one-line formatter with a fixed prefix; the value is
        # truncated to whatever width remains after the prefix
        prefix = indent + (t or tick) + label + ": "
        room = width - len(prefix)

        def fmt(value):
            return prefix + _format_trace_value(value, room)
        return fmt

    fmt_target = mk_fmt("Target")
    fmt_spec = mk_fmt("Spec")
    fmt_branch = mk_fmt("Spec", "+ ")

    def recurse(child_scope, last=False):
        # prev_target is looked up at call time, so the child starts from
        # the most recently printed target
        return format_target_spec_trace(child_scope, root_error, width, depth + 1, prev_target, last)

    def fmt_error(err):
        exc_text = "".join(traceback.format_exception_only(type(err), err))[:-1]
        return indent + tick + exc_text

    segments = []
    for _scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            # only print a target when it differs from the previous one
            segments.append(fmt_target(target))
        prev_target = target
        if branches:
            segments.append(fmt_branch(spec))
            segments.extend([recurse(branch) for branch in branches[:-1]])
            segments.append(recurse(branches[-1], last_branch))
        else:
            segments.append(fmt_spec(spec))
        if error is not None and error is not root_error:
            last_line_error = True
            segments.append(fmt_error(error))
        else:
            last_line_error = False

    if depth:  # \ on first line, X on last line
        def remark(line, mark):
            return line[:depth + 1] + mark + line[depth + 2:]

        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)
# TODO: not used (yet)
def format_oneline_trace(scope):
    """
    Render a scope as a single-line summary (shortest summary possible).

    Each stack entry contributes ``/`` plus the spec (its bbrepr for T
    and Path specs, otherwise its type name), then ``!`` plus the target's
    type name when the target changed, then ``<...>`` with the scope's
    path entries joined by ``->`` when the scope has a ``Path`` key.
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    segments = []
    prev_target = _MISSING
    for frame_scope, spec, target, error, branches in _unpack_stack(scope, only_errors=False):
        segments.append('/')
        if type(spec) in (TType, Path):
            segments.append(bbrepr(spec))
        else:
            segments.append(type(spec).__name__)
        # Identity comparison, as in format_target_spec_trace(): targets are
        # arbitrary user objects, and ``!=`` would invoke their own
        # comparison, which may raise or return a non-boolean (e.g. arrays).
        if target is not prev_target:
            segments.append('!')
            segments.append(type(target).__name__)
        if Path in frame_scope:
            segments.append('<')
            segments.append('->'.join([str(p) for p in frame_scope[Path]]))
            segments.append('>')
        prev_target = target

    return "".join(segments)
class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """This :exc:`GlomError` subtype represents a failure to access an
    attribute as dictated by the spec. The most commonly-seen error
    when using glom, it maintains a copy of the original exception and
    produces a readable error message for easy debugging.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc = exc
        self.path = path
        self.part_idx = part_idx

    def get_message(self):
        """Describe which part of the path could not be accessed, and why."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return 'could not access %r, part %r of %r, got error: %r' % details

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.exc, self.path, self.part_idx)
class PathAssignError(GlomError):
    """This :exc:`GlomError` subtype is raised when an assignment fails,
    stemming from an :func:`~glom.assign` call or other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc = exc
        self.path = path
        self.dest_name = dest_name

    def get_message(self):
        """Describe what could not be assigned, where, and why."""
        details = (self.dest_name, self.path, self.exc)
        return 'could not assign %r on object at %r, got error: %r' % details

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.exc, self.path, self.dest_name)
class CoalesceError(GlomError):
    """This :exc:`GlomError` subtype is raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj = coal_obj
        self.skipped = skipped
        self.path = path

    def __repr__(self):
        return '{}({!r}, {!r}, {!r})'.format(
            self.__class__.__name__, self.coal_obj, self.skipped, self.path)

    def get_message(self):
        """Summarize the subspecs tried, what each produced, and any
        non-default ``skip`` / ``skip_exc`` settings and path.
        """
        coal = self.coal_obj
        outcomes = []
        for val in self.skipped:
            type_name = val.__class__.__name__
            if isinstance(val, coal.skip_exc):
                # a caught exception is shown by its bare type name
                outcomes.append(type_name)
            else:
                # a value rejected by the skip rule
                outcomes.append('<skipped %s>' % type_name)
        pieces = ['no valid values found. Tried %r and got (%s)'
                  % (tuple(coal.subspecs), ', '.join(outcomes))]
        if coal.skip is not _MISSING:
            pieces.append(', skip set to %r' % (coal.skip,))
        if coal.skip_exc is not GlomError:
            pieces.append(', skip_exc set to %r' % (coal.skip_exc,))
        if self.path is not None:
            pieces.append(' (at path %r)' % (self.path,))
        return ''.join(pieces)
class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Derives from both :exc:`GlomError` and :exc:`TypeError`, so handlers
    for either exception type will catch it.
    """
class UnregisteredTarget(GlomError):
    """This :class:`GlomError` subtype is raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op = op
        self.target_type = target_type
        self.type_map = type_map
        self.path = path
        super(UnregisteredTarget, self).__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (self.__class__.__name__, self.op, self.target_type.__name__,
                  self.type_map, self.path)
        return '%s(%r, <type %r>, %r, %r)' % fields

    def get_message(self):
        """Explain that the target type is not registered for the
        operation, listing the type names that are.
        """
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types mapped to a truthy handler count as registered
        registered = sorted([typ.__name__ for typ, handler in self.type_map.items() if handler])
        # joining an empty list yields '', so no registered types renders as '()'
        registered_text = '(%s)' % ', '.join(registered)
        message = ("target type %r not registered for '%s', expected one of"
                   " registered types: %s" % (self.target_type.__name__, self.op, registered_text))
        if self.path:
            message += ' (at %r)' % (self.path,)
        return message
# pypy's __builtins__ is a module, as is CPython's REPL, but at
# normal execution time it's a dict? Normalize to the namespace dict.
if getattr(__builtins__, '__dict__', None) is not None:
    __builtins__ = vars(__builtins__)


# Maps id(builtin object) -> its builtin name, for roundtrippable reprs.
_BUILTIN_ID_NAME_MAP = {id(obj): name for name, obj in __builtins__.items()}
class _BBRepr(Repr):
    """A better repr for builtins, when the built-in repr isn't
    roundtrippable.
    """
    def __init__(self):
        super().__init__()
        # Turn up all the length limits very high. Only integer settings
        # are limits: newer Pythons also keep non-integer options on the
        # instance (``fillvalue`` is a string, ``indent`` defaults to None),
        # and overwriting those with 1024 corrupts the output.
        for name, value in list(vars(self).items()):
            if isinstance(value, int) and not isinstance(value, bool):
                setattr(self, name, 1024)

    def repr1(self, x, level):
        """Return the standard repr of *x*, unless that repr is of the
        ``<...>`` form and *x* is a builtin, in which case return the
        builtin's name.
        """
        ret = Repr.repr1(self, x, level)
        if not ret.startswith('<'):
            return ret
        return _BUILTIN_ID_NAME_MAP.get(id(x), ret)


# Module-wide repr function; recursive_repr guards against re-entrant
# calls on the same object.
bbrepr = recursive_repr()(_BBRepr().repr)
class _BBReprFormatter(string.Formatter):
    """
    Formatter whose ``{!r}`` conversion goes through bbrepr rather than
    the builtin repr; every other conversion behaves as usual.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format
# TODO: push this back up to boltons with repr kwarg
def format_invocation(name='', args=(), kwargs=None, **kw):
    """Given a name, positional arguments, and keyword arguments, format
    a basic Python-style function call.

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)

    The only extra keyword accepted is ``repr``, the function used to
    render each value (defaults to bbrepr); anything else raises
    :exc:`TypeError`.
    """
    _repr = kw.pop('repr', bbrepr)
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))

    kwargs = kwargs or {}
    if isinstance(kwargs, dict):
        # dicts are rendered in sorted key order
        pairs = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        # any other iterable of (key, value) pairs keeps its own order
        pairs = kwargs

    positional_text = ', '.join([_repr(arg) for arg in args])
    keyword_text = ', '.join(['%s=%s' % (key, _repr(val)) for key, val in pairs])

    # join the two halves, leaving out whichever one is empty
    arg_text = ', '.join(filter(None, [positional_text, keyword_text]))
    return '%s(%s)' % (name, arg_text)
class Path(object):
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Indexing with an integer outside ``-len(path) <= i < len(path)``
    raises :exc:`IndexError`.

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        if isinstance(path_parts[0], TType):
            # a leading T-style object becomes the root of the path
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # splice in the (op, arg) pairs of the sub-path one by one
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # any other value is a plain path segment ('P' operation)
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    # once a cache holds more than this many entries, nothing more is added
    _MAX_CACHE = 10000
    # set once the "changed behavior" warning below has been emitted
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                # cache is full: build the Path without storing it
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # __ops__ is (root, op1, arg1, op2, arg2, ...)
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path begins with *other*, which may be a
        string (treated as a single segment), a Path, or a T object.

        Raises:
           TypeError: if *other* is none of those types.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the segment(s) selected by the
        integer or slice *i*.

        Raises:
           IndexError: if *i* is an integer outside the path's range.
        """
        cur_t_path = self.path_t.__ops__
        # Segment k occupies ops[2k + 1:2k + 3], so segment indexes are
        # translated into offsets into the flat ops tuple.
        try:
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            start = (start * 2) + 1 if start >= 0 else (start * 2) + len(cur_t_path)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else (stop * 2) + len(cur_t_path)
        except AttributeError:
            # not a slice: *i* is a single integer index
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + len(cur_t_path)
            # ``>=`` rather than ``>``: an offset equal to len(ops) is one
            # past the last segment (i == len(self)) and must be rejected,
            # otherwise an empty Path is returned and iterating a Path via
            # __getitem__ yields one extra, empty element.
            if start < 0 or start >= len(cur_t_path):
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # apply the step to whole (op, arg) pairs, then flatten again
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        new_t.__ops__ = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])
def _format_path(t_path):
    """Render a flat ``(op, arg, op, arg, ...)`` sequence as Path/T repr text.

    A sequence holding only non-``'P'`` operations is rendered by
    ``_format_t``; anything else becomes ``Path(...)``, with runs of
    non-``'P'`` operations grouped and rendered by ``_format_t``.
    """
    path_parts = []
    pending_t = []  # current run of non-'P' (op, arg) entries
    for idx in range(0, len(t_path), 2):
        op, arg = t_path[idx], t_path[idx + 1]
        if op != 'P':
            pending_t.extend((op, arg))
            continue
        if pending_t:
            # a plain segment ends the run collected so far
            path_parts.append(pending_t)
            pending_t = []
        path_parts.append(arg)
    if path_parts and pending_t:
        path_parts.append(pending_t)

    if not path_parts and pending_t:
        return _format_t(pending_t)
    rendered = [_format_t(part) if type(part) is list else repr(part)
                for part in path_parts]
    return 'Path(%s)' % ', '.join(rendered)
class Spec(object):
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope or {}

    def glom(self, target, **kw):
        """Evaluate this spec against *target*, layering any ``scope``
        keyword argument on top of the spec's own scope values.
        """
        combined = dict(self.scope)
        combined.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(combined)
        # a glom function stored in the scope takes precedence over the
        # module-level one
        runner = combined.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        scope.update(self.scope)
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        cn = self.__class__.__name__
        spec_text = bbrepr(self.spec)
        if not self.scope:
            return '%s(%s)' % (cn, spec_text)
        return '%s(%s, scope=%r)' % (cn, spec_text, self.scope)
class Coalesce(object):
    """Coalesce objects specify fallback behavior for a list of
    subspecs.

    Subspecs are passed as positional arguments, and keyword arguments
    control defaults. Each subspec is evaluated in turn, and if none
    match, a :exc:`CoalesceError` is raised, or a default is returned,
    depending on the options used.

    .. note::

        This operation may seem very familiar if you have experience with
        `SQL`_ or even `C# and others`_.


    In practice, this fallback behavior's simplicity is only surpassed
    by its utility:

    >>> target = {'c': 'd'}
    >>> glom(target, Coalesce('a', 'b', 'c'))
    'd'

    glom tries to get ``'a'`` from ``target``, but gets a
    KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,
    glom *coalesces* into the next subspec, ``'b'``. The process
    repeats until it gets to ``'c'``, which returns our value,
    ``'d'``. If our value weren't present, we'd see:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    Same process, but because ``target`` is empty, we get a
    :exc:`CoalesceError`.

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.


    If we want to avoid an exception, and we know which value we want
    by default, we can set *default*:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')
    'd-fault'

    ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

    Args:

       subspecs: One or more glommable subspecs
       default: A value to return if no subspec results in a valid value
       default_factory: A callable whose result will be returned as a default
       skip: A value, tuple of values, or predicate function
         representing values to ignore
       skip_exc: An exception or tuple of exception types to catch and
         move on to the next subspec. Defaults to :exc:`GlomError`, the
         parent type of all glom runtime exceptions.

    Raises:
       ValueError: if both *default* and *default_factory* are passed.
       TypeError: if any other keyword argument is passed.

    If all subspecs produce skipped values or exceptions, a
    :exc:`CoalesceError` will be raised. For more examples, check out
    the :doc:`tutorial`, which makes extensive use of Coalesce.

    .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE
    .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

    """
    def __init__(self, *subspecs, **kwargs):
        self.subspecs = subspecs
        # kept verbatim so __repr__ can show exactly what was passed
        self._orig_kwargs = dict(kwargs)
        self.default = kwargs.pop('default', _MISSING)
        self.default_factory = kwargs.pop('default_factory', _MISSING)
        # Compare against the sentinel instead of relying on truthiness:
        # a falsy default (None, 0, '', ...) is still an explicit default
        # and must conflict with default_factory.
        if self.default is not _MISSING and self.default_factory is not _MISSING:
            raise ValueError('expected one of "default" or "default_factory", not both')
        self.skip = kwargs.pop('skip', _MISSING)
        if self.skip is _MISSING:
            # no skip rule: every produced value is accepted
            self.skip_func = lambda v: False
        elif callable(self.skip):
            self.skip_func = self.skip
        elif isinstance(self.skip, tuple):
            self.skip_func = lambda v: v in self.skip
        else:
            self.skip_func = lambda v: v == self.skip
        self.skip_exc = kwargs.pop('skip_exc', GlomError)
        if kwargs:
            raise TypeError('unexpected keyword args: %r' % (sorted(kwargs.keys()),))

    def glomit(self, target, scope):
        """Return the first subspec result that is not skipped, else the
        default, else raise :exc:`CoalesceError`.
        """
        skipped = []
        for subspec in self.subspecs:
            try:
                ret = scope[glom](target, subspec, scope)
                if not self.skip_func(ret):
                    break
                skipped.append(ret)
            except self.skip_exc as e:
                skipped.append(e)
                continue
        else:
            # no subspec produced an acceptable value
            if self.default is not _MISSING:
                ret = arg_val(target, self.default, scope)
            elif self.default_factory is not _MISSING:
                ret = self.default_factory()
            else:
                raise CoalesceError(self, skipped, scope[Path])
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        return format_invocation(cn, self.subspecs, self._orig_kwargs, repr=bbrepr)
class Inspect(object):
    """The :class:`~glom.Inspect` specifier type provides a way to get
    visibility into glom's evaluation of a specification, enabling
    debugging of those tricky problems that may arise with unexpected
    data.

    :class:`~glom.Inspect` can be inserted into an existing spec in one of two
    ways. First, as a wrapper around the spec in question, or second,
    as an argument-less placeholder wherever a spec could be.

    :class:`~glom.Inspect` supports several modes, controlled by
    keyword arguments. Its default, no-argument mode, simply echos the
    state of the glom at the point where it appears:

      >>> target = {'a': {'b': {}}}
      >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
      ---
      path:   ['a.b']
      target: {'a': {'b': {}}}
      output: {}
      ---

    Debugging behavior aside, :class:`~glom.Inspect` has no effect on
    values in the target, spec, or result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether or not the Inspect should be applied
         at every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): This flag controls whether a debugging prompt
         should appear before evaluating each inspected spec. Can also
         take a callable. Defaults to False.
       post_mortem (bool): This flag controls whether exceptions
         should be caught and interactively debugged with :mod:`pdb` on
         inspected specs. Can also take a callable. Defaults to False.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    Raises:
       TypeError: if *breakpoint* or *post_mortem* is truthy but not
         callable, or if an unrecognized keyword argument is passed.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # with no positional argument, Inspect wraps an empty Path
        self.wrapped = a[0] if a else Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)

        on_break = kw.pop('breakpoint', False)
        if on_break is True:
            on_break = pdb.set_trace
        if on_break and not callable(on_break):
            raise TypeError('breakpoint expected bool or callable, not: %r' % on_break)
        self.breakpoint = on_break

        on_error = kw.pop('post_mortem', False)
        if on_error is True:
            on_error = pdb.post_mortem
        if on_error and not callable(on_error):
            raise TypeError('post_mortem expected bool or callable, not: %r' % on_error)
        self.post_mortem = on_error

        # a misspelled option used to be dropped silently, leaving the
        # Inspect in its default mode; reject it instead
        if kw:
            raise TypeError('unexpected keyword args: %r' % (sorted(kw.keys()),))

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        # stash the real handler under Inspect,
        # and replace the child handler with a trace callback
        scope[Inspect] = scope[glom]
        scope[glom] = self._trace
        return scope[glom](target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Evaluate *spec* through the stashed handler, echoing and/or
        dropping into the configured debug hooks around the call."""
        if not self.recursive:
            # only trace this level: restore the real handler for children
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path:  ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            ret = scope[Inspect](target, spec, scope)
        except Exception:
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', ret)
            print('---')
        return ret
class Call(object):
    """:class:`Call` marks the point in a spec where the target should
    be handed to a function, *func*.

    Much like :func:`~functools.partial`, :class:`Call` cannot do
    anything a ``lambda`` could not; what it offers is readability and
    a useful ``repr``.

    Args:
       func (callable): a function or other callable to be called with
         the target
       args: positional arguments for the call; evaluated against the
         target before *func* is invoked. Defaults to no arguments.
       kwargs: keyword arguments for the call; evaluated against the
         target before *func* is invoked. Defaults to no arguments.

    :class:`Call` pairs nicely with :attr:`~glom.T` when constructing
    objects. For example, building a dict and feeding it to a
    constructor:

      >>> class ExampleClass(object):
      ...    def __init__(self, attr):
      ...        self.attr = attr
      ...
      >>> target = {'attr': 3.14}
      >>> glom(target, Call(ExampleClass, kwargs=T)).attr
      3.14

    That is equivalent to ``glom(target, lambda target:
    ExampleClass(**target))``, only easier on the eyes.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        func = T if func is None else func
        if not callable(func) and not isinstance(func, (Spec, TType)):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        """Resolve func, args and kwargs against *target*, then make
        the call and return its result."""
        func = arg_val(target, self.func, scope)
        positional = arg_val(target, self.args, scope)
        keyword = arg_val(target, self.kwargs, scope)
        return func(*positional, **keyword)

    def __repr__(self):
        parts = (self.__class__.__name__, bbrepr(self.func), self.args, self.kwargs)
        return '%s(%s, args=%r, kwargs=%r)' % parts
def _is_spec(obj, strict=False):
    """Tell whether *obj* counts as a glom spec object.

    ``TType`` instances always qualify. In strict mode the only other
    accepted objects are exact ``Spec`` instances (subclasses do not
    count); otherwise anything with a callable ``glomit`` is accepted.
    """
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec
class Invoke(object):
    """Specifier type for calling callables from within a glom spec.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` resembles :func:`functools.partial`, except that the
    call is a "template": constant values and glom specs can be
    interleaved freely as arguments.

    For instance, this spec checks whether a target is an integer:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    It composes like any other spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Positional and keyword arguments can be mixed:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Zero-argument functions work as well:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not (callable(func) or _is_spec(func, strict=True)):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat sequence of (op, args, kwargs) triples, in call order;
        # op is 'C' (constants), 'S' (specs) or '*' (star)
        self._args = ()
        # maps each keyword name to the **kw dict that most recently set
        # it; that dict's identity marks which triple "owns" the keyword
        self._cur_kwargs = {}

    @classmethod
    def specfunc(cls, spec):
        """Build an :class:`Invoke` whose function is itself produced
        by evaluating *spec*.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def constants(self, *a, **kw):
        """Return a new :class:`Invoke` spec that additionally passes
        the given positional and keyword values, as-is, to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Later positional arguments are appended to earlier ones:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments behave as expected:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and the other :class:`Invoke`
        methods can be chained; each call returns a new spec and
        leaves the receiver untouched.
        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('C', a, kw)
        owners = dict(self._cur_kwargs)
        owners.update(dict.fromkeys(kw, kw))
        clone._cur_kwargs = owners
        return clone

    def specs(self, *a, **kw):
        """Return a new :class:`Invoke` spec that additionally passes
        the results of evaluating the given positional and keyword
        specs to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Later positional arguments are appended to earlier ones:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments behave as expected:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and the other :class:`Invoke`
        methods can be chained; each call returns a new spec and
        leaves the receiver untouched.

        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('S', a, kw)
        owners = dict(self._cur_kwargs)
        owners.update(dict.fromkeys(kw, kw))
        clone._cur_kwargs = owners
        return clone

    def star(self, args=None, kwargs=None):
        """Return a new :class:`Invoke` spec in which the *args* and/or
        *kwargs* specs are "starred" / "star-starred" into the call.

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
             underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
             the underlying function.

        At least one of the two must be given, otherwise
        :exc:`TypeError` is raised.

        Like the other :class:`Invoke` methods, :meth:`~Invoke.star()`
        can be called repeatedly; the *args* and *kwargs* stack up in
        the order they were provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        clone = self.__class__(self.func)
        clone._args = self._args + ('*', args, kwargs)
        clone._cur_kwargs = dict(self._cur_kwargs)
        return clone

    def __repr__(self):
        method_names = {'C': 'constants', 'S': 'specs', '*': 'star'}
        class_name = self.__class__.__name__
        if type(self.func) is Spec:
            head = format_invocation(class_name + '.specfunc',
                                     (self.func.spec,), repr=bbrepr)
        else:
            head = format_invocation(class_name, (self.func,), repr=bbrepr)
        pieces = [head]

        steps = self._args
        for op, positional, keyword in zip(steps[0::3], steps[1::3], steps[2::3]):
            method = method_names[op]
            if op in ('C', 'S'):
                # show a keyword only on the call that currently owns it
                shown_args = positional
                shown_kwargs = [(k, v) for k, v in keyword.items()
                                if self._cur_kwargs[k] is keyword]
            else:
                shown_args = ()
                shown_kwargs = {}
                if positional:
                    shown_kwargs['args'] = positional
                if keyword:
                    shown_kwargs['kwargs'] = keyword

            pieces.append('.' + format_invocation(method, shown_args,
                                                  shown_kwargs, repr=bbrepr))

        return ''.join(pieces)

    def glomit(self, target, scope):
        """Assemble the arguments recorded by constants/specs/star and
        call the underlying function with them."""
        call_args = []
        call_kwargs = {}

        def evaluate(spec):
            return scope[glom](target, spec, scope)

        func = self.func
        if _is_spec(func, strict=True):
            func = evaluate(func)

        steps = self._args
        for op, positional, keyword in zip(steps[0::3], steps[1::3], steps[2::3]):
            if op == 'C':
                call_args.extend(positional)
                call_kwargs.update({k: v for k, v in keyword.items()
                                    if self._cur_kwargs[k] is keyword})
            elif op == 'S':
                call_args.extend([evaluate(arg) for arg in positional])
                call_kwargs.update({k: evaluate(v) for k, v in keyword.items()
                                    if self._cur_kwargs[k] is keyword})
            elif op == '*':
                if positional is not None:
                    call_args.extend(evaluate(positional))
                if keyword is not None:
                    call_kwargs.update(evaluate(keyword))

        return func(*call_args, **call_kwargs)
class Ref(object):
    """Give part of a spec a name so it can be referred to elsewhere in
    the same spec; handy for trees and other self-similar structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
         to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        """Register or look up the named subspec in *scope*, then
        evaluate it against *target*."""
        key = (Ref, self.name)
        resolved = self.subspec
        if resolved is _MISSING:
            # reference form: the definition must already be in scope
            resolved = scope[key]
        else:
            # definition form: publish the subspec under its name
            scope[key] = resolved
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr of the pair, minus the tuple's surrounding parens
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"
class TType(object):
    """``T``, short for "target". A singleton object for expressing a
    glom specification in an object-oriented way.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    Think of ``T`` as a stand-in for your data: whatever you do to
    ``T`` is recorded, then replayed against the real target during
    the :func:`glom` call. For example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    The ``'a.b.c'``-style shorthand and :class:`~glom.Path` objects
    cover plain access, but when attribute and key lookups need to be
    spelled out explicitly, ``T`` is the tool.

    ``T`` goes beyond unambiguous access. Methods can be called, and
    most things one would do with a normal object are available:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object may appear anywhere in a spec. In the example
    above, ``'a'`` is accessed, a ``T`` fetches ``'b'`` and iterates
    over its ``items``, and the result is turned into a ``list``.

    ``T`` also works with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    ``lambda`` works fine in glom specs and is often handy, but ``T``
    and :class:`~glom.Call` remove the need for most of it.

    Unlike ``lambda`` and other functions, ``T`` has a repr that reads
    back as the expression that built it:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # __ops__ is a flat tuple: the root object followed by alternating
    # (operation code, argument) entries
    __slots__ = ('__ops__',)

    # -- access and calls ------------------------------------------------

    def __getattr__(self, name):
        if not name.startswith('__'):
            return _t_child(self, '.', name)
        message = ('T instances reserve dunder attributes.'
                   ' To access the "{name}" attribute, use'
                   ' T.__("{d_name}")')
        raise AttributeError(message.format(name=name, d_name=name[2:]))

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        if self is S:
            # calling S itself is the scope-assignment form:
            # keyword arguments only, and at least one of them
            if args:
                raise TypeError('S() takes no positional arguments, got: %r' % (args,))
            if not kwargs:
                raise TypeError('S() expected at least one kwarg, got none')
            # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    def __(self, name):
        return _t_child(self, '.', '__' + name)

    # -- star traversal --------------------------------------------------

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        op_codes = self.__ops__[1::2]
        return sum(1 for code in op_codes if code in ('x', 'X'))

    # -- arithmetic: each operator records its own op code ---------------

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    # -- repr and pickling -----------------------------------------------

    def __repr__(self):
        ops = self.__ops__
        root, recorded = ops[0], ops[1:]
        return _format_t(recorded, root)

    def __getstate__(self):
        # the root singleton is stored by name rather than by object
        ops = self.__ops__
        root_names = {T: 'T', S: 'S', A: 'A'}
        return (root_names[ops[0]],) + ops[1:]

    def __setstate__(self, state):
        roots = {'T': T, 'S': S, 'A': A}
        self.__ops__ = (roots[state[0]],) + state[1:]
def _t_child(parent, operation, arg):
    """Return a new ``TType`` whose ops are *parent*'s ops with
    ``(operation, arg)`` appended; *parent* is left untouched.

    Raises ``BadSpec`` when *parent* is rooted at ``A`` and the
    operation is not one of the access operations ``'.'``, ``'['`` or
    ``'P'``.
    """
    ops = parent.__ops__
    assignment_friendly = operation in ('.', '[', 'P')
    if not assignment_friendly and ops[0] is A:
        # whitelist rather than blacklist assignment friendly operations
        # TODO: error type?
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = ops + (operation, arg)
    return child
1518def _s_first_magic(scope, key, _t):
1519 """
1520 enable S.a to do S['a'] or S['a'].val as a special
1521 case for accessing user defined string variables
1522 """
1523 err = None
1524 try:
1525 cur = scope[key]
1526 except KeyError as e:
1527 err = PathAccessError(e, Path(_t), 0) # always only one level depth, hence 0
1528 if err:
1529 raise err
1530 return cur
def _t_eval(target, _t, scope):
    """Replay the operations recorded on the T/S/A expression *_t*.

    ``_t.__ops__`` is a flat tuple: the root (``T``, ``S`` or ``A``)
    followed by alternating ``(op, arg)`` entries. Evaluation starts
    from *target* for a ``T`` root and from *scope* for ``S``/``A``,
    then applies each op in turn. Every arg is first passed through
    ``arg_val``.

    For an ``A`` root the final ``(op, arg)`` pair is not fetched but
    used as the assignment destination: *target* is stored there and
    *target* itself is returned. ``S(name=spec)`` directly on the root
    updates *scope* and also returns *target*.

    Raises:
        BadSpec: for an ``A`` expression with no destination.
        PathAccessError: when an access or arithmetic step fails.
        ValueError: if the root is not one of ``T``, ``S``, ``A``.
    """
    t_path = _t.__ops__
    i = 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                # nxt grows while it is being iterated, so this walks
                # all descendants; ids guard against visiting twice
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # '#' is the op code recorded by TType.__floordiv__;
                    # without this branch T // n silently left cur as-is
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            # raised here, after the except block has been left
            raise pae
        i += 2
    if root is A:
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur
1654def _assign_op(dest, op, arg, val, path, scope):
1655 """helper method for doing the assignment on a T operation"""
1656 if op == '[':
1657 dest[arg] = val
1658 elif op == '.':
1659 setattr(dest, arg, val)
1660 elif op == 'P':
1661 _assign = scope[TargetRegistry].get_handler('assign', dest)
1662 try:
1663 _assign(dest, arg, val)
1664 except Exception as e:
1665 raise PathAssignError(e, path, arg)
1666 else: # pragma: no cover
1667 raise ValueError('unsupported T operation for assignment')
1670def _extend_children(children, item, get_handler):
1671 try: # dict or obj-like
1672 keys = get_handler('keys', item)
1673 get = get_handler('get', item)
1674 except UnregisteredTarget:
1675 try:
1676 iterate = get_handler('iterate', item)
1677 except UnregisteredTarget:
1678 pass
1679 else:
1680 try: # list-like
1681 children.extend(iterate(item))
1682 except Exception:
1683 pass
1684 else:
1685 try:
1686 for key in keys(item):
1687 try:
1688 children.append(get(item, key))
1689 except Exception:
1690 pass
1691 except Exception:
1692 pass
# The three root singletons. Every other TType instance is derived from
# one of them, and its __ops__ tuple starts with that root.
T = TType()  # target aka Mr. T aka "this"
S = TType()  # like T, but means grab stuff from Scope, not Target
A = TType()  # like S, but shorthand to assign target to scope

# each root's op sequence holds only itself: no operations recorded yet
T.__ops__ = (T,)
S.__ops__ = (S,)
A.__ops__ = (A,)

_T_STAR = T.__star__()  # helper constant for Path.from_text
_T_STARSTAR = T.__starstar__()  # helper constant for Path.from_text

# Sentinels; nothing in this section of the file uses them.
# NOTE(review): their meaning is defined wherever they are consumed -- confirm there
UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')
1710def _format_slice(x):
1711 if type(x) is not slice:
1712 return bbrepr(x)
1713 fmt = lambda v: "" if v is None else bbrepr(v)
1714 if x.step is None:
1715 return fmt(x.start) + ":" + fmt(x.stop)
1716 return fmt(x.start) + ":" + fmt(x.stop) + ":" + fmt(x.step)
def _format_t(path, root=T):
    """Render a recorded op sequence as the Python expression that
    would rebuild it.

    Args:
        path: the alternating ``(op, arg)`` entries of a TType's
            ``__ops__``, without the leading root.
        root: the root singleton (``T``, ``S`` or ``A``) to print at
            the front.

    Returns:
        The expression as a string. A path containing a ``'P'`` op is
        delegated wholesale to ``_format_path``.
    """
    # Op codes of all arithmetic operators. '*' and '#' (floordiv) used
    # to be missing, so T / (T * 2) printed as "T / T * 2" and ~(T * 2)
    # as "~T * 2", neither of which reads back as the same expression.
    arithmetic_ops = '+-*#/%:&|^~_'
    # op codes whose Python spelling differs from the code itself;
    # '#' used to be emitted literally ("T # 2"), which is a comment
    symbols = {':': '**', '#': '//'}

    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append("[%s]" % (index,))
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            # only op positions are inspected, so an argument value that
            # happens to equal an op code cannot trigger grouping
            if any([o in path[:i:2] for o in arithmetic_ops]):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            # NOTE(review): the left operand is never parenthesized, so
            # (T + 1) * 2 still renders as "T + 1 * 2"; left as-is here
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_ops = arg.__ops__[1::2]
                if any([o in arg_ops for o in arithmetic_ops]):
                    formatted_arg = '(' + formatted_arg + ')'
            prepr.append(' ' + symbols.get(op, op) + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)
class Val(object):
    """A spec that evaluates to the wrapped *value*, whatever the
    target is.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Here ``'counts'`` is not looked up as a key the way ``'a.b'`` is;
    :func:`~glom.glom` unwraps the Val and uses the value directly.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # target and scope are deliberately ignored
        return self.value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, bbrepr(self.value))


Literal = Val  # backwards compat for pre-20.7.0
class ScopeVars(object):
    """The runtime partner of :class:`Vars` -- the object that actually
    lives in the scope and holds the runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # the merged mapping becomes the instance namespace, so each
        # key is readable as an attribute; defaults win on overlap
        namespace = dict(base)
        namespace.update(defaults)
        self.__dict__ = namespace

    def __iter__(self):
        # yields (name, value) pairs, which is what makes dict(obj) work
        return iter(vars(self).items())

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, bbrepr(vars(self)))
class Vars(object):
    """
    :class:`Vars` is a helper for use with **S** when a spec needs
    shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Treat the arguments like default arguments to a function: the
    same objects are referenced every time the spec is evaluated, so
    think carefully about mutable data structures.
    """
    def __init__(self, base=(), **kw):
        # fail fast (with dict's own error) if base is not dict-compatible;
        # the converted copy itself is not kept
        dict(base)
        self.base, self.defaults = base, kw

    def glomit(self, target, spec):
        # each evaluation gets a fresh ScopeVars built from the stored arguments
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        positional = (self.base,) if self.base else ()
        return format_invocation(self.__class__.__name__,
                                 args=positional,
                                 kwargs=self.defaults,
                                 repr=bbrepr)
class Let(object):
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    Evaluates each keyword's spec against the target, stores the
    results in the scope under the keyword names, and passes the
    target through unchanged.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if len(kw) == 0:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        # evaluate every binding first, then publish them all at once
        bound = {}
        for name, subspec in self._binding.items():
            bound[name] = scope[glom](target, subspec, scope)
        scope.update(bound)
        return target

    def __repr__(self):
        return format_invocation(self.__class__.__name__,
                                 kwargs=self._binding, repr=bbrepr)
class Auto(object):
    """
    Switch to Auto mode (the default)

    Sets the scope's mode to ``AUTO`` and then evaluates the wrapped
    spec against the target.

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        scope[MODE] = AUTO
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        # a missing spec is rendered as an empty argument list
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)
class _AbstractIterable(_AbstractIterableBase):
    """Virtual base class matching every type with a callable
    ``__iter__``, except ``str`` and ``bytes``."""
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        # str and bytes are iterable, but deliberately excluded here
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))
1895class _ObjStyleKeysMeta(type):
1896 def __instancecheck__(cls, C):
1897 return hasattr(C, "__dict__") and hasattr(C.__dict__, "keys")
class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Instance check (via ``_ObjStyleKeysMeta``) for objects whose keys
    are the names in their ``__dict__``; ``get_keys`` returns them."""
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        # the live keys view of the instance namespace, not a copy
        return obj.__dict__.keys()
1909def _get_sequence_item(target, index):
1910 return target[int(index)]
# handlers are 3-arg callables. The module-level handlers below take
# (target, spec, scope). When the handler is a method of the spec type
# (a ``glomit`` method), the spec arrives as ``self``, i.e. the unbound
# signature is (spec, target, scope).
def _handle_dict(target, spec, scope):
    """Evaluate a dict spec: build a new mapping of the same type as
    *spec*, where each value is the result of evaluating that value's
    subspec against *target*.

    Entries whose result is ``SKIP`` are left out. A key that is
    exactly a ``Spec`` or ``TType`` is itself evaluated against
    *target* to produce the output key.
    """
    result = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for key, value_spec in spec.items():
        value = scope[glom](target, value_spec, scope)
        if value is not SKIP:
            if type(key) in (Spec, TType):
                key = scope[glom](target, key, scope)
            result[key] = value
    return result
def _handle_list(target, spec, scope):
    """Evaluate a list spec: apply the first element of *spec* to every
    item obtained by iterating *target*, and collect the results.

    ``SKIP`` results are dropped and a ``STOP`` result ends the
    iteration. While an item is evaluated, ``scope[Path]`` is the
    path on entry plus that item's index. A failure to start
    iterating is re-raised as ``TypeError``.
    """
    item_spec = spec[0]
    registry = scope[TargetRegistry]
    iterate = registry.get_handler('iterate', target, path=scope[Path])
    try:
        items = iterate(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))

    collected = []
    parent_path = scope[Path]
    for index, item in enumerate(items):
        scope[Path] = parent_path + [index]
        outcome = scope[glom](item, item_spec, scope)
        if outcome is STOP:
            break
        if outcome is not SKIP:
            collected.append(outcome)
    return collected
def _handle_tuple(target, spec, scope):
    """Evaluate a tuple spec as a pipeline: each subspec is applied to
    the result of the previous one, starting from *target*.

    Every step runs in a fresh child scope chained onto the previous
    step's scope. A ``SKIP`` result leaves the running value
    unchanged; a ``STOP`` result ends the pipeline. After each
    accepted step that is not a list spec, the step (or its
    ``__name__`` when it has one) is appended to ``scope[Path]``.
    """
    current = target
    for step in spec:
        scope = chain_child(scope)
        produced = scope[glom](current, step, scope)
        if produced is SKIP:
            continue
        if produced is STOP:
            break
        current = produced
        if not isinstance(step, list):
            scope[Path] += [getattr(step, '__name__', step)]
    return current
class Pipe(object):
    """Run specs in sequence, feeding each one's result to the next as
    its target:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        self.steps = steps

    def glomit(self, target, scope):
        # the steps are already a tuple, so reuse the tuple handler
        return _handle_tuple(target, self.steps, scope)

    def __repr__(self):
        class_name = self.__class__.__name__
        return class_name + bbrepr(self.steps)
1984class TargetRegistry(object):
1985 '''
1986 responsible for registration of target types for iteration
1987 and attribute walking
1988 '''
1989 def __init__(self, register_default_types=True):
1990 self._op_type_map = {}
1991 self._op_type_tree = {} # see _register_fuzzy_type for details
1992 self._type_cache = {}
1994 self._op_auto_map = OrderedDict() # op name to function that returns handler function
1996 self._register_builtin_ops()
1998 if register_default_types:
1999 self._register_default_types()
2000 return
def get_handler(self, op, obj, path=None, raise_exc=True):
    """for an operation and object **instance**, obj, return the
    closest-matching handler function, raising UnregisteredTarget
    if no handler can be found for *obj* (or False if
    raise_exc=False)

    Results are cached per ``(type(obj), op)``. The raise_exc check
    is applied to cached results as well, so a miss cached by a
    ``raise_exc=False`` call still raises for a later strict call.
    """
    obj_type = type(obj)
    cache_key = (obj_type, op)
    if cache_key in self._type_cache:
        ret = self._type_cache[cache_key]
    else:
        ret = False
        type_map = self.get_type_map(op)
        if type_map:
            try:
                # exact type match first
                ret = type_map[obj_type]
            except KeyError:
                # otherwise fall back to the closest registered supertype
                type_tree = self._op_type_tree.get(op, {})
                closest = self._get_closest_type(obj, type_tree=type_tree)
                if closest is not None:
                    ret = type_map[closest]

        # as before, nothing is cached when this lookup is about to raise
        if ret is not False or not raise_exc:
            self._type_cache[cache_key] = ret

    # Checked outside the cache-miss branch: previously a False cached by
    # a raise_exc=False call was returned to raise_exc=True callers.
    if ret is False and raise_exc:
        raise UnregisteredTarget(op, obj_type, type_map=self.get_type_map(op), path=path)
    return ret
2031 def get_type_map(self, op):
2032 try:
2033 return self._op_type_map[op]
2034 except KeyError:
2035 return OrderedDict()
2037 def _get_closest_type(self, obj, type_tree):
2038 default = None
2039 for cur_type, sub_tree in type_tree.items():
2040 if isinstance(obj, cur_type):
2041 sub_type = self._get_closest_type(obj, type_tree=sub_tree)
2042 ret = cur_type if sub_type is None else sub_type
2043 return ret
2044 return default
2046 def _register_default_types(self):
2047 self.register(object)
2048 self.register(dict, get=operator.getitem)
2049 self.register(dict, keys=dict.keys)
2050 self.register(list, get=_get_sequence_item)
2051 self.register(tuple, get=_get_sequence_item)
2052 self.register(OrderedDict, get=operator.getitem)
2053 self.register(OrderedDict, keys=OrderedDict.keys)
2054 self.register(_AbstractIterable, iterate=iter)
2055 self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)
2057 def _register_fuzzy_type(self, op, new_type, _type_tree=None):
2058 """Build a "type tree", an OrderedDict mapping registered types to
2059 their subtypes
2061 The type tree's invariant is that a key in the mapping is a
2062 valid parent type of all its children.
2064 Order is preserved such that non-overlapping parts of the
2065 subtree take precedence by which was most recently added.
2066 """
2067 if _type_tree is None:
2068 try:
2069 _type_tree = self._op_type_tree[op]
2070 except KeyError:
2071 _type_tree = self._op_type_tree[op] = OrderedDict()
2073 registered = False
2074 for cur_type, sub_tree in list(_type_tree.items()):
2075 if issubclass(cur_type, new_type):
2076 sub_tree = _type_tree.pop(cur_type) # mutation for recursion brevity
2077 try:
2078 _type_tree[new_type][cur_type] = sub_tree
2079 except KeyError:
2080 _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
2081 registered = True
2082 elif issubclass(new_type, cur_type):
2083 _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
2084 registered = True
2085 if not registered:
2086 _type_tree[new_type] = OrderedDict()
2087 return _type_tree
2089 def register(self, target_type, **kwargs):
2090 if not isinstance(target_type, type):
2091 raise TypeError('register expected a type, not an instance: %r' % (target_type,))
2092 exact = kwargs.pop('exact', None)
2093 new_op_map = dict(kwargs)
2095 for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
2096 cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())
2098 if op_name in new_op_map:
2099 handler = new_op_map[op_name]
2100 elif target_type in cur_type_map:
2101 handler = cur_type_map[target_type]
2102 else:
2103 try:
2104 handler = self._op_auto_map[op_name](target_type)
2105 except Exception as e:
2106 raise TypeError('error while determining support for operation'
2107 ' "%s" on target type: %s (got %r)'
2108 % (op_name, target_type.__name__, e))
2109 if handler is not False and not callable(handler):
2110 raise TypeError('expected handler for op "%s" to be'
2111 ' callable or False, not: %r' % (op_name, handler))
2112 new_op_map[op_name] = handler
2114 for op_name, handler in new_op_map.items():
2115 self._op_type_map[op_name][target_type] = handler
2117 if not exact:
2118 for op_name in new_op_map:
2119 self._register_fuzzy_type(op_name, target_type)
2121 self._type_cache = {} # reset type cache
2123 return
2125 def register_op(self, op_name, auto_func=None, exact=False):
2126 """add operations beyond the builtins ('get' and 'iterate' at the time
2127 of writing).
2129 auto_func is a function that when passed a type, returns a
2130 handler associated with op_name if it's supported, or False if
2131 it's not.
2133 See glom.core.register_op() for the global version used by
2134 extensions.
2135 """
2136 if not isinstance(op_name, basestring):
2137 raise TypeError('expected op_name to be a text name, not: %r' % (op_name,))
2138 if auto_func is None:
2139 auto_func = lambda t: False
2140 elif not callable(auto_func):
2141 raise TypeError('expected auto_func to be callable, not: %r' % (auto_func,))
2143 # determine support for any previously known types
2144 known_types = set(sum([list(m.keys()) for m
2145 in self._op_type_map.values()], []))
2146 type_map = self._op_type_map.get(op_name, OrderedDict())
2147 type_tree = self._op_type_tree.get(op_name, OrderedDict())
2148 for t in sorted(known_types, key=lambda t: t.__name__):
2149 if t in type_map:
2150 continue
2151 try:
2152 handler = auto_func(t)
2153 except Exception as e:
2154 raise TypeError('error while determining support for operation'
2155 ' "%s" on target type: %s (got %r)'
2156 % (op_name, t.__name__, e))
2157 if handler is not False and not callable(handler):
2158 raise TypeError('expected handler for op "%s" to be'
2159 ' callable or False, not: %r' % (op_name, handler))
2160 type_map[t] = handler
2162 if not exact:
2163 for t in known_types:
2164 self._register_fuzzy_type(op_name, t, _type_tree=type_tree)
2166 self._op_type_map[op_name] = type_map
2167 self._op_type_tree[op_name] = type_tree
2168 self._op_auto_map[op_name] = auto_func
2170 def _register_builtin_ops(self):
2171 def _get_iterable_handler(type_obj):
2172 return iter if callable(getattr(type_obj, '__iter__', None)) else False
2174 self.register_op('iterate', _get_iterable_handler)
2175 self.register_op('get', lambda _: getattr)
# Root scope behind the module-level glom(): every glom() call layers a
# new child on top of it via new_child(). It starts out empty and is
# populated further down, once glom/_glom and TargetRegistry are usable.
_DEFAULT_SCOPE = ChainMap({})
def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.

    Three further keyword arguments are accepted: *path* (initial
    value stored under ``Path`` in the scope, defaults to ``[]``),
    *inspector* (stored under ``Inspect``, defaults to None) and
    *glom_debug* (defaults to the module-level ``GLOM_DEBUG``; when
    true, exceptions propagate without being wrapped or finalized).
    Any other keyword argument raises :exc:`TypeError`.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # passing skip_exc alone implies default=None; passing default alone
    # implies skip_exc=GlomError; passing neither disables skipping
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    # the root scope is its own parent and its own root
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # user-supplied scope entries are applied last, so they may override
    # any of the keys set above
    scope.update(kwargs.pop('scope', {}))
    err = None
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised outside the except block, see the note on truncation above
    if err:
        raise err
    return ret
def chain_child(scope):
    """
    Helper for specs such as Auto(tuple), Switch(), etc, which
    evaluate several child specs in sequence and want each child
    scope to be chained onto the previous one.

    Returns the scope to hand to the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))
    """
    own_map = scope.maps[0]
    if LAST_CHILD_SCOPE not in own_map:
        # no child has been evaluated from this scope yet
        return scope
    # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
    # this would have some interesting consequences for scoping
    # of tuples
    chained = scope[LAST_CHILD_SCOPE]
    chained_map = chained.maps[0]
    chained_map[NO_PYFRAME] = True
    # the scope is being re-wired into a new stack, so errors from
    # earlier failed branches are dropped (in place)
    del chained_map[CHILD_ERRORS][:]
    return chained
# set of types considered "unbound methods"; currently only the type of
# a slot wrapper such as str.__len__ (type(Ref.glomit) is not included)
unbound_methods = {type(str.__len__)}
2320def _has_callable_glomit(obj):
2321 glomit = getattr(obj, 'glomit', None)
2322 return callable(glomit) and not isinstance(obj, type)
def _glom(target, spec, scope):
    """Evaluate *spec* against *target* in a new child of *scope*.

    This is the recursive core stored under the ``glom`` key of the
    default scope. It creates the child scope, dispatches on the kind
    of spec, and records error bookkeeping on the scope chain before
    re-raising any exception.
    """
    parent = scope
    pmap = parent.maps[0]
    # MODE and MIN_MODE are read from the parent's own (first) map, not
    # looked up through the whole chain
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    # remember the most recent child on the parent
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        # MIN_MODE, when set, takes precedence over MODE
        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # maps[1] is the parent's own map (new_child puts the new map first)
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # the parent carries the NO_PYFRAME marker: walk upward and
            # record the error on every scope whose own map has the marker
            # NOTE(review): presumably these are the scopes re-wired by
            # chain_child(), which have no _glom frame of their own -- confirm
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise
def AUTO(target, spec, scope):
    """Default evaluation mode: interpret *spec* according to its type.

    Strings are treated as paths, dicts, lists and tuples go to their
    respective handlers, and any other callable is called with
    *target*. Anything else raises TypeError.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        path_t = Path.from_text(spec).path_t
        return _t_eval(target, path_t, scope)

    if isinstance(spec, dict):
        return _handle_dict(target, spec, scope)
    if isinstance(spec, list):
        return _handle_list(target, spec, scope)
    if isinstance(spec, tuple):
        return _handle_tuple(target, spec, scope)
    if isinstance(spec, basestring):
        # text that did not hit the exact-str shortcut above
        return Path.from_text(spec).glomit(target, scope)
    if callable(spec):
        return spec(target)

    message = ('expected spec to be dict, list, tuple, callable, string,'
               ' or other Spec-like type, not: %r' % (spec,))
    raise TypeError(message)
# populate the default scope: the recursive evaluator is stored under
# the public glom function, and the default registry under its class
_DEFAULT_SCOPE.update([
    (glom, _glom),
    (TargetRegistry, TargetRegistry(register_default_types=True)),
])
def register(target_type, **kwargs):
    """Teach the module-level :func:`glom()` how to treat instances of
    *target_type* when they show up as targets.

    For example, basic iteration support for Django's ORM can be
    added like so:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())

    Args:
       target_type (type): A type expected to appear in a glom()
          call target
       get (callable): A function which takes a target object and
          a name, acting as a default accessor. Defaults to
          :func:`getattr`.
       iterate (callable): A function which takes a target object
          and returns an iterator. Defaults to :func:`iter` if
          *target_type* appears to be iterable.
       exact (bool): When true, only *target_type* itself is
          matched; otherwise instances of its subtypes match too.

    .. note::

       This function changes the registry in the default scope, and
       therefore the behavior of the module-level :func:`glom()`.
       Applications or libraries which do not want that global
       effect can create a :class:`Glommer` and use its
       :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
       methods instead.

    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)
def register_op(op_name, **kwargs):
    """Add a new operation named *op_name* to the registry in the
    default scope. Intended for extension authors who need operations
    beyond the ones already registered there.

    Keyword arguments are passed through to
    :meth:`TargetRegistry.register_op`, see there for details.
    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)
class Glommer(object):
    """A :class:`Glommer` bundles its own type registration context, so
    that independent, advanced uses of glom do not interfere with one
    another.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

      >>> glommer = Glommer()
      >>> glommer.glom({}, 'a.b.c', default='d')
      'd'
      >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
      3

    Type handling for a single instance is controlled through its
    :meth:`~Glommer.register()` method.

    Args:
       register_default_types (bool): Whether or not to enable the
          handling behaviors of the default :func:`glom()`. These
          default actions include dict access, list and iterable
          iteration, and generic object attribute access. Defaults to
          True.
       scope: Mapping to copy the initial scope from. Defaults to the
          module's default scope.

    """
    def __init__(self, **kwargs):
        with_defaults = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # copying into a plain dict "freezes" the scope as it is at
        # construction time; later changes to base_scope are not seen
        self.scope = ChainMap(dict(base_scope))
        registry = TargetRegistry(register_default_types=with_defaults)
        self.scope[TargetRegistry] = registry

    def register(self, target_type, **kwargs):
        """Register *target_type* with this instance's registry, so
        that :meth:`~Glommer.glom()` knows how to handle instances of
        that type as targets.

        Args:
           target_type (type): A type expected to appear in a glom()
              call target
           get (callable): A function which takes a target object and
              a name, acting as a default accessor. Defaults to
              :func:`getattr`.
           iterate (callable): A function which takes a target object
              and returns an iterator. Defaults to :func:`iter` if
              *target_type* appears to be iterable.
           exact (bool): When true, only *target_type* itself is
              matched; otherwise instances of its subtypes match too.
              Defaults to False.

        .. note::

           Only this :class:`Glommer` is affected. The module-level
           :func:`register()` function is the counterpart which
           changes the behavior of the module-level :func:`glom()`.

        """
        kwargs.setdefault('exact', False)
        registry = self.scope[TargetRegistry]
        registry.register(target_type, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Same as the module-level glom(), run with this instance's scope."""
        return glom(target, spec, scope=self.scope, **kwargs)
class Fill(object):
    """A specifier type which puts glom into "fill-mode" for the spec
    it wraps. In fill-mode only explicit specifier types (including T
    objects) are interpreted; builtins, which have special meaning in
    the default mode, are handled with a lighter touch. That makes
    Fill a good fit for "filling out" Python literals such as tuples,
    dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    Note how the tuple no longer chains its items, as it would in the
    default mode, but simply constructs a new tuple.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # switch the mode first, then recurse into the wrapped spec
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shorthand for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)
def FILL(target, spec, scope):
    """Evaluation mode behind Fill: rebuild dict, list, tuple, set and
    frozenset literals (exact types only) with their contents evaluated
    recursively, call callables with *target*, and return anything else
    unchanged.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def recurse(val):
        return scope[glom](target, val, scope)

    kind = type(spec)
    if kind is dict:
        return {recurse(key): recurse(val) for key, val in spec.items()}
    if kind in (list, tuple, set, frozenset):
        filled = [recurse(item) for item in spec]
        # lists are returned as built, other containers are re-wrapped
        return filled if kind is list else kind(filled)
    if callable(spec):
        return spec(target)
    return spec
2554class _ArgValuator(object):
2555 def __init__(self):
2556 self.cache = {}
2558 def mode(self, target, spec, scope):
2559 """
2560 similar to FILL, but without function calling;
2561 useful for default, scope assignment, call/invoke, etc
2562 """
2563 recur = lambda val: scope[glom](target, val, scope)
2564 result = spec
2565 if type(spec) in (list, dict): # can contain themselves
2566 if id(spec) in self.cache:
2567 return self.cache[id(spec)]
2568 result = self.cache[id(spec)] = type(spec)()
2569 if type(spec) is dict:
2570 result.update({recur(key): recur(val) for key, val in spec.items()})
2571 else:
2572 result.extend([recur(val) for val in spec])
2573 if type(spec) in (tuple, set, frozenset): # cannot contain themselves
2574 result = type(spec)([recur(val) for val in spec])
2575 return result
def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    *arg* is evaluated against *target* with ``scope[MIN_MODE]``
    temporarily set to a fresh _ArgValuator's mode. The previous
    MIN_MODE is put back afterwards, whether evaluation returns or
    raises, so a caller which handles the error does not keep
    evaluating in argument mode.
    """
    mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        scope[MIN_MODE] = mode