Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/core.py: 57%

1"""*glom gets results.*

3The ``glom`` package has one central entrypoint,

4:func:`glom.glom`. Everything else in the package revolves around that

5one function. Sometimes, big things come in small packages.

7A couple of conventional terms you'll see repeated many times below:

9* **target** - glom is built to work on any data, so we simply

10 refer to the object being accessed as the *"target"*

11* **spec** - *(aka "glomspec", short for specification)* The

12 accompanying template used to specify the structure of the return

13 value.

15Now that you know the terms, let's take a look around glom's powerful

16semantics.

18"""

20from __future__ import print_function

22import os

23import sys

24import pdb

25import copy

26import warnings

27import weakref

28import operator

29from abc import ABCMeta

30from pprint import pprint

31import string

32from collections import OrderedDict

33import traceback

35from face.helpers import get_wrap_width

36from boltons.typeutils import make_sentinel

37from boltons.iterutils import is_iterable

38#from boltons.funcutils import format_invocation

40basestring = str

41_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})

42from collections import ChainMap

43from reprlib import Repr, recursive_repr

# Debug mode is opt-in through the GLOM_DEBUG environment variable.
# After stripping whitespace and lowercasing, the values '', '0' and
# 'false' (which includes the variable being unset) leave it off;
# anything else turns it on.
GLOM_DEBUG = os.environ.get('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')

# Column budget for each line of the target-spec trace rendered into
# error messages: whatever get_wrap_width(max_width=110) reports, but
# never narrower than 50.
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50)  # min width

PATH_STAR = True
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# Changed to True in 23.1, this option to disable will go away soon

# Second name for the builtin ``type``.
# NOTE(review): presumably kept so the builtin stays reachable where a
# local called ``type`` shadows it -- confirm against the rest of the file.
_type_type = type

# Internal "no value supplied" marker, used where None is a legitimate value.
_MISSING = make_sentinel('_MISSING')
SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.

>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}

Mostly used to drop keys from dicts (as above) or filter objects from
lists.

.. note::

 SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
 will remove the OMIT alias entirely.
"""
OMIT = SKIP  # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.

>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

# Scope key: _unpack_stack() follows scope[LAST_CHILD_SCOPE] to walk from
# a scope down through the most recently executed child scopes.
LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed. Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""

NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

# NOTE(review): MODE and MIN_MODE carry no docstring and are not used in
# this part of the file; presumably scope keys for the evaluation mode --
# confirm where they are consumed.
MODE = make_sentinel('MODE')

MIN_MODE = make_sentinel('MIN_MODE')

# Scope key: _unpack_stack() reads scope[CHILD_ERRORS] as the list of
# branches belonging to a scope.
CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""

# Scope key: _unpack_stack() reads scope.get(CUR_ERROR) as the error
# recorded for a scope (None when absent).
CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Absolute directory containing this module. GlomError._finalize() looks
# for this path in formatted traceback lines to decide where to cut the
# traceback it stores.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

124

class GlomError(Exception):
    """Root of the exception hierarchy raised by :func:`glom` processing
    logic.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Build a stand-in for *exc* that is also a GlomError.

        A new class named ``GlomError.wrap(<OriginalType>)`` is created,
        deriving from the original exception type and GlomError (or from
        GlomError alone when GlomError already subclasses that type). It
        is instantiated with ``exc.args`` and remembers *exc*. If any of
        that fails, *exc* itself is returned unchanged.
        """
        # TODO: need to test this against a wide array of exception types
        # this approach to wrapping errors works for exceptions
        # defined in pure-python as well as C
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_type = type("GlomError.wrap({})".format(orig_type.__name__), bases, {})
        try:
            wrapped = wrapper_type(*exc.args)
            wrapped.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapped

    def _set_wrapped(self, exc):
        """Record *exc* as the error this instance stands in for."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the trimmed traceback text and the scope that
        ``__str__`` later uses to render the full message."""
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        etype, evalue, _ = sys.exc_info()
        tb_lines = traceback.format_exc().strip().splitlines()
        # Scan from the bottom for the last line mentioning this package's
        # directory. Found after skipping `skipped` lines, the slice below
        # keeps the final `skipped - 1` lines; if never found, keep all.
        limit = len(tb_lines)
        for skipped, line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in line:
                limit = skipped - 1
                break
        self._tb_lines = tb_lines[-limit:]
        # if the first line is trying to put a caret at a byte-code location on a line that
        # isn't being displayed, skip it
        if set(self._tb_lines[0]).issubset({' ', '^', '~'}):
            self._tb_lines = self._tb_lines[1:]
        self._scope = scope

    def __str__(self):
        """Return the cached finalized message, build it from the stored
        scope, or fall back to ``get_message()`` / the plain Exception
        string."""
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return self._finalized_str
        if getattr(self, '_scope', None) is not None:
            trace = format_target_spec_trace(self._scope, self.__wrapped)
            self._target_spec_trace = trace
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     trace]
            lines.extend(self._tb_lines)
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # else, not finalized
        try:
            render = self.get_message
        except AttributeError:
            render = super().__str__
        return render()

188

189

def _unpack_stack(scope, only_errors=True):
    """
    convert scope to [[scope, spec, target, error, [children]]]

    this is a convenience method for printing stacks

    only_errors=True means ignore branches which may still be hanging around
    which were not involved in the stack trace of the error

    only_errors=False could be useful for debugger / introspection (similar
    to traceback.print_stack())

    Each entry is a mutable list (not a tuple) because the error slot is
    cleared in place further down.
    """
    stack = []
    # only the innermost mapping of the chained scope is inspected
    scope = scope.maps[0]
    # follow the chain of "last executed child" scopes downward
    while LAST_CHILD_SCOPE in scope:
        child = scope[LAST_CHILD_SCOPE]
        branches = scope[CHILD_ERRORS]
        if branches == [child]:
            branches = []  # if there's only one branch, count it as linear
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), branches])

        # NB: this id() business is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if id(child) in [id(b) for b in branches]:
            break  # if child already covered by branches, stop the linear descent

        scope = child.maps[0]
    else:  # if break executed above, cur scope was already added
        # while/else: runs only when the loop ended without ``break``,
        # i.e. the final scope has no LAST_CHILD_SCOPE and is still unrecorded
        stack.append([scope, scope[Spec], scope[T], scope.get(CUR_ERROR), []])
    # push errors "down" to where they were first raised / first observed
    for i in range(len(stack) - 1):
        cur, nxt = stack[i], stack[i + 1]
        # index 3 is the error slot; blank the parent's copy when the
        # next entry reports an equal error
        if cur[3] == nxt[3]:
            cur[3] = None
    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(stack) > 1 and stack[-1][3] is None:
            stack.pop()
    return stack

230

231

def _format_trace_value(value, maxlen):
    """Render *value* with bbrepr for a trace line of at most *maxlen*
    characters.

    Escaped single quotes are unescaped. When the text is too long it is
    cut and given a ``... (len=N)`` suffix, or a bare ``...`` if
    ``len(value)`` cannot be computed.
    """
    text = bbrepr(value).replace("\\'", "'")
    if len(text) <= maxlen:
        return text
    try:
        tail = '... (len=%s)' % len(value)
    except Exception:
        tail = '...'
    keep = maxlen - len(tail)
    return text[:keep] + tail

241

242

def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    unpack a scope into a multi-line but short summary

    Args:
       scope: the scope to unpack (handed to ``_unpack_stack``).
       root_error: the error the whole trace is about; an entry whose
          error is this exact object does not get an extra error line.
       width: maximum width of each rendered line.
       depth: branch nesting level; 0 for the top-level call, one more
          for each recursive call made for a branch.
       prev_target: the most recently printed target; a target is only
          printed again when it is a different object.
       last_branch: whether this call renders the last branch of its
          parent; affects the "X" marker on the final line.

    Returns:
       str: the trace, one segment per line, joined with newlines.
    """
    segments = []
    # one leading space, then one "|" per nesting level
    indent = " " + "|" * depth
    tick = "| " if depth else "- "
    def mk_fmt(label, t=None):
        # build a formatter with a fixed prefix; the value gets whatever
        # width remains after the prefix
        pre = indent + (t or tick) + label + ": "
        fmt_width = width - len(pre)
        return lambda v: pre + _format_trace_value(v, fmt_width)
    fmt_t = mk_fmt("Target")
    fmt_s = mk_fmt("Spec")
    fmt_b = mk_fmt("Spec", "+ ")  # "+ " marks a spec that has branches
    recurse = lambda s, last=False: format_target_spec_trace(s, root_error, width, depth + 1, prev_target, last)
    # format_exception_only() output ends with a newline; [:-1] drops it
    tb_exc_line = lambda e: "".join(traceback.format_exception_only(type(e), e))[:-1]
    fmt_e = lambda e: indent + tick + tb_exc_line(e)
    for scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            segments.append(fmt_t(target))
        prev_target = target
        if branches:
            segments.append(fmt_b(spec))
            # every branch but the last is rendered with last=False
            segments.extend([recurse(s) for s in branches[:-1]])
            segments.append(recurse(branches[-1], last_branch))
        else:
            segments.append(fmt_s(spec))
        if error is not None and error is not root_error:
            last_line_error = True
            segments.append(fmt_e(error))
        else:
            last_line_error = False
    if depth:  # \ on first line, X on last line
        # replace the single character at column depth + 1 with the marker
        remark = lambda s, m: s[:depth + 1] + m + s[depth + 2:]
        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)

281

282

283# TODO: not used (yet)

def format_oneline_trace(scope):
    """
    collapse a scope into the shortest possible single-line summary

    Each stack entry contributes ``/`` plus either the bbrepr of the spec
    (for T and Path specs) or the spec's type name. ``!<type name>`` is
    added when the target differs from the previous entry's, and
    ``<a->b->...>`` when the entry's scope carries a Path.
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    pieces = []
    last_target = _MISSING
    for frame, spec, target, error, branches in _unpack_stack(scope, only_errors=False):
        pieces.append('/')
        spec_type = type(spec)
        if spec_type in (TType, Path):
            pieces.append(bbrepr(spec))
        else:
            pieces.append(spec_type.__name__)
        if target != last_target:
            pieces.extend(['!', type(target).__name__])
        if Path in frame:
            pieces.extend(['<', '->'.join([str(p) for p in frame[Path]]), '>'])
        last_target = target

    return "".join(pieces)

309

310

class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """The :exc:`GlomError` raised when a spec asks for an attribute, key,
    or index that cannot be reached on the target. It is the error most
    often seen when using glom; it holds on to the original exception
    and renders a readable message for debugging.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc = exc
        self.path = path
        self.part_idx = part_idx

    def get_message(self):
        """Describe which part of the path failed and the error it hit."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return 'could not access %r, part %r of %r, got error: %r' % details

    def __repr__(self):
        fields = (type(self).__name__, self.exc, self.path, self.part_idx)
        return '%s(%r, %r, %r)' % fields

356

357

class PathAssignError(GlomError):
    """The :exc:`GlomError` raised when an assignment fails, whether it
    came from an :func:`~glom.assign` call or other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc = exc
        self.path = path
        self.dest_name = dest_name

    def get_message(self):
        """Describe the destination, the path, and the underlying error."""
        details = (self.dest_name, self.path, self.exc)
        return 'could not assign %r on object at %r, got error: %r' % details

    def __repr__(self):
        fields = (type(self).__name__, self.exc, self.path, self.dest_name)
        return '%s(%r, %r, %r)' % fields

386

387

class CoalesceError(GlomError):
    """The :exc:`GlomError` raised by a :class:`Coalesce` spec when none
    of its subspecs match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj = coal_obj
        self.skipped = skipped
        self.path = path

    def __repr__(self):
        fields = (type(self).__name__, self.coal_obj, self.skipped, self.path)
        return '%s(%r, %r, %r)' % fields

    def get_message(self):
        """List the subspecs tried and what each produced, plus any
        non-default skip settings and the path."""
        tried = tuple(self.coal_obj.subspecs)
        outcomes = []
        for val in self.skipped:
            # a caught exception is shown by bare type name; a value that
            # was skipped is shown as "<skipped TypeName>"
            if isinstance(val, self.coal_obj.skip_exc):
                outcomes.append(val.__class__.__name__)
            else:
                outcomes.append('<skipped %s>' % val.__class__.__name__)
        msg = ('no valid values found. Tried %r and got (%s)'
               % (tried, ', '.join(outcomes)))
        if self.coal_obj.skip is not _MISSING:
            msg += ', skip set to %r' % (self.coal_obj.skip,)
        if self.coal_obj.skip_exc is not GlomError:
            msg += ', skip_exc set to %r' % (self.coal_obj.skip_exc,)
        if self.path is not None:
            msg += ' (at path %r)' % (self.path,)
        return msg

443

444

class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Derives from both :exc:`GlomError` and :exc:`TypeError`, so handlers
    for either will catch it.
    """

448

449

class UnregisteredTarget(GlomError):
    """The :class:`GlomError` raised when a spec calls for an operation
    the target's type does not support. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op = op
        self.target_type = target_type
        self.type_map = type_map
        self.path = path
        super().__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (type(self).__name__, self.op, self.target_type.__name__,
                  self.type_map, self.path)
        return '%s(%r, <type %r>, %r, %r)' % fields

    def get_message(self):
        """Explain which operation was unsupported for which target type
        and list the type names that are registered with a handler."""
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types mapped to a truthy handler count as registered
        names = sorted(typ.__name__ for typ, handler in self.type_map.items() if handler)
        # joining an empty list yields '', so this renders '()' when
        # nothing is registered
        names_text = '(%s)' % ', '.join(names)
        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, names_text))
        if self.path:
            msg += ' (at %r)' % (self.path,)
        return msg

498

499

if getattr(__builtins__, '__dict__', None) is not None:
    # pypy's __builtins__ is a module, as is CPython's REPL, but at
    # normal execution time it's a dict?
    __builtins__ = vars(__builtins__)


# Reverse lookup from id(builtin object) to the builtin's name; used to
# print e.g. ``len`` instead of ``<built-in function len>``.
_BUILTIN_ID_NAME_MAP = {id(obj): name for name, obj in __builtins__.items()}

508

509

class _BBRepr(Repr):
    """A ``reprlib.Repr`` that substitutes the plain builtin name when the
    default repr of a builtin is an angle-bracket form that cannot be
    round-tripped.
    """
    def __init__(self):
        super().__init__()
        # turn up all the length limits very high
        for attr in self.__dict__:
            if isinstance(getattr(self, attr), int):
                setattr(self, attr, 1024)

    def repr1(self, x, level):
        text = super().repr1(x, level)
        if text.startswith('<'):
            # angle-bracket reprs: use the builtin's name if x is one,
            # otherwise keep the text as is
            return _BUILTIN_ID_NAME_MAP.get(id(x), text)
        return text


# Module-level entry point, wrapped in recursive_repr() to guard against
# self-referential structures.
bbrepr = recursive_repr()(_BBRepr().repr)

530

531

class _BBReprFormatter(string.Formatter):
    """
    a string.Formatter whose ``!r`` conversion goes through bbrepr
    rather than the builtin repr; other conversions are unchanged
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format

544

545

546# TODO: push this back up to boltons with repr kwarg

def format_invocation(name='', args=(), kwargs=None, **kw):
    """Render a Python-style call expression from a name, positional
    arguments, and keyword arguments.

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)

    A dict of *kwargs* is emitted in sorted key order; any other value
    is iterated as ``(key, value)`` pairs in the order given. The only
    extra keyword accepted is ``repr``, the callable used to render each
    value (``bbrepr`` by default).
    """
    _repr = kw.pop('repr', bbrepr)
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))
    kwargs = kwargs or {}
    positional_text = ', '.join([_repr(arg) for arg in args])
    if isinstance(kwargs, dict):
        pairs = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        pairs = kwargs
    keyword_text = ', '.join(['%s=%s' % (key, _repr(val)) for key, val in pairs])

    # join the two halves with ', ' only when both are non-empty
    inner = ', '.join([text for text in (positional_text, keyword_text) if text])

    return '%s(%s)' % (name, inner)

576

577

class Path(object):
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        # The whole path is stored as one T-style object whose __ops__
        # tuple is (root, op, arg, op, arg, ...).
        if not path_parts:
            self.path_t = T
            return
        if isinstance(path_parts[0], TType):
            # a leading T-object becomes the starting point as is
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                # replay each (op, arg) pair of the segment onto path_t
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # anything else is a literal path part, recorded with op 'P'
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                # cache is full: build the Path without storing it
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # __ops__ is (root, op, arg, ...): one root plus two items per part
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        """Equal to another Path, or to a T-object, with identical ops."""
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path's ops begin with those of *other*.

        *other* may be a string (wrapped as a single-part Path, not split
        on dots), a Path, or a T-object.

        Raises:
           TypeError: if *other* is none of the above.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the part(s) selected by *i*.

        *i* may be an int (negative values count from the end) or a
        slice.

        Raises:
           IndexError: for an int index outside ``-len(self)`` to
              ``len(self) - 1``.
        """
        cur_t_path = self.path_t.__ops__
        try:
            # slice: translate part positions into positions within __ops__
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            start = (start * 2) + 1 if start >= 0 else (start * 2) + len(cur_t_path)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else (stop * 2) + len(cur_t_path)
        except AttributeError:
            # plain integer index
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + len(cur_t_path)
            # A part's op must sit strictly inside __ops__. Using ">=" here
            # makes path[len(path)] raise, which also ends iteration over a
            # Path (done through __getitem__) without a trailing empty Path.
            if start < 0 or start >= len(cur_t_path):
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # pair up (op, arg), stride over the pairs, then flatten again
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        new_t.__ops__ = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])

756

757

def _format_path(t_path):
    """Build the repr text for a flat ``(op, arg, op, arg, ...)`` tuple.

    Literal parts (op ``'P'``) are shown with ``repr``; runs of any other
    ops are grouped and rendered with ``_format_t``. The result is
    ``Path(...)`` unless the sequence consists solely of non-``'P'`` ops,
    in which case it is the ``_format_t`` rendering alone.
    """
    groups = []    # literal args, interleaved with lists of T-style ops
    pending = []   # the run of T-style ops currently being collected
    for pos in range(0, len(t_path), 2):
        op, arg = t_path[pos], t_path[pos + 1]
        if op != 'P':
            pending.extend((op, arg))
            continue
        if pending:
            groups.append(pending)
            pending = []
        groups.append(arg)

    if pending and not groups:
        # nothing but T-style ops
        return _format_t(pending)
    if pending:
        groups.append(pending)
    rendered = [_format_t(part) if type(part) is list else repr(part)
                for part in groups]
    return 'Path(%s)' % ', '.join(rendered)

780

781

class Spec(object):
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope or {}

    def glom(self, target, **kw):
        """Evaluate this spec against *target*.

        This Spec's scope is overlaid with any ``scope`` keyword given,
        and the merged mapping may supply a replacement for the
        module-level ``glom`` callable under the key ``glom``.
        """
        merged = dict(self.scope)
        merged.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(merged)
        runner = merged.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        # fold this Spec's extra values into the live scope, then hand
        # off to whichever glom callable that scope carries
        scope.update(self.scope)
        runner = scope[glom]
        return runner(target, self.spec, scope)

    def __repr__(self):
        cls_name = type(self).__name__
        spec_text = bbrepr(self.spec)
        if not self.scope:
            return '%s(%s)' % (cls_name, spec_text)
        return '%s(%s, scope=%r)' % (cls_name, spec_text, self.scope)

825

826

827class Coalesce(object):

828 """Coalesce objects specify fallback behavior for a list of

829 subspecs.

830

831 Subspecs are passed as positional arguments, and keyword arguments

832 control defaults. Each subspec is evaluated in turn, and if none

833 match, a :exc:`CoalesceError` is raised, or a default is returned,

834 depending on the options used.

835

836 .. note::

837

838 This operation may seem very familar if you have experience with

839 `SQL`_ or even `C# and others`_.

840

841

842 In practice, this fallback behavior's simplicity is only surpassed

843 by its utility:

844

845 >>> target = {'c': 'd'}

846 >>> glom(target, Coalesce('a', 'b', 'c'))

847 'd'

848

849 glom tries to get ``'a'`` from ``target``, but gets a

850 KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,

851 glom *coalesces* into the next subspec, ``'b'``. The process

852 repeats until it gets to ``'c'``, which returns our value,

853 ``'d'``. If our value weren't present, we'd see:

854

855 >>> target = {}

856 >>> glom(target, Coalesce('a', 'b'))

857 Traceback (most recent call last):

858 ...

859 CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

860

861 Same process, but because ``target`` is empty, we get a

862 :exc:`CoalesceError`.

863

864 .. note::

865

866 Coalesce is a *branching* specifier type, so as of v20.7.0, its

867 exception messages feature an error tree. See

868 :ref:`branched-exceptions` for details on how to interpret these

869 exceptions.

870

871

872 If we want to avoid an exception, and we know which value we want

873 by default, we can set *default*:

874

875 >>> target = {}

876 >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')

877 'd-fault'

878

879 ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

880

881 Args:

882

883 subspecs: One or more glommable subspecs

884 default: A value to return if no subspec results in a valid value

885 default_factory: A callable whose result will be returned as a default

886 skip: A value, tuple of values, or predicate function

887 representing values to ignore

888 skip_exc: An exception or tuple of exception types to catch and

889 move on to the next subspec. Defaults to :exc:`GlomError`, the

890 parent type of all glom runtime exceptions.

891

892 If all subspecs produce skipped values or exceptions, a

893 :exc:`CoalesceError` will be raised. For more examples, check out

894 the :doc:`tutorial`, which makes extensive use of Coalesce.

895

896 .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE

897 .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

898

899 """

900 def __init__(self, *subspecs, **kwargs):

901 self.subspecs = subspecs

902 self._orig_kwargs = dict(kwargs)

903 self.default = kwargs.pop('default', _MISSING)

904 self.default_factory = kwargs.pop('default_factory', _MISSING)

905 if self.default and self.default_factory:

906 raise ValueError('expected one of "default" or "default_factory", not both')

907 self.skip = kwargs.pop('skip', _MISSING)

908 if self.skip is _MISSING:

909 self.skip_func = lambda v: False

910 elif callable(self.skip):

911 self.skip_func = self.skip

912 elif isinstance(self.skip, tuple):

913 self.skip_func = lambda v: v in self.skip

914 else:

915 self.skip_func = lambda v: v == self.skip

916 self.skip_exc = kwargs.pop('skip_exc', GlomError)

917 if kwargs:

918 raise TypeError('unexpected keyword args: %r' % (sorted(kwargs.keys()),))

919

def glomit(self, target, scope):
    """Return the result of the first subspec that is neither skipped
    nor fails with ``skip_exc``.

    When every subspec is rejected, fall back to ``default`` (evaluated
    through ``arg_val``), then to ``default_factory()``, and otherwise
    raise :exc:`CoalesceError` carrying everything that was rejected.
    """
    rejected = []
    for candidate in self.subspecs:
        try:
            value = scope[glom](target, candidate, scope)
            if self.skip_func(value):
                rejected.append(value)
            else:
                return value
        except self.skip_exc as exc:
            rejected.append(exc)

    # no subspec produced an acceptable value
    if self.default is not _MISSING:
        return arg_val(target, self.default, scope)
    if self.default_factory is not _MISSING:
        return self.default_factory()
    raise CoalesceError(self, rejected, scope[Path])

939

def __repr__(self):
    """Render as a constructor call with the original arguments."""
    return format_invocation(self.__class__.__name__, self.subspecs,
                             self._orig_kwargs, repr=bbrepr)

943

944

class Inspect(object):
    """The :class:`~glom.Inspect` specifier type gives visibility into
    glom's evaluation of a specification, which helps when debugging
    tricky problems caused by unexpected data.

    :class:`~glom.Inspect` can be inserted into an existing spec in one of two
    ways. First, as a wrapper around the spec in question, or second,
    as an argument-less placeholder wherever a spec could be.

    :class:`~glom.Inspect` supports several modes, controlled by
    keyword arguments. Its default, no-argument mode, simply echos the
    state of the glom at the point where it appears:

      >>> target = {'a': {'b': {}}}
      >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
      ---
      path:   ['a.b']
      target: {'a': {'b': {}}}
      output: {}
      ---

    Debugging behavior aside, :class:`~glom.Inspect` has no effect on
    values in the target, spec, or result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether or not the Inspect should be applied
         at every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): This flag controls whether a debugging prompt
         should appear before evaluating each inspected spec. Can also
         take a callable. Defaults to False.
       post_mortem (bool): This flag controls whether exceptions
         should be caught and interactively debugged with :mod:`pdb` on
         inspected specs.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # the first positional argument, when present, is the wrapped spec
        self.wrapped = a[0] if a else Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)

        # ``True`` selects the stock pdb entry point; any other truthy
        # value has to be callable
        on_enter = kw.pop('breakpoint', False)
        if on_enter is True:
            on_enter = pdb.set_trace
        if on_enter and not callable(on_enter):
            raise TypeError('breakpoint expected bool or callable, not: %r' % on_enter)
        self.breakpoint = on_enter

        on_error = kw.pop('post_mortem', False)
        if on_error is True:
            on_error = pdb.post_mortem
        if on_error and not callable(on_error):
            raise TypeError('post_mortem expected bool or callable, not: %r' % on_error)
        self.post_mortem = on_error

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        """Swap the child handler for the tracer, then evaluate the
        wrapped spec through it."""
        # the real handler is stashed under the Inspect key so that
        # _trace can find it again
        real_handler = scope[glom]
        scope[Inspect] = real_handler
        scope[glom] = self._trace
        tracer = scope[glom]
        return tracer(target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Evaluate *spec* with the stashed handler, echoing and invoking
        the debugger hooks according to the configured flags."""
        if not self.recursive:
            # one-shot: put the real handler back for everything below
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path:  ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            output = scope[Inspect](target, spec, scope)
        except Exception:
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', output)
            print('---')
        return output

1042

1043

class Call(object):
    """:class:`Call` specifies when a target should be passed to a function,
    *func*.

    :class:`Call` is similar to :func:`~functools.partial` in that
    it is no more powerful than ``lambda`` or other functions, but
    it is designed to be more readable, with a better ``repr``.

    Args:
       func (callable): a function or other callable to be called with
          the target

    :class:`Call` combines well with :attr:`~glom.T` to construct objects. For
    instance, to generate a dict and then pass it to a constructor:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    This does the same as ``glom(target, lambda target:
    ExampleClass(**target))``, but it's easy to see which one reads
    better.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        # with no func given, the target itself is what gets called
        func = T if func is None else func
        if not callable(func) and not isinstance(func, (Spec, TType)):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        """Resolve func, args and kwargs against the current target, then
        perform the call."""
        func = arg_val(target, self.func, scope)
        positional = arg_val(target, self.args, scope)
        named = arg_val(target, self.kwargs, scope)
        return func(*positional, **named)

    def __repr__(self):
        return '%s(%s, args=%r, kwargs=%r)' % (
            self.__class__.__name__, bbrepr(self.func), self.args, self.kwargs)

1101

1102

def _is_spec(obj, strict=False):
    """Codify glom's spec type checking.

    ``T``-style objects always count as specs. In *strict* mode the only
    other accepted objects are exact :class:`Spec` instances; otherwise
    anything with a callable ``glomit`` qualifies.
    """
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec

1111

1112

class Invoke(object):
    """Specifier type designed for easy invocation of callables from glom.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` is similar to :func:`functools.partial`, but with the
    ability to set up a "templated" call which interleaves constants and
    glom specs.

    For example, the following creates a spec which can be used to
    check if targets are integers:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    And this composes like any other glom spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Another example, mixing positional and keyword arguments:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Invoke also helps with evaluating zero-argument functions:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not (callable(func) or _is_spec(func, strict=True)):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat tuple of (op, positional, keyword) triples, in call order
        self._args = ()
        # a registry of every known kwarg to its freshest value as set
        # by the methods below. the **kw dict is used as a unique marker.
        self._cur_kwargs = {}

    def _derive(self, op, first, second, fresh=None):
        # Build the next Invoke in the chain: same func, one more triple
        # appended, and the names in *fresh* (if any) re-pointed at the
        # dict that now owns them.
        derived = self.__class__(self.func)
        derived._args = self._args + (op, first, second)
        derived._cur_kwargs = dict(self._cur_kwargs)
        if fresh is not None:
            derived._cur_kwargs.update(dict.fromkeys(fresh, fresh))
        return derived

    @classmethod
    def specfunc(cls, spec):
        """Creates an :class:`Invoke` instance where the function is
        indicated by a spec.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def constants(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword argument values stored for passing to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.
        """
        return self._derive('C', a, kw, fresh=kw)

    def specs(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword arguments stored to be interpreted as specs, with
        the results passed to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.

        """
        return self._derive('S', a, kw, fresh=kw)

    def star(self, args=None, kwargs=None):
        """Returns a new :class:`Invoke` spec, with *args* and/or *kwargs*
        specs set to be "starred" or "star-starred" (respectively)

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
             underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
             the underlying function.

        One or both of the above arguments should be set.

        The :meth:`~Invoke.star()`, like other :class:`Invoke`
        methods, may be called multiple times. The *args* and *kwargs*
        will be stacked in the order in which they are provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        return self._derive('*', args, kwargs)

    def _triples(self):
        # iterate the flat _args tuple as (op, positional, keyword) triples
        return zip(self._args[0::3], self._args[1::3], self._args[2::3])

    def __repr__(self):
        label = self.__class__.__name__
        method_names = {'C': 'constants', 'S': 'specs', '*': 'star'}
        if type(self.func) is Spec:
            label += '.specfunc'
            head_args = (self.func.spec,)
        else:
            head_args = (self.func,)
        pieces = [format_invocation(label, head_args, repr=bbrepr)]

        for op, positional, named in self._triples():
            method = method_names[op]
            if op in ('C', 'S'):
                # only show keyword values that a later call has not overridden
                shown_args = positional
                shown_kwargs = [(k, v) for k, v in named.items()
                                if self._cur_kwargs[k] is named]
            else:
                shown_args = ()
                shown_kwargs = {}
                if positional:
                    shown_kwargs['args'] = positional
                if named:
                    shown_kwargs['kwargs'] = named

            pieces.append('.' + format_invocation(method, shown_args,
                                                  shown_kwargs, repr=bbrepr))

        return ''.join(pieces)

    def glomit(self, target, scope):
        """Assemble the final positional and keyword arguments from the
        recorded triples and call the underlying function."""
        call_args = []
        call_kwargs = {}

        def evaluate(spec):
            return scope[glom](target, spec, scope)

        func = evaluate(self.func) if _is_spec(self.func, strict=True) else self.func

        for op, positional, named in self._triples():
            if op == 'C':
                call_args.extend(positional)
                call_kwargs.update({k: v for k, v in named.items()
                                    if self._cur_kwargs[k] is named})
            elif op == 'S':
                call_args.extend([evaluate(sub) for sub in positional])
                call_kwargs.update({k: evaluate(v) for k, v in named.items()
                                    if self._cur_kwargs[k] is named})
            elif op == '*':
                if positional is not None:
                    call_args.extend(evaluate(positional))
                if named is not None:
                    call_kwargs.update(evaluate(named))

        return func(*call_args, **call_kwargs)

1323

1324

class Ref(object):
    """Name a part of a spec and refer to it elsewhere in the same spec,
    useful for trees and other self-similar data structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
          to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        """Register the named subspec in the scope (when one was given) or
        look it up (when not), then evaluate it against the target."""
        key = (Ref, self.name)
        if self.subspec is _MISSING:
            resolved = scope[key]
        else:
            resolved = self.subspec
            scope[key] = resolved
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr of the pair, minus the surrounding parentheses
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"

1352

1353

class TType(object):
    """``T``, short for "target". A singleton object that enables
    object-oriented expression of a glom specification.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    Basically, think of ``T`` as your data's stunt double. Everything
    that you do to ``T`` will be recorded and executed during the
    :func:`glom` call. Take this example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    So far, we've relied on the ``'a.b.c'``-style shorthand for
    access, or used the :class:`~glom.Path` objects, but if you want
    to explicitly do attribute and key lookups, look no further than
    ``T``.

    But T doesn't stop with unambiguous access. You can also call
    methods and perform almost any action you would with a normal
    object:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object can go anywhere in the spec. As seen in the example
    above, we access ``'a'``, use a ``T`` to get ``'b'`` and iterate
    over its ``items``, turning them into a ``list``.

    You can even use ``T`` with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    On a further note, while ``lambda`` works great in glom specs, and
    can be very handy at times, ``T`` and :class:`~glom.Call`
    eliminate the need for the vast majority of ``lambda`` usage with
    glom.

    Unlike ``lambda`` and other functions, ``T`` roundtrips
    beautifully and transparently:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # the only per-instance state: a flat tuple of
    # (root, op, arg, op, arg, ...) recording what was done to the object
    __slots__ = ('__ops__',)

    # -- access and calls ---------------------------------------------

    def __getattr__(self, name):
        if not name.startswith('__'):
            return _t_child(self, '.', name)
        raise AttributeError('T instances reserve dunder attributes.'
                             ' To access the "{name}" attribute, use'
                             ' T.__("{d_name}")'.format(name=name, d_name=name[2:]))

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        # calling the bare S root is the S(var=spec) assignment form,
        # which only makes sense with keyword arguments
        if self is S:
            if args:
                raise TypeError('S() takes no positional arguments, got: %r' % (args,))
            if not kwargs:
                raise TypeError('S() expected at least one kwarg, got none')
            # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    def __(self, name):
        """Access the dunder attribute ``'__' + name``."""
        return _t_child(self, '.', '__' + name)

    # -- star traversal -----------------------------------------------

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        recorded_ops = self.__ops__[1::2]
        return sum(recorded_ops.count(code) for code in ('x', 'X'))

    # -- binary operators (each records a one-character op code) -------

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    # -- unary operators ------------------------------------------------

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    # -- repr and pickling ----------------------------------------------

    def __repr__(self):
        root, steps = self.__ops__[0], self.__ops__[1:]
        return _format_t(steps, root)

    def __getstate__(self):
        # the root is swapped for its one-letter name in the state tuple
        ops = self.__ops__
        root_names = {T: 'T', S: 'S', A: 'A'}
        return tuple((root_names[ops[0]],) + ops[1:])

    def __setstate__(self, state):
        # inverse of __getstate__: map the letter back to the singleton
        roots = {'T': T, 'S': S, 'A': A}
        self.__ops__ = (roots[state[0]],) + state[1:]

1511

1512

def _t_child(parent, operation, arg):
    """Return a new ``TType`` whose op tuple is *parent*'s plus
    ``(operation, arg)``.

    Raises :exc:`BadSpec` for any operation other than ``'.'``, ``'['``
    or ``'P'`` on a path rooted at ``A``.
    """
    parent_ops = parent.__ops__
    # whitelist rather than blacklist assignment friendly operations
    assignable = operation in ('.', '[', 'P')
    if parent_ops[0] is A and not assignable:
        # TODO: error type?
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = parent_ops + (operation, arg)
    return child

1522

1523

def _s_first_magic(scope, key, _t):
    """
    enable S.a to do S['a'] or S['a'].val as a special
    case for accessing user defined string variables

    A missing key is reported as a :exc:`PathAccessError`, which is
    raised after the ``except`` block has been left.
    """
    try:
        found = scope[key]
    except KeyError as exc:
        # always only one level depth, hence 0
        failure = PathAccessError(exc, Path(_t), 0)
    else:
        return found
    raise failure

1537

1538

def _t_eval(target, _t, scope):
    """Execute the operations recorded on the ``TType`` object *_t*.

    ``_t.__ops__`` is a flat tuple ``(root, op, arg, op, arg, ...)``.
    Evaluation starts from *target* when the root is ``T`` and from
    *scope* when it is ``S`` or ``A``, then applies each ``(op, arg)``
    pair in turn.

    Op codes handled here: ``'.'`` attribute access, ``'['`` item access,
    ``'P'`` registry-based access, ``'x'``/``'X'`` star and double-star
    traversal, ``'('`` call, and the arithmetic codes ``+ - * # / % : & |
    ^ ~ _``, where ``'#'`` is floor division, ``':'`` is power and
    ``'_'`` is negation.

    Args:
        target: the current glom target.
        _t: the ``TType`` instance to evaluate.
        scope: the current glom scope.

    Returns:
        The value the operations produce. For an ``A`` root, and for the
        ``S(var=spec)`` assignment form, *target* is returned unchanged.

    Raises:
        BadSpec: for an ``A`` path that has no destination.
        PathAccessError: when an access or arithmetic step fails.
        ValueError: if the root is not one of ``T``, ``S`` or ``A``.
    """
    t_path = _t.__ops__
    i = 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    # failures are collected in pae and raised after the try block
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                # nxt grows while it is being iterated, so every
                # descendant is visited; ids guard against revisiting
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # '#' is what TType.__floordiv__ records; without
                    # this branch ``T // x`` silently returned the left
                    # operand unchanged
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            raise pae
        i += 2
    if root is A:
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur

1658

1659

def _assign_op(dest, op, arg, val, path, scope):
    """helper method for doing the assignment on a T operation

    ``'['`` stores by item, ``'.'`` stores by attribute and ``'P'`` goes
    through the registry's ``assign`` handler, whose failures are
    re-raised as :exc:`PathAssignError`. Any other op is a
    :exc:`ValueError`.
    """
    if op == '[':
        dest[arg] = val
        return
    if op == '.':
        setattr(dest, arg, val)
        return
    if op != 'P':  # pragma: no cover
        raise ValueError('unsupported T operation for assignment')

    assign_handler = scope[TargetRegistry].get_handler('assign', dest)
    try:
        assign_handler(dest, arg, val)
    except Exception as exc:
        raise PathAssignError(exc, path, arg)

1674

1675

def _extend_children(children, item, get_handler):
    """Append the direct children of *item* to *children*, best effort.

    Items with registered ``keys`` and ``get`` handlers contribute one
    child per key; otherwise an ``iterate`` handler is tried. Items with
    neither, and any errors raised while collecting, are silently
    ignored.
    """
    try:  # dict or obj-like
        list_keys = get_handler('keys', item)
        fetch = get_handler('get', item)
    except UnregisteredTarget:
        keyed = False
    else:
        keyed = True

    if keyed:
        try:
            for key in list_keys(item):
                try:
                    children.append(fetch(item, key))
                except Exception:
                    pass
        except Exception:
            pass
        return

    try:
        walk = get_handler('iterate', item)
    except UnregisteredTarget:
        return
    try:  # list-like
        children.extend(walk(item))
    except Exception:
        pass

1699

1700

# The three root singletons. Each one's op tuple starts with (and, for
# the bare root, consists solely of) the singleton itself.
T = TType()  # target aka Mr. T aka "this"
T.__ops__ = (T,)

S = TType()  # like T, but means grab stuff from Scope, not Target
S.__ops__ = (S,)

A = TType()  # like S, but shorthand to assign target to scope
A.__ops__ = (A,)

# helper constants for Path.from_text
_T_STAR = T.__star__()
_T_STARSTAR = T.__starstar__()

UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')

1714

1715

def _format_slice(x):
    """Format *x* the way it would be written inside square brackets.

    Slices become ``start:stop`` or ``start:stop:step``, with ``None``
    members left empty; anything else is simply ``bbrepr``'d.
    """
    if type(x) is not slice:
        return bbrepr(x)
    members = [x.start, x.stop]
    if x.step is not None:
        members.append(x.step)
    return ":".join(["" if m is None else bbrepr(m) for m in members])

1723

1724

def _format_t(path, root=T):
    """Build the repr of a ``T``/``S``/``A`` expression.

    Args:
        path: the flat ``(op, arg, op, arg, ...)`` tuple, i.e. a TType's
            ``__ops__`` without its leading root.
        root: the singleton the expression starts from.

    Returns:
        A string such as ``T['a'].b`` or ``-(T * 2)``. Paths containing a
        ``'P'`` op are delegated to ``_format_path``.
    """
    # every arithmetic op code: a sub-expression containing any of these
    # needs parentheses when it becomes an operand. '*' and '#' used to
    # be missing, so -(T * 2) was rendered as "-T * 2" and T / (T * 2)
    # as "T / T * 2".
    arithmetic_ops = '+-*#/%:&|^~_'
    # op codes whose source spelling differs from the code itself; '#'
    # used to be emitted verbatim, giving the invalid "T # 2"
    spelling = {':': '**', '#': '//'}

    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append("[%s]" % (index,))
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            # wrap what came before if it already contains arithmetic,
            # so that the unary operator applies to all of it
            if any([o in path[:i] for o in arithmetic_ops]):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_path = arg.__ops__
                if any([o in arg_path for o in arithmetic_ops]):
                    formatted_arg = '(' + formatted_arg + ')'
            prepr.append(' ' + spelling.get(op, op) + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)

1761

1762

class Val(object):
    """Val objects are specs which evaluate to the wrapped *value*.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Instead of accessing ``'counts'`` as a key like it did with
    ``'a.b'``, :func:`~glom.glom` just unwrapped the Val and
    included the value.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # target and scope are ignored: the wrapped value is the result
        return self.value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, bbrepr(self.value))


Literal = Val  # backwards compat for pre-20.7.0

1796

1797

class ScopeVars(object):
    """This is the runtime partner of :class:`Vars` -- this is what
    actually lives in the scope and stores runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # the merged mapping becomes the instance namespace itself, so
        # every entry is readable as an attribute
        namespace = dict(base)
        namespace.update(defaults)
        self.__dict__ = namespace

    def __iter__(self):
        # yields (name, value) pairs, which is what dict() consumes
        return iter(self.__dict__.items())

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, bbrepr(self.__dict__))

1817

1818

class Vars(object):
    """
    :class:`Vars` is a helper that can be used with **S** in order to
    store shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Arguments here should be thought of the same way as default arguments
    to a function. Each time the spec is evaluated, the same arguments
    will be referenced; so, think carefully about mutable data structures.
    """
    def __init__(self, base=(), **kw):
        # built only for validation: fails right away if *base* is not a
        # dict-compatible first argument
        dict(base)
        self.base, self.defaults = base, kw

    def glomit(self, target, spec):
        # a new ScopeVars is built from the stored arguments on each call
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        positional = (self.base,) if self.base else ()
        return format_invocation(self.__class__.__name__,
                                 args=positional,
                                 kwargs=self.defaults,
                                 repr=bbrepr)

1844

1845

class Let(object):
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if not kw:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        """Evaluate every bound spec, store the results in the scope and
        pass the target through unchanged."""
        evaluated = {}
        for name, subspec in self._binding.items():
            evaluated[name] = scope[glom](target, subspec, scope)
        # applied in one step, after every value has been computed
        scope.update(evaluated)
        return target

    def __repr__(self):
        return format_invocation(self.__class__.__name__,
                                 kwargs=self._binding, repr=bbrepr)

1869

1870

class Auto(object):
    """
    Switch to Auto mode (the default)

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # set the mode first, then evaluate the wrapped spec under it
        scope[MODE] = AUTO
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def __repr__(self):
        inner = bbrepr(self.spec) if self.spec is not None else ''
        return '%s(%s)' % (self.__class__.__name__, inner)

1890

1891

class _AbstractIterable(_AbstractIterableBase):
    """Abstract type matching every class with a callable ``__iter__``,
    except ``str`` and ``bytes``."""
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        # strings are iterable, but are deliberately excluded here
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))

1899

1900

class _ObjStyleKeysMeta(type):
    """Metaclass whose classes treat any object with a ``__dict__`` that
    has a ``keys`` attribute as an instance."""
    def __instancecheck__(cls, C):
        try:
            namespace = C.__dict__
        except AttributeError:
            return False
        return hasattr(namespace, "keys")

1904

1905

class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Marker type for objects whose keys are their ``__dict__`` entries."""
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        """Return the keys view of ``obj.__dict__``."""
        return obj.__dict__.keys()

1913

1914

1915def _get_sequence_item(target, index):

1916 return target[int(index)]

1917

1918

# handlers are 3-arg callables, with args (target, spec, scope)
# NOTE: an earlier wording of this comment listed spec first, "for
# convenience in the case that the handler is a method of the spec
# type"; the handlers defined below all take target first.

def _handle_dict(target, spec, scope):
    """Build a mapping of the same type as *spec* by evaluating each
    value spec against *target*.

    Entries whose value evaluates to SKIP are left out. Keys that are
    Spec or T objects are themselves evaluated against *target*.
    """
    result = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for key, subspec in spec.items():
        value = scope[glom](target, subspec, scope)
        if value is not SKIP:
            if type(key) in (Spec, TType):
                key = scope[glom](target, key, scope)
            result[key] = value
    return result

1932

1933

def _handle_list(target, spec, scope):
    """Apply the first element of *spec* to every item of *target*.

    Iteration uses the 'iterate' handler registered for the target's
    type. An item result of SKIP is omitted and STOP ends the iteration.

    Raises:
        TypeError: if calling the iterate handler on *target* fails.
    """
    subspec = spec[0]
    registry = scope[TargetRegistry]
    iterate = registry.get_handler('iterate', target, path=scope[Path])
    try:
        items = iterate(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))
    collected = []
    prefix = scope[Path]
    for position, item in enumerate(items):
        # record the index being processed as the last path segment
        scope[Path] = prefix + [position]
        outcome = scope[glom](item, subspec, scope)
        if outcome is STOP:
            break
        if outcome is not SKIP:
            collected.append(outcome)
    return collected

1953

1954

def _handle_tuple(target, spec, scope):
    """Evaluate the specs in *spec* one after another, feeding each
    result in as the target of the next.

    A step returning SKIP leaves the running value untouched; STOP ends
    the chain and returns the value reached so far.
    """
    current = target
    for step in spec:
        scope = chain_child(scope)
        outcome = scope[glom](current, step, scope)
        if outcome is STOP:
            break
        if outcome is SKIP:
            continue
        current = outcome
        if not isinstance(step, list):
            # list steps are excluded; any other step is appended to the path
            scope[Path] += [getattr(step, '__name__', step)]
    return current

1968

1969

class Pipe(object):
    """Evaluate specs one after the other, passing the result of
    the previous evaluation in as the target of the next spec:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        self.steps = steps

    def glomit(self, target, scope):
        # the steps tuple is chained exactly like a tuple spec
        chained = _handle_tuple(target, self.steps, scope)
        return chained

    def __repr__(self):
        name = self.__class__.__name__
        return name + bbrepr(self.steps)

1988

1989

class TargetRegistry(object):
    '''
    responsible for registration of target types for iteration
    and attribute walking

    Handlers are stored per operation name and per target type.
    Lookups for types without an exact registration fall back to the
    closest registered parent type found in a per-operation type tree.
    '''
    def __init__(self, register_default_types=True):
        # op name -> OrderedDict mapping target type -> handler (or False)
        self._op_type_map = {}
        self._op_type_tree = {}  # see _register_fuzzy_type for details
        # (type, op name) -> resolved handler; reset by register() and register_op()
        self._type_cache = {}

        self._op_auto_map = OrderedDict()  # op name to function that returns handler function

        self._register_builtin_ops()

        if register_default_types:
            self._register_default_types()
        return

    def get_handler(self, op, obj, path=None, raise_exc=True):
        """for an operation and object **instance**, obj, return the
        closest-matching handler function, raising UnregisteredTarget
        if no handler can be found for *obj* (or False if
        raise_exc=False)

        Successful lookups, and misses requested with raise_exc=False,
        are cached per (type, op). A cached miss still raises when a
        later caller asks with raise_exc=True.
        """
        obj_type = type(obj)
        cache_key = (obj_type, op)
        if cache_key in self._type_cache:
            ret = self._type_cache[cache_key]
            if ret is False and raise_exc:
                # the miss was cached by a raise_exc=False lookup; honor
                # this caller's request for an exception
                raise UnregisteredTarget(op, obj_type,
                                         type_map=self.get_type_map(op), path=path)
            return ret

        ret = False
        type_map = self.get_type_map(op)
        if type_map:
            try:
                ret = type_map[obj_type]
            except KeyError:
                type_tree = self._op_type_tree.get(op, {})
                closest = self._get_closest_type(obj, type_tree=type_tree)
                if closest is None:
                    ret = False
                else:
                    ret = type_map[closest]

        if ret is False and raise_exc:
            raise UnregisteredTarget(op, obj_type, type_map=type_map, path=path)

        self._type_cache[cache_key] = ret
        return ret

    def get_type_map(self, op):
        """Return the type -> handler map for *op*, or an empty
        OrderedDict when the operation is unknown."""
        try:
            return self._op_type_map[op]
        except KeyError:
            return OrderedDict()

    def _get_closest_type(self, obj, type_tree):
        """Return the deepest type in *type_tree* that *obj* is an
        instance of, following the first matching branch, or None."""
        default = None
        for cur_type, sub_tree in type_tree.items():
            if isinstance(obj, cur_type):
                sub_type = self._get_closest_type(obj, type_tree=sub_tree)
                ret = cur_type if sub_type is None else sub_type
                return ret
        return default

    def _register_default_types(self):
        self.register(object)
        self.register(dict, get=operator.getitem)
        self.register(dict, keys=dict.keys)
        self.register(list, get=_get_sequence_item)
        self.register(tuple, get=_get_sequence_item)
        self.register(OrderedDict, get=operator.getitem)
        self.register(OrderedDict, keys=OrderedDict.keys)
        self.register(_AbstractIterable, iterate=iter)
        self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)

    def _register_fuzzy_type(self, op, new_type, _type_tree=None):
        """Build a "type tree", an OrderedDict mapping registered types to
        their subtypes

        The type tree's invariant is that a key in the mapping is a
        valid parent type of all its children.

        Order is preserved such that non-overlapping parts of the
        subtree take precedence by which was most recently added.
        """
        if _type_tree is None:
            try:
                _type_tree = self._op_type_tree[op]
            except KeyError:
                _type_tree = self._op_type_tree[op] = OrderedDict()

        registered = False
        # iterate over a snapshot, since the tree is mutated in the loop
        for cur_type, sub_tree in list(_type_tree.items()):
            if issubclass(cur_type, new_type):
                sub_tree = _type_tree.pop(cur_type)  # mutation for recursion brevity
                try:
                    _type_tree[new_type][cur_type] = sub_tree
                except KeyError:
                    _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
                registered = True
            elif issubclass(new_type, cur_type):
                _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
                registered = True
        if not registered:
            _type_tree[new_type] = OrderedDict()
        return _type_tree

    def register(self, target_type, **kwargs):
        """Register handlers for *target_type*.

        Keyword arguments map operation names to handlers (a callable,
        or False). Operations that are not named are filled in from an
        existing registration for the type or, failing that, from the
        operation's auto function. Pass ``exact=True`` to skip the
        subtype (fuzzy) registration.

        Raises:
            TypeError: if *target_type* is not a type, an auto function
                fails, or a handler is neither callable nor False.
        """
        if not isinstance(target_type, type):
            raise TypeError('register expected a type, not an instance: %r' % (target_type,))
        exact = kwargs.pop('exact', None)
        new_op_map = dict(kwargs)

        for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
            cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())

            if op_name in new_op_map:
                handler = new_op_map[op_name]
            elif target_type in cur_type_map:
                handler = cur_type_map[target_type]
            else:
                try:
                    handler = self._op_auto_map[op_name](target_type)
                except Exception as e:
                    raise TypeError('error while determining support for operation'
                                    ' "%s" on target type: %s (got %r)'
                                    % (op_name, target_type.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            new_op_map[op_name] = handler

        for op_name, handler in new_op_map.items():
            self._op_type_map[op_name][target_type] = handler

        if not exact:
            for op_name in new_op_map:
                self._register_fuzzy_type(op_name, target_type)

        self._type_cache = {}  # reset type cache

        return

    def register_op(self, op_name, auto_func=None, exact=False):
        """add operations beyond the builtins ('get' and 'iterate' at the time
        of writing).

        auto_func is a function that when passed a type, returns a
        handler associated with op_name if it's supported, or False if
        it's not.

        See glom.core.register_op() for the global version used by
        extensions.

        Raises:
            TypeError: if *op_name* is not a string, *auto_func* is not
                callable, *auto_func* fails for a known type, or it
                returns something that is neither callable nor False.
        """
        if not isinstance(op_name, str):
            raise TypeError('expected op_name to be a text name, not: %r' % (op_name,))
        if auto_func is None:
            auto_func = lambda t: False
        elif not callable(auto_func):
            raise TypeError('expected auto_func to be callable, not: %r' % (auto_func,))

        # determine support for any previously known types
        known_types = {t for m in self._op_type_map.values() for t in m}
        type_map = self._op_type_map.get(op_name, OrderedDict())
        type_tree = self._op_type_tree.get(op_name, OrderedDict())
        for t in sorted(known_types, key=lambda t: t.__name__):
            if t in type_map:
                continue
            try:
                handler = auto_func(t)
            except Exception as e:
                raise TypeError('error while determining support for operation'
                                ' "%s" on target type: %s (got %r)'
                                % (op_name, t.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            type_map[t] = handler

        if not exact:
            for t in known_types:
                self._register_fuzzy_type(op_name, t, _type_tree=type_tree)

        self._op_type_map[op_name] = type_map
        self._op_type_tree[op_name] = type_tree
        self._op_auto_map[op_name] = auto_func

        # misses cached before this operation existed are now stale
        self._type_cache = {}

    def _register_builtin_ops(self):
        def _get_iterable_handler(type_obj):
            return iter if callable(getattr(type_obj, '__iter__', None)) else False

        self.register_op('iterate', _get_iterable_handler)
        self.register_op('get', lambda _: getattr)

2182

2183

# Base scope that every glom() call layers its own scope on top of (via
# new_child); it starts empty and is populated further down in this module.
_DEFAULT_SCOPE = ChainMap({})

2185

2186

def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # passing skip_exc without default implies default=None
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    # passing default without skip_exc implies skip_exc=GlomError; with
    # neither given the empty tuple means no exception is skipped
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    # when truthy, exceptions are re-raised untouched (see handler below)
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    # root scope for this call, layered on top of _DEFAULT_SCOPE
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # caller-supplied entries are applied last, so they overwrite the above
    scope.update(kwargs.pop('scope', {}))
    err = None
    if kwargs:
        # every recognized keyword has been popped by now
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised outside the except block (see the note on truncation above)
    if err:
        raise err
    return ret

2298

2299

def chain_child(scope):
    """
    used for specs like Auto(tuple), Switch(), etc
    that want to chain their child scopes together

    returns a new scope that can be passed to
    the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))
    """
    own_entries = scope.maps[0]
    if LAST_CHILD_SCOPE not in own_entries:
        return scope  # no children yet, nothing to do
    # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
    # this would have some interesting consequences for scoping
    # of tuples
    successor = scope[LAST_CHILD_SCOPE]
    successor_entries = successor.maps[0]
    successor_entries[NO_PYFRAME] = True
    # previous failed branches are forgiven as the
    # scope is re-wired into a new stack
    del successor.maps[0][CHILD_ERRORS][:]
    return successor

2321

2322

# types of unbound methods; type(Ref.glomit) was once listed here as well
unbound_methods = {type(str.__len__)}

2324

2325

2326def _has_callable_glomit(obj):

2327 glomit = getattr(obj, 'glomit', None)

2328 return callable(glomit) and not isinstance(obj, type)

2329

2330

def _glom(target, spec, scope):
    """Evaluate *spec* against *target* inside a fresh child of *scope*.

    The child scope is recorded on the parent as LAST_CHILD_SCOPE.
    Dispatch order: T objects first, then any spec instance with a
    callable ``glomit``, then the scope's MIN_MODE (if set) or MODE
    function. If evaluation raises, the failing scope and the exception
    are recorded for error reporting before the exception is re-raised.
    """
    parent = scope
    pmap = parent.maps[0]
    # MODE and MIN_MODE are copied from the parent's own (first) map
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        # MIN_MODE, when set, takes precedence over MODE
        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # maps[1] is the parent's own map: register this scope as a failed child
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # walk up through scopes flagged NO_PYFRAME (set by chain_child),
            # recording the same error on each of them
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise

2363

2364

def AUTO(target, spec, scope):
    """Default mode: interpret *spec* according to its Python type.

    Plain strings are parsed as paths, dicts/lists/tuples go to their
    dedicated handlers, and any other callable is called with *target*.

    Raises:
        TypeError: if *spec* is none of the supported kinds.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        return _t_eval(target, Path.from_text(spec).path_t, scope)

    # container specs, checked in the same order as always: dict, list, tuple
    container_handlers = (
        (dict, _handle_dict),
        (list, _handle_list),
        (tuple, _handle_tuple),
    )
    for container_type, handler in container_handlers:
        if isinstance(spec, container_type):
            return handler(target, spec, scope)

    if isinstance(spec, basestring):
        return Path.from_text(spec).glomit(target, scope)
    if callable(spec):
        return spec(target)

    raise TypeError('expected spec to be dict, list, tuple, callable, string,'
                    ' or other Spec-like type, not: %r' % (spec,))

2381

2382

# Populate the shared base scope: the `glom` key maps to the recursive
# evaluator _glom, and the TargetRegistry key maps to the registry that
# the module-level register() and register_op() functions operate on.
_DEFAULT_SCOPE.update({
    glom: _glom,
    TargetRegistry: TargetRegistry(register_default_types=True),
})

2387

2388

def register(target_type, **kwargs):
    """Register *target_type* so :meth:`~Glommer.glom()` will
    know how to handle instances of that type as targets.

    Here's an example of adding basic iterable support for Django's ORM:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())



    Args:
       target_type (type): A type expected to appear in a glom()
          call target
       get (callable): A function which takes a target object and
          a name, acting as a default accessor. Defaults to
          :func:`getattr`.
       iterate (callable): A function which takes a target object
          and returns an iterator. Defaults to :func:`iter` if
          *target_type* appears to be iterable.
       exact (bool): Whether or not to match instances of subtypes
          of *target_type*.

    .. note::

       The module-level :func:`register()` function affects the
       module-level :func:`glom()` function's behavior. If this
       global effect is undesirable for your application, or
       you're implementing a library, consider instantiating a
       :class:`Glommer` instance, and using the
       :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
       methods instead.

    """
    # delegate to the registry held by the module-wide default scope
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)

2430

2431

def register_op(op_name, **kwargs):
    """For extension authors needing to add operations beyond the builtin
    'get', 'iterate', 'keys', 'assign', and 'delete' to the default scope.
    See TargetRegistry for more details.
    """
    # delegate to the registry held by the module-wide default scope
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)

2439

2440

class Glommer(object):
    """The :class:`Glommer` type mostly serves to encapsulate type
    registration context so that advanced uses of glom don't need to
    worry about stepping on each other.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

    >>> glommer = Glommer()
    >>> glommer.glom({}, 'a.b.c', default='d')
    'd'
    >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
    3

    Instances also provide :meth:`~Glommer.register()` method for
    localized control over type handling.

    Args:
       register_default_types (bool): Whether or not to enable the
          handling behaviors of the default :func:`glom()`. These
          default actions include dict access, list and iterable
          iteration, and generic object attribute access. Defaults to
          True.

    """
    def __init__(self, **kwargs):
        with_defaults = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # this "freezes" the scope in at the time of construction
        self.scope = ChainMap(dict(base_scope))
        # each Glommer gets a registry of its own
        own_registry = TargetRegistry(register_default_types=with_defaults)
        self.scope[TargetRegistry] = own_registry

    def register(self, target_type, **kwargs):
        """Register *target_type* so :meth:`~Glommer.glom()` will
        know how to handle instances of that type as targets.

        Args:
           target_type (type): A type expected to appear in a glom()
              call target
           get (callable): A function which takes a target object and
              a name, acting as a default accessor. Defaults to
              :func:`getattr`.
           iterate (callable): A function which takes a target object
              and returns an iterator. Defaults to :func:`iter` if
              *target_type* appears to be iterable.
           exact (bool): Whether or not to match instances of subtypes
              of *target_type*.

        .. note::

           The module-level :func:`register()` function affects the
           module-level :func:`glom()` function's behavior. If this
           global effect is undesirable for your application, or
           you're implementing a library, consider instantiating a
           :class:`Glommer` instance, and using the
           :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
           methods instead.

        """
        # exact is always passed on explicitly, defaulting to False
        kwargs.setdefault('exact', False)
        registry = self.scope[TargetRegistry]
        registry.register(target_type, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Run :func:`glom` with this instance's scope supplied as *scope*."""
        return glom(target, spec, scope=self.scope, **kwargs)

2507

2508

class Fill(object):
    """A specifier type which switches to glom into "fill-mode". For the
    spec contained within the Fill, glom will only interpret explicit
    specifier types (including T objects). Whereas the default mode
    has special interpretations for each of these builtins, fill-mode
    takes a lighter touch, making Fill great for "filling out" Python
    literals, like tuples, dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    As you can see, glom's usual built-in tuple item chaining behavior
    has switched into a simple tuple constructor.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shorthand for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        # a missing spec is rendered as an empty argument list
        inner = bbrepr(self.spec) if self.spec is not None else ''
        return '%s(%s)' % (self.__class__.__name__, inner)

2542

2543

def FILL(target, spec, scope):
    """Fill-mode: rebuild dict, list, tuple, set and frozenset literals
    with every element evaluated against *target*. Other callables are
    called with *target*; anything else is returned unchanged.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def fill_in(val):
        return scope[glom](target, val, scope)

    kind = type(spec)
    if kind is dict:
        filled = {}
        for key, val in spec.items():
            # key first, then value
            new_key = fill_in(key)
            filled[new_key] = fill_in(val)
        return filled
    if kind in (list, tuple, set, frozenset):
        items = [fill_in(val) for val in spec]
        return items if kind is list else kind(items)
    return spec(target) if callable(spec) else spec

2559

2560class _ArgValuator(object):

2561 def __init__(self):

2562 self.cache = {}

2563

2564 def mode(self, target, spec, scope):

2565 """

2566 similar to FILL, but without function calling;

2567 useful for default, scope assignment, call/invoke, etc

2568 """

2569 recur = lambda val: scope[glom](target, val, scope)

2570 result = spec

2571 if type(spec) in (list, dict): # can contain themselves

2572 if id(spec) in self.cache:

2573 return self.cache[id(spec)]

2574 result = self.cache[id(spec)] = type(spec)()

2575 if type(spec) is dict:

2576 result.update({recur(key): recur(val) for key, val in spec.items()})

2577 else:

2578 result.extend([recur(val) for val in spec])

2579 if type(spec) in (tuple, set, frozenset): # cannot contain themselves

2580 result = type(spec)([recur(val) for val in spec])

2581 return result

2582

2583

def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    The scope's MIN_MODE is switched to a fresh _ArgValuator for the
    duration of the evaluation and put back afterwards, including when
    the evaluation raises.
    """
    mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        # restore even on failure so the scope is not left in arg mode
        scope[MIN_MODE] = mode