Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/core.py: 57%

1"""*glom gets results.*

3The ``glom`` package has one central entrypoint,

4:func:`glom.glom`. Everything else in the package revolves around that

5one function. Sometimes, big things come in small packages.

7A couple of conventional terms you'll see repeated many times below:

9* **target** - glom is built to work on any data, so we simply

10 refer to the object being accessed as the *"target"*

11* **spec** - *(aka "glomspec", short for specification)* The

12 accompanying template used to specify the structure of the return

13 value.

15Now that you know the terms, let's take a look around glom's powerful

16semantics.

18"""

20from __future__ import print_function

22import os

23import sys

24import pdb

25import copy

26import warnings

27import weakref

28import operator

29from abc import ABCMeta

30from pprint import pprint

31import string

32from collections import OrderedDict

33import traceback

35from face.helpers import get_wrap_width

36from boltons.typeutils import make_sentinel

37from boltons.iterutils import is_iterable

38#from boltons.funcutils import format_invocation

40basestring = str

41_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})

42from collections import ChainMap

43from reprlib import Repr, recursive_repr

# Debug mode is opt-in through the GLOM_DEBUG environment variable. Unset,
# empty, "0" and "false" (compared case-insensitively, after stripping
# whitespace) all mean "off"; anything else turns it on.
GLOM_DEBUG = os.getenv('GLOM_DEBUG', '').strip().lower()
GLOM_DEBUG = GLOM_DEBUG not in ('', '0', 'false')

# Width used when rendering target-spec traces; never narrower than 50.
TRACE_WIDTH = max(get_wrap_width(max_width=110), 50)   # min width

PATH_STAR = True
# should * and ** be interpreted as parallel traversal in Path.from_text()?
# Changed to True in 23.1, this option to disable will go away soon

# Alias for the builtin ``type``.
_type_type = type

_MISSING = make_sentinel('_MISSING')
SKIP = make_sentinel('SKIP')
SKIP.__doc__ = """
The ``SKIP`` singleton can be returned from a function or included
via a :class:`~glom.Val` to cancel assignment into the output
object.

>>> target = {'a': 'b'}
>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}
>>> glom(target, spec)
{}
>>> target = {'a': 'a'}
>>> glom(target, spec)
{'a': 'a'}

Mostly used to drop keys from dicts (as above) or filter objects from
lists.

.. note::

   SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+
   will remove the OMIT alias entirely.
"""
OMIT = SKIP  # backwards compat, remove in 19+

STOP = make_sentinel('STOP')
STOP.__doc__ = """
The ``STOP`` singleton can be used to halt iteration of a list or
execution of a tuple of subspecs.

>>> target = range(10)
>>> spec = [lambda x: x if x < 5 else STOP]
>>> glom(target, spec)
[0, 1, 2, 3, 4]
"""

LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')
LAST_CHILD_SCOPE.__doc__ = """
Marker that can be used by parents to keep track of the last child
scope executed.  Useful for "lifting" results out of child scopes
for scopes that want to chain the scopes of their children together
similar to tuple.
"""

NO_PYFRAME = make_sentinel('NO_PYFRAME')
NO_PYFRAME.__doc__ = """
Used internally to mark scopes which are no longer wrapped
in a recursive glom() call, so that they can be cleaned up correctly
in case of exceptions
"""

MODE = make_sentinel('MODE')

MIN_MODE = make_sentinel('MIN_MODE')

CHILD_ERRORS = make_sentinel('CHILD_ERRORS')
CHILD_ERRORS.__doc__ = """
``CHILD_ERRORS`` is used by glom internals to keep track of
failed child branches of the current scope.
"""

CUR_ERROR = make_sentinel('CUR_ERROR')
CUR_ERROR.__doc__ = """
``CUR_ERROR`` is used by glom internals to keep track of
thrown exceptions.
"""

# Directory containing this module; GlomError._finalize() looks for it in
# formatted traceback lines.
_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

124

class GlomError(Exception):
    """Root of the exception hierarchy used by :func:`glom` processing
    logic; every error glom itself raises derives from this type.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Build an exception that is both a GlomError and an instance of
        ``type(exc)``, carrying the same ``args``.

        If the dynamically-created wrapper cannot be instantiated or
        annotated, *exc* itself is returned unchanged.
        """
        # TODO: need to test this against a wide array of exception types
        # creating the wrapper type on the fly works for exceptions
        # defined in pure-python as well as C
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            # exc's type is already an ancestor of GlomError (or GlomError
            # itself); listing it again as a base would be redundant
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_type = type("GlomError.wrap({})".format(orig_type.__name__), bases, {})
        try:
            wrapped = wrapper_type(*exc.args)
            wrapped.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapped

    def _set_wrapped(self, exc):
        """Remember *exc* as the original exception behind this error."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the trailing traceback lines and the *scope* that
        ``__str__`` later uses to render the target-spec trace.
        """
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        sys.exc_info()  # result unused; call kept as in the original implementation
        tb_lines = traceback.format_exc().strip().splitlines()
        # Walk backwards to the last line mentioning glom's own package
        # directory; `limit` ends up one less than the number of lines that
        # follow it.  With no such line, every line is kept.
        limit = len(tb_lines)
        for offset, tb_line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in tb_line:
                limit = offset - 1
                break
        # NB: a limit of 0 keeps the full list, since lst[-0:] is lst[0:]
        self._tb_lines = tb_lines[-limit:]
        self._scope = scope

    def __str__(self):
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return cached

        if getattr(self, '_scope', None) is not None:
            # finalized: render (and cache) the trace plus traceback tail
            self._target_spec_trace = format_target_spec_trace(self._scope, self.__wrapped)
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     self._target_spec_trace,
                     *self._tb_lines]
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # not finalized: prefer a subclass-provided get_message()
        try:
            message_func = self.get_message
        except AttributeError:
            message_func = super(GlomError, self).__str__
        return message_func()

184

185

def _unpack_stack(scope, only_errors=True):
    """
    Flatten *scope* into ``[[scope, spec, target, error, [children]]]``,
    a list convenient for printing stacks.

    With ``only_errors=True`` the result is trimmed so that trailing
    entries without an error (branches still hanging around which were
    not involved in the stack trace of the error) are dropped.

    ``only_errors=False`` keeps everything, which could be useful for
    debugger / introspection (similar to traceback.print_stack()).
    """
    frames = []
    cur = scope.maps[0]
    while LAST_CHILD_SCOPE in cur:
        last_child = cur[LAST_CHILD_SCOPE]
        child_errors = cur[CHILD_ERRORS]
        if child_errors == [last_child]:
            child_errors = []  # if there's only one branch, count it as linear
        frames.append([cur, cur[Spec], cur[T], cur.get(CUR_ERROR), child_errors])

        # NB: identity comparison is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if any(last_child is branch for branch in child_errors):
            break  # if child already covered by branches, stop the linear descent

        cur = last_child.maps[0]
    else:  # if break executed above, cur scope was already added
        frames.append([cur, cur[Spec], cur[T], cur.get(CUR_ERROR), []])

    # push errors "down" to where they were first raised / first observed
    for frame, following in zip(frames, frames[1:]):
        if frame[3] == following[3]:
            frame[3] = None

    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(frames) > 1 and frames[-1][3] is None:
            frames.pop()
    return frames

226

227

def _format_trace_value(value, maxlen):
    """Render *value* with ``bbrepr`` for a trace line, truncating the text
    and appending a ``...`` marker when it exceeds *maxlen* characters.

    When ``len(value)`` is available the marker also reports it, e.g.
    ``... (len=1234)``.  If *maxlen* is smaller than the marker itself,
    only the marker is returned.
    """
    s = bbrepr(value).replace("\\'", "'")
    if len(s) > maxlen:
        try:
            suffix = '... (len=%s)' % len(value)
        except Exception:
            # value has no usable len(); fall back to a bare ellipsis
            suffix = '...'
        # Clamp at zero: a negative bound would slice characters off the
        # *end* of the repr rather than truncating it.
        keep = max(maxlen - len(suffix), 0)
        s = s[:keep] + suffix
    return s

237

238

def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    Render *scope* as a short, multi-line summary of the targets, specs,
    and errors found by :func:`_unpack_stack`.

    Branches are rendered by recursing with ``depth + 1``.  Errors
    identical to *root_error* are not printed.  For nested calls
    (``depth > 0``) the first line is marked with a backslash, and the
    last line with ``X`` when it is an error line or this is not the
    last branch.
    """
    gutter = " " + "|" * depth
    default_tick = "| " if depth else "- "

    def make_formatter(label, tick=None):
        # precompute the line prefix and how much room is left for the value
        prefix = gutter + (tick or default_tick) + label + ": "
        room = width - len(prefix)

        def fmt(value):
            return prefix + _format_trace_value(value, room)
        return fmt

    fmt_target = make_formatter("Target")
    fmt_spec = make_formatter("Spec")
    fmt_branch = make_formatter("Spec", "+ ")

    def fmt_error(err):
        # format_exception_only() output ends with a newline; drop it
        exc_text = "".join(traceback.format_exception_only(type(err), err))[:-1]
        return gutter + default_tick + exc_text

    def recurse(child_scope, last=False):
        # prev_target is read at call time, so children see the most
        # recently printed target
        return format_target_spec_trace(child_scope, root_error, width, depth + 1, prev_target, last)

    lines = []
    for _scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            lines.append(fmt_target(target))
            prev_target = target
        if not branches:
            lines.append(fmt_spec(spec))
        else:
            lines.append(fmt_branch(spec))
            for branch in branches[:-1]:
                lines.append(recurse(branch))
            lines.append(recurse(branches[-1], last_branch))
        last_line_error = error is not None and error is not root_error
        if last_line_error:
            lines.append(fmt_error(error))

    if depth:  # \ on first line, X on last line
        def remark(text, mark):
            return text[:depth + 1] + mark + text[depth + 2:]

        lines[0] = remark(lines[0], "\\")
        if not last_branch or last_line_error:
            lines[-1] = remark(lines[-1], "X")
    return "\n".join(lines)

277

278

279# TODO: not used (yet)

def format_oneline_trace(scope):
    """
    Collapse *scope* into a single-line summary
    (shortest summary possible).
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    pieces = []
    last_target = _MISSING
    for frame_scope, spec, target, _error, _branches in _unpack_stack(scope, only_errors=False):
        pieces.append('/')
        # T and Path specs are shown in full; anything else by type name
        if type(spec) in (TType, Path):
            spec_label = bbrepr(spec)
        else:
            spec_label = type(spec).__name__
        pieces.append(spec_label)
        if target != last_target:
            pieces.extend(('!', type(target).__name__))
        if Path in frame_scope:
            joined_path = '->'.join([str(p) for p in frame_scope[Path]])
            pieces.extend(('<', joined_path, '>'))
        last_target = target

    return "".join(pieces)

305

306

class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """A :exc:`GlomError` subtype signalling that an attribute, key, or
    index named by the spec could not be accessed on the target.  It is
    the most commonly-seen error when using glom; it keeps hold of the
    original exception and produces a readable message for debugging.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc, self.path, self.part_idx = exc, path, part_idx

    def get_message(self):
        """Describe which part of the path failed and with what error."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return 'could not access %r, part %r of %r, got error: %r' % details

    def __repr__(self):
        fields = (self.__class__.__name__, self.exc, self.path, self.part_idx)
        return '%s(%r, %r, %r)' % fields

352

353

class PathAssignError(GlomError):
    """A :exc:`GlomError` subtype raised when an assignment fails,
    whether it stems from an :func:`~glom.assign` call or from other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc, self.path, self.dest_name = exc, path, dest_name

    def get_message(self):
        """Describe the destination, the path, and the underlying error."""
        details = (self.dest_name, self.path, self.exc)
        return 'could not assign %r on object at %r, got error: %r' % details

    def __repr__(self):
        fields = (self.__class__.__name__, self.exc, self.path, self.dest_name)
        return '%s(%r, %r, %r)' % fields

382

383

class CoalesceError(GlomError):
    """A :exc:`GlomError` subtype raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj, self.skipped, self.path = coal_obj, skipped, path

    def __repr__(self):
        fields = (self.__class__.__name__, self.coal_obj, self.skipped, self.path)
        return '%s(%r, %r, %r)' % fields

    def get_message(self):
        """Summarize which subspecs were tried and what each one yielded."""
        coal = self.coal_obj
        tried = tuple(coal.subspecs)
        outcomes = []
        for val in self.skipped:
            type_name = val.__class__.__name__
            if isinstance(val, coal.skip_exc):
                # a caught exception: show just its type name
                outcomes.append(type_name)
            else:
                # a value rejected by the skip rule
                outcomes.append('<skipped %s>' % type_name)
        msg = ('no valid values found. Tried %r and got (%s)'
               % (tried, ', '.join(outcomes)))
        # only mention options that differ from their defaults
        if coal.skip is not _MISSING:
            msg += ', skip set to %r' % (coal.skip,)
        if coal.skip_exc is not GlomError:
            msg += ', skip_exc set to %r' % (coal.skip_exc,)
        if self.path is not None:
            msg += ' (at path %r)' % (self.path,)
        return msg

439

440

class BadSpec(GlomError, TypeError):
    """Signals a malformed spec structure, for example a specifier
    type that is not valid in the current mode."""

444

445

class UnregisteredTarget(GlomError):
    """A :class:`GlomError` subtype raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op, self.target_type = op, target_type
        self.type_map, self.path = type_map, path
        super(UnregisteredTarget, self).__init__(op, target_type, type_map, path)

    def __repr__(self):
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (self.__class__.__name__, self.op, self.target_type.__name__,
                  self.type_map, self.path)
        return '%s(%r, <type %r>, %r, %r)' % fields

    def get_message(self):
        """Explain which operation is unsupported for the target type and
        list the type names that are registered for it."""
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types with a truthy handler count as registered
        names = sorted(t.__name__ for t, handler in self.type_map.items() if handler)
        names_text = '(%s)' % ', '.join(names)
        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, names_text))
        if self.path:
            msg += ' (at %r)' % (self.path,)
        return msg

494

495

if getattr(__builtins__, '__dict__', None) is not None:
    # pypy's __builtins__ is a module, as is CPython's REPL, but at
    # normal execution time it's a dict?
    __builtins__ = __builtins__.__dict__


# Maps id(builtin object) -> its name in the builtins namespace, so that
# e.g. ``len`` can be rendered as "len" rather than "<built-in function len>".
_BUILTIN_ID_NAME_MAP = {id(v): k for k, v in __builtins__.items()}


class _BBRepr(Repr):
    """A better repr for builtins, when the built-in repr isn't
    roundtrippable.
    """
    def __init__(self):
        super().__init__()
        # turn up all the length limits very high
        for name, value in list(self.__dict__.items()):
            # Only the integer limits (maxlist, maxstring, ...) are raised.
            # Newer Pythons also keep non-numeric settings on the instance
            # (``fillvalue``, ``indent``); overwriting those with 1024
            # would corrupt the generated reprs.
            if isinstance(value, int):
                setattr(self, name, 1024)

    def repr1(self, x, level):
        """Return the standard ``Repr`` text for *x*, except that
        angle-bracket reprs of builtins are replaced by the builtin's name.
        """
        ret = Repr.repr1(self, x, level)
        if not ret.startswith('<'):
            return ret
        # "<...>" reprs can't be evaluated back; prefer the builtin's name
        # when x is one, otherwise keep the original text
        return _BUILTIN_ID_NAME_MAP.get(id(x), ret)


# Module-wide repr function; recursive_repr() guards against re-entrant
# calls on the same object.
bbrepr = recursive_repr()(_BBRepr().repr)

524

525

class _BBReprFormatter(string.Formatter):
    """
    A ``string.Formatter`` whose ``{!r}`` conversion goes through bbrepr
    instead of the builtin repr; all other conversions are unchanged.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        # un-escape single quotes in the bbrepr output
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format

538

539

540# TODO: push this back up to boltons with repr kwarg

541def format_invocation(name='', args=(), kwargs=None, **kw):

542 """Given a name, positional arguments, and keyword arguments, format

543 a basic Python-style function call.

544

545 >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))

546 func(1, 2, c=3)

547 >>> print(format_invocation('a_func', args=(1,)))

548 a_func(1)

549 >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))

550 kw_func(a=1, b=2)

551

552 """

553 _repr = kw.pop('repr', bbrepr)

554 if kw:

555 raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))

556 kwargs = kwargs or {}

557 a_text = ', '.join([_repr(a) for a in args])

558 if isinstance(kwargs, dict):

559 kwarg_items = [(k, kwargs[k]) for k in sorted(kwargs)]

560 else:

561 kwarg_items = kwargs

562 kw_text = ', '.join(['%s=%s' % (k, _repr(v)) for k, v in kwarg_items])

563

564 all_args_text = a_text

565 if all_args_text and kw_text:

566 all_args_text += ', '

567 all_args_text += kw_text

568

569 return '%s(%s)' % (name, all_args_text)

570

571

class Path(object):
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Integer indexes outside the path raise :exc:`IndexError`, as with
    other sequences.

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        # a leading T-style object becomes the root everything is appended to
        if isinstance(path_parts[0], TType):
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # splice in each (operation, argument) pair of the sub-path
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # plain values are recorded as 'P' (path) operations
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # from_text() results, cached separately per PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                # cache is full: build the Path without storing it
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # __ops__ is (root, op, arg, op, arg, ...)
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path begins with *other*, which may be a
        string, a Path, or a T-style object.

        Raises:
            TypeError: if *other* is none of those.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the selected part(s) of this one.

        *i* may be an integer (negative values count from the end) or a
        slice.  For a slice, the ``start:stop`` window is taken first and
        the step is then applied within that window.

        Raises:
            IndexError: if an integer index is out of range.
        """
        cur_t_path = self.path_t.__ops__
        # Work on whole (operation, argument) pairs so the usual sequence
        # rules apply and the root at __ops__[0] can never be selected.
        pairs = tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))
        if isinstance(i, slice):
            picked = pairs[i.start:i.stop]
            if i.step is not None and i.step != 1:
                picked = picked[::i.step]
        else:
            try:
                picked = (pairs[i],)
            except IndexError:
                raise IndexError('Path index out of range') from None

        new_t = TType()
        new_t.__ops__ = (cur_t_path[0],) + sum(picked, ())
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])

750

751

752def _format_path(t_path):

753 path_parts, cur_t_path = [], []

754 i = 0

755 while i < len(t_path):

756 op, arg = t_path[i], t_path[i + 1]

757 i += 2

758 if op == 'P':

759 if cur_t_path:

760 path_parts.append(cur_t_path)

761 cur_t_path = []

762 path_parts.append(arg)

763 else:

764 cur_t_path.append(op)

765 cur_t_path.append(arg)

766 if path_parts and cur_t_path:

767 path_parts.append(cur_t_path)

768

769 if path_parts or not cur_t_path:

770 return 'Path(%s)' % ', '.join([_format_t(part)

771 if type(part) is list else repr(part)

772 for part in path_parts])

773 return _format_t(cur_t_path)

774

775

class Spec(object):
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope or {}

    def glom(self, target, **kw):
        """Run the wrapped spec against *target*.

        The Spec's own scope is combined with any ``scope`` keyword
        argument (the latter wins on conflicts) before the call.
        """
        combined = dict(self.scope)
        combined.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(combined)
        # the scope may supply a replacement for the glom function itself
        runner = combined.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        scope.update(self.scope)
        recurse = scope[glom]
        return recurse(target, self.spec, scope)

    def __repr__(self):
        cn = self.__class__.__name__
        if not self.scope:
            return '%s(%s)' % (cn, bbrepr(self.spec))
        return '%s(%s, scope=%r)' % (cn, bbrepr(self.spec), self.scope)

819

820

class Coalesce(object):
    """Coalesce objects specify fallback behavior for a list of
    subspecs.

    Subspecs are passed as positional arguments, and keyword arguments
    control defaults. Each subspec is evaluated in turn, and if none
    match, a :exc:`CoalesceError` is raised, or a default is returned,
    depending on the options used.

    .. note::

        This operation may seem very familiar if you have experience with
        `SQL`_ or even `C# and others`_.


    In practice, this fallback behavior's simplicity is only surpassed
    by its utility:

    >>> target = {'c': 'd'}
    >>> glom(target, Coalesce('a', 'b', 'c'))
    'd'

    glom tries to get ``'a'`` from ``target``, but gets a
    KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,
    glom *coalesces* into the next subspec, ``'b'``. The process
    repeats until it gets to ``'c'``, which returns our value,
    ``'d'``. If our value weren't present, we'd see:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    Same process, but because ``target`` is empty, we get a
    :exc:`CoalesceError`.

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.


    If we want to avoid an exception, and we know which value we want
    by default, we can set *default*:

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')
    'd-fault'

    ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

    Args:

       subspecs: One or more glommable subspecs
       default: A value to return if no subspec results in a valid value
       default_factory: A callable whose result will be returned as a default
       skip: A value, tuple of values, or predicate function
         representing values to ignore
       skip_exc: An exception or tuple of exception types to catch and
         move on to the next subspec. Defaults to :exc:`GlomError`, the
         parent type of all glom runtime exceptions.

    Passing both *default* and *default_factory* raises
    :exc:`ValueError`; any unrecognized keyword argument raises
    :exc:`TypeError`.

    If all subspecs produce skipped values or exceptions, a
    :exc:`CoalesceError` will be raised. For more examples, check out
    the :doc:`tutorial`, which makes extensive use of Coalesce.

    .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE
    .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

    """
    def __init__(self, *subspecs, **kwargs):
        self.subspecs = subspecs
        # untouched copy of the keyword arguments, used by __repr__
        self._orig_kwargs = dict(kwargs)
        self.default = kwargs.pop('default', _MISSING)
        self.default_factory = kwargs.pop('default_factory', _MISSING)
        # Compare against the sentinel rather than relying on truthiness,
        # so that falsy defaults (0, '', None, ...) are also recognized as
        # having been supplied.
        if self.default is not _MISSING and self.default_factory is not _MISSING:
            raise ValueError('expected one of "default" or "default_factory", not both')
        self.skip = kwargs.pop('skip', _MISSING)
        # normalize *skip* into a predicate over subspec results
        if self.skip is _MISSING:
            self.skip_func = lambda v: False
        elif callable(self.skip):
            self.skip_func = self.skip
        elif isinstance(self.skip, tuple):
            self.skip_func = lambda v: v in self.skip
        else:
            self.skip_func = lambda v: v == self.skip
        self.skip_exc = kwargs.pop('skip_exc', GlomError)
        if kwargs:
            raise TypeError('unexpected keyword args: %r' % (sorted(kwargs.keys()),))

    def glomit(self, target, scope):
        skipped = []  # rejected values and caught exceptions, in subspec order
        for subspec in self.subspecs:
            try:
                ret = scope[glom](target, subspec, scope)
                if not self.skip_func(ret):
                    break
                skipped.append(ret)
            except self.skip_exc as e:
                skipped.append(e)
                continue
        else:
            # no subspec produced an acceptable value
            if self.default is not _MISSING:
                ret = arg_val(target, self.default, scope)
            elif self.default_factory is not _MISSING:
                ret = self.default_factory()
            else:
                raise CoalesceError(self, skipped, scope[Path])
        return ret

    def __repr__(self):
        cn = self.__class__.__name__
        return format_invocation(cn, self.subspecs, self._orig_kwargs, repr=bbrepr)

937

938

class Inspect(object):
    """The :class:`~glom.Inspect` specifier type offers visibility into
    glom's evaluation of a specification, which helps when debugging
    tricky problems caused by unexpected data.

    :class:`~glom.Inspect` can be inserted into an existing spec in one of two
    ways. First, as a wrapper around the spec in question, or second,
    as an argument-less placeholder wherever a spec could be.

    :class:`~glom.Inspect` supports several modes, controlled by
    keyword arguments. Its default, no-argument mode, simply echoes the
    state of the glom at the point where it appears:

      >>> target = {'a': {'b': {}}}
      >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
      ---
      path: ['a.b']
      target: {'a': {'b': {}}}
      output: {}
      ---

    Debugging behavior aside, :class:`~glom.Inspect` has no effect on
    values in the target, spec, or result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether or not the Inspect should be applied
         at every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): This flag controls whether a debugging prompt
         should appear before evaluating each inspected spec. Can also
         take a callable. Defaults to False.
       post_mortem (bool): This flag controls whether exceptions
         should be caught and interactively debugged with :mod:`pdb` on
         inspected specs.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # the first positional argument, if any, is the wrapped spec
        if a:
            self.wrapped = a[0]
        else:
            self.wrapped = Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)

        # True selects the pdb hook; any other truthy value must be callable
        bp_hook = kw.pop('breakpoint', False)
        if bp_hook is True:
            bp_hook = pdb.set_trace
        if bp_hook and not callable(bp_hook):
            raise TypeError('breakpoint expected bool or callable, not: %r' % bp_hook)
        self.breakpoint = bp_hook

        pm_hook = kw.pop('post_mortem', False)
        if pm_hook is True:
            pm_hook = pdb.post_mortem
        if pm_hook and not callable(pm_hook):
            raise TypeError('post_mortem expected bool or callable, not: %r' % pm_hook)
        self.post_mortem = pm_hook

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        """Install the tracing handler, then evaluate the wrapped spec."""
        # keep the real handler reachable under the Inspect key and put
        # the trace callback in its place
        scope[Inspect] = scope[glom]
        scope[glom] = self._trace
        return scope[glom](target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Echo and/or debug one evaluation, delegating to the stashed handler."""
        if not self.recursive:
            # restore the real handler so nested evaluations are not traced
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path: ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            result = scope[Inspect](target, spec, scope)
        except Exception:
            # give the post-mortem hook a chance, then let the error propagate
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', result)
            print('---')
        return result

1036

1037

class Call(object):
    """:class:`Call` specifies when a target should be passed to a function,
    *func*.

    :class:`Call` is similar to :func:`~functools.partial` in that
    it is no more powerful than ``lambda`` or other functions, but
    it is designed to be more readable, with a better ``repr``.

    Args:
       func (callable): a function or other callable to be called with
          the target

    :class:`Call` combines well with :attr:`~glom.T` to construct objects. For
    instance, to generate a dict and then pass it to a constructor:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    This does the same as ``glom(target, lambda target:
    ExampleClass(**target))``, but it's easy to see which one reads
    better.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        # with no func given, the target itself is what gets called
        if func is None:
            func = T
        if not (callable(func) or isinstance(func, (Spec, TType))):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        'run against the current target'
        # func, args and kwargs are each resolved against the target
        # (in that order) before the call is made
        func = arg_val(target, self.func, scope)
        call_args = arg_val(target, self.args, scope)
        call_kwargs = arg_val(target, self.kwargs, scope)
        return func(*call_args, **call_kwargs)

    def __repr__(self):
        return '%s(%s, args=%r, kwargs=%r)' % (
            self.__class__.__name__, bbrepr(self.func), self.args, self.kwargs)

1095

1096

def _is_spec(obj, strict=False):
    """Codify the spec type checking used in glom.

    TType instances always count. In *strict* mode the only other
    accepted objects are exact ``Spec`` instances; otherwise the
    answer is delegated to ``_has_callable_glomit``.
    """
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec

1105

1106

class Invoke(object):
    """Specifier type designed for easy invocation of callables from glom.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` is similar to :func:`functools.partial`, but with the
    ability to set up a "templated" call which interleaves constants and
    glom specs.

    For example, the following creates a spec which can be used to
    check if targets are integers:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    And this composes like any other glom spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Another example, mixing positional and keyword arguments:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Invoke also helps with evaluating zero-argument functions:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not callable(func) and not _is_spec(func, strict=True):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat tuple of (op, positional, keyword) triples, one triple per
        # constants()/specs()/star() call, in call order
        self._args = ()
        # a registry of every known kwarg to its freshest value as set
        # by the methods below. the **kw dict is used as a unique marker.
        self._cur_kwargs = {}

    @classmethod
    def specfunc(cls, spec):
        """Creates an :class:`Invoke` instance where the function is
        indicated by a spec.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def constants(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword argument values stored for passing to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.
        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('C', a, kw)
        latest = dict(self._cur_kwargs)
        # every keyword of this call now points at this call's kw dict
        latest.update(dict.fromkeys(kw, kw))
        clone._cur_kwargs = latest
        return clone

    def specs(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword arguments stored to be interpreted as specs, with
        the results passed to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.

        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('S', a, kw)
        latest = dict(self._cur_kwargs)
        # every keyword of this call now points at this call's kw dict
        latest.update(dict.fromkeys(kw, kw))
        clone._cur_kwargs = latest
        return clone

    def star(self, args=None, kwargs=None):
        """Returns a new :class:`Invoke` spec, with *args* and/or *kwargs*
        specs set to be "starred" or "star-starred" (respectively)

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
              underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
              the underlying function.

        One or both of the above arguments should be set.

        The :meth:`~Invoke.star()`, like other :class:`Invoke`
        methods, may be called multiple times. The *args* and *kwargs*
        will be stacked in the order in which they are provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        clone = self.__class__(self.func)
        clone._args = self._args + ('*', args, kwargs)
        clone._cur_kwargs = dict(self._cur_kwargs)
        return clone

    def __repr__(self):
        method_names = {'C': 'constants', 'S': 'specs', '*': 'star'}
        head = self.__class__.__name__
        if type(self.func) is Spec:
            head += '.specfunc'
            head_args = (self.func.spec,)
        else:
            head_args = (self.func,)
        pieces = [format_invocation(head, head_args, repr=bbrepr)]

        # walk the flat _args tuple three entries at a time
        steps = zip(self._args[0::3], self._args[1::3], self._args[2::3])
        for op, positional, raw_kwargs in steps:
            method = method_names[op]
            if op in ('C', 'S'):
                # only show keywords whose registered value is still the
                # kw dict of this very call
                shown = [(k, v) for k, v in raw_kwargs.items()
                         if self._cur_kwargs[k] is raw_kwargs]
            else:
                # star() takes its two specs as args=/kwargs= keywords
                shown = {}
                if positional:
                    shown['args'] = positional
                if raw_kwargs:
                    shown['kwargs'] = raw_kwargs
                positional = ()

            pieces.append('.' + format_invocation(method, positional, shown, repr=bbrepr))

        return ''.join(pieces)

    def glomit(self, target, scope):
        """Assemble the positional and keyword arguments, then call the function."""
        call_args = []
        call_kwargs = {}

        recurse = lambda spec: scope[glom](target, spec, scope)
        func = recurse(self.func) if _is_spec(self.func, strict=True) else self.func

        steps = zip(self._args[0::3], self._args[1::3], self._args[2::3])
        for op, positional, keywords in steps:
            if op == 'C':
                # constants are passed through untouched
                call_args.extend(positional)
                for name, value in keywords.items():
                    if self._cur_kwargs[name] is keywords:
                        call_kwargs[name] = value
            elif op == 'S':
                # specs are evaluated against the target first
                for sub in positional:
                    call_args.append(recurse(sub))
                for name, sub in keywords.items():
                    if self._cur_kwargs[name] is keywords:
                        call_kwargs[name] = recurse(sub)
            elif op == '*':
                if positional is not None:
                    call_args.extend(recurse(positional))
                if keywords is not None:
                    call_kwargs.update(recurse(keywords))

        return func(*call_args, **call_kwargs)

1317

1318

class Ref(object):
    """Name a part of a spec and refer to it elsewhere in the same spec,
    useful for trees and other self-similar data structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
          to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        """Register or look up the named subspec, then evaluate it."""
        key = (Ref, self.name)
        if self.subspec is _MISSING:
            # pure reference: fetch the spec registered under this name
            resolved = scope[key]
        else:
            # definition: publish the spec under this name in the scope
            resolved = self.subspec
            scope[key] = resolved
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr of the pair as a tuple, minus the enclosing parentheses
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"

1346

1347

class TType(object):
    """``T``, short for "target". A singleton object that enables
    object-oriented expression of a glom specification.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    Basically, think of ``T`` as your data's stunt double. Everything
    that you do to ``T`` will be recorded and executed during the
    :func:`glom` call. Take this example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    So far, we've relied on the ``'a.b.c'``-style shorthand for
    access, or used the :class:`~glom.Path` objects, but if you want
    to explicitly do attribute and key lookups, look no further than
    ``T``.

    But T doesn't stop with unambiguous access. You can also call
    methods and perform almost any action you would with a normal
    object:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object can go anywhere in the spec. As seen in the example
    above, we access ``'a'``, use a ``T`` to get ``'b'`` and iterate
    over its ``items``, turning them into a ``list``.

    You can even use ``T`` with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    On a further note, while ``lambda`` works great in glom specs, and
    can be very handy at times, ``T`` and :class:`~glom.Call`
    eliminate the need for the vast majority of ``lambda`` usage with
    glom.

    Unlike ``lambda`` and other functions, ``T`` roundtrips
    beautifully and transparently:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # the only per-instance state: a tuple holding the root object
    # followed by alternating (operation code, argument) entries
    __slots__ = ('__ops__',)

    # -- access and call recording ------------------------------------

    def __getattr__(self, name):
        if name.startswith('__'):
            raise AttributeError('T instances reserve dunder attributes.'
                                 ' To access the "{name}" attribute, use'
                                 ' T.__("{d_name}")'.format(name=name, d_name=name[2:]))
        return _t_child(self, '.', name)

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        if self is S:
            # calling the bare S root only accepts keyword arguments
            if args:
                raise TypeError('S() takes no positional arguments, got: %r' % (args,))
            if not kwargs:
                raise TypeError('S() expected at least one kwarg, got none')
            # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    def __(self, name):
        return _t_child(self, '.', '__' + name)

    # -- star / star-star traversal ------------------------------------

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        op_codes = self.__ops__[1::2]
        return op_codes.count('x') + op_codes.count('X')

    # -- arithmetic and bitwise operators, each recorded under a
    # -- one-character operation code -----------------------------------

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    # -- representation and pickling -----------------------------------

    def __repr__(self):
        ops = self.__ops__
        return _format_t(ops[1:], ops[0])

    def __getstate__(self):
        # swap the root singleton for its one-letter name
        ops = self.__ops__
        root_name = {T: 'T', S: 'S', A: 'A'}[ops[0]]
        return tuple((root_name,) + ops[1:])

    def __setstate__(self, state):
        # inverse of __getstate__: turn the name back into the singleton
        root = {'T': T, 'S': S, 'A': A}[state[0]]
        self.__ops__ = (root,) + state[1:]

1505

1506

def _t_child(parent, operation, arg):
    """Return a new TType whose ops are *parent*'s plus (*operation*, *arg*).

    Raises BadSpec when the path is rooted at ``A`` and the operation
    is anything other than attribute, item, or path access.
    """
    parent_ops = parent.__ops__
    # whitelist rather than blacklist assignment friendly operations
    assignable = operation in ('.', '[', 'P')
    if parent_ops[0] is A and not assignable:
        # TODO: error type?
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = parent_ops + (operation, arg)
    return child

1516

1517

1518def _s_first_magic(scope, key, _t):

1519 """

1520 enable S.a to do S['a'] or S['a'].val as a special

1521 case for accessing user defined string variables

1522 """

1523 err = None

1524 try:

1525 cur = scope[key]

1526 except KeyError as e:

1527 err = PathAccessError(e, Path(_t), 0) # always only one level depth, hence 0

1528 if err:

1529 raise err

1530 return cur

1531

1532

def _t_eval(target, _t, scope):
    """Evaluate the recorded operations of a T/S/A expression.

    Args:
        target: The current glom target. It is the starting value for
            ``T``-rooted paths and the value assigned by ``A``-rooted ones.
        _t: The TType instance whose ``__ops__`` tuple is replayed.
        scope: The current glom scope. It is the starting value for
            ``S``- and ``A``-rooted paths.

    Returns:
        The value reached after applying every operation, or *target*
        itself for ``A`` assignments and ``S(var=spec)`` bindings.

    Raises:
        BadSpec: if an ``A`` path has no destination.
        PathAccessError: if an access or arithmetic step fails.
        ValueError: if the root is not one of ``T``, ``S`` or ``A``.
    """
    t_path = _t.__ops__
    i = 1  # index of the next operation code; its argument is at i + 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    # access failures are stored here and raised after the try/except
    # that detected them has finished
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                # nxt grows while it is being iterated, so descendants
                # are visited too; ids guard against revisiting an object
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # '#' is the code recorded by TType.__floordiv__;
                    # without this branch ``T // n`` left cur unchanged
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            raise pae
        i += 2
    if root is A:
        # the final (op, arg) pair was held back above: it is the
        # assignment destination rather than something to fetch
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur

1652

1653

1654def _assign_op(dest, op, arg, val, path, scope):

1655 """helper method for doing the assignment on a T operation"""

1656 if op == '[':

1657 dest[arg] = val

1658 elif op == '.':

1659 setattr(dest, arg, val)

1660 elif op == 'P':

1661 _assign = scope[TargetRegistry].get_handler('assign', dest)

1662 try:

1663 _assign(dest, arg, val)

1664 except Exception as e:

1665 raise PathAssignError(e, path, arg)

1666 else: # pragma: no cover

1667 raise ValueError('unsupported T operation for assignment')

1668

1669

1670def _extend_children(children, item, get_handler):

1671 try: # dict or obj-like

1672 keys = get_handler('keys', item)

1673 get = get_handler('get', item)

1674 except UnregisteredTarget:

1675 try:

1676 iterate = get_handler('iterate', item)

1677 except UnregisteredTarget:

1678 pass

1679 else:

1680 try: # list-like

1681 children.extend(iterate(item))

1682 except Exception:

1683 pass

1684 else:

1685 try:

1686 for key in keys(item):

1687 try:

1688 children.append(get(item, key))

1689 except Exception:

1690 pass

1691 except Exception:

1692 pass

1693

1694

# The three root singletons. Each one's __ops__ starts with the
# singleton itself, which is how a path's root is identified later.
T = TType()  # target aka Mr. T aka "this"
T.__ops__ = (T,)

S = TType()  # like T, but means grab stuff from Scope, not Target
S.__ops__ = (S,)

A = TType()  # like S, but shorthand to assign target to scope
A.__ops__ = (A,)

# helper constants for Path.from_text
_T_STAR = T.__star__()
_T_STARSTAR = T.__starstar__()

UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')

1708

1709

def _format_slice(x):
    """Format *x* as it would appear between square brackets.

    Slices are rendered in ``start:stop`` or ``start:stop:step`` form,
    with ``None`` members left empty; anything else gets ``bbrepr``.
    """
    if type(x) is not slice:
        return bbrepr(x)

    members = [x.start, x.stop]
    if x.step is not None:
        members.append(x.step)
    return ":".join(["" if m is None else bbrepr(m) for m in members])

1717

1718

def _format_t(path, root=T):
    """Build the repr of a T-style expression.

    Args:
        path: The operations tuple without its root, i.e. alternating
            (operation code, argument) entries.
        root: The root singleton (``T``, ``S`` or ``A``) naming the
            start of the expression.

    Returns:
        The expression as a string. As soon as a ``'P'`` operation is
        met the whole path is handed over to ``_format_path``.
    """
    # operation codes that do not match their Python spelling
    symbols = {':': '**', '#': '//'}
    # characters whose presence triggers parenthesization; '#' is the
    # floor division code and must be treated like the other operators
    needs_parens = '+-/#%:&|^~_'

    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append("[%s]" % (index,))
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            # wrap what has been built so far, so the prefix operator
            # applies to the whole of it
            if any([o in path[:i] for o in needs_parens]):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_path = arg.__ops__
                if any([o in arg_path for o in needs_parens]):
                    formatted_arg = '(' + formatted_arg + ')'
            # '#' used to be emitted verbatim, producing "T # 2"
            prepr.append(' ' + symbols.get(op, op) + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)

1755

1756

class Val(object):
    """Val objects are specs which evaluate to the wrapped *value*.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Instead of accessing ``'counts'`` as a key like it did with
    ``'a.b'``, :func:`~glom.glom` just unwrapped the Val and
    included the value.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # neither the target nor the scope is consulted
        return self.value

    def __repr__(self):
        return '%s(%s)' % (self.__class__.__name__, bbrepr(self.value))


# backwards compat for pre-20.7.0
Literal = Val

1790

1791

class ScopeVars(object):
    """This is the runtime partner of :class:`Vars` -- this is what
    actually lives in the scope and stores runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # the merged mapping becomes the instance namespace itself, so
        # every key is readable as an attribute; defaults win on clashes
        namespace = dict(base)
        namespace.update(defaults)
        self.__dict__ = namespace

    def __iter__(self):
        # yields (name, value) pairs, which is what makes dict(obj) work
        return iter(self.__dict__.items())

    def __repr__(self):
        return "%s(%s)" % (self.__class__.__name__, bbrepr(self.__dict__))

1811

1812

class Vars(object):
    """
    :class:`Vars` is a helper that can be used with **S** in order to
    store shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Arguments here should be thought of the same way as default arguments
    to a function. Each time the spec is evaluated, the same arguments
    will be referenced; so, think carefully about mutable data structures.
    """
    def __init__(self, base=(), **kw):
        # the result is discarded: the call only serves to reject a
        # first argument that dict() cannot consume
        dict(base)
        self.base, self.defaults = base, kw

    def glomit(self, target, spec):
        # a new ScopeVars per evaluation, built from the same stored arguments
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        positional = (self.base,) if self.base else ()
        return format_invocation(self.__class__.__name__,
                                 args=positional,
                                 kwargs=self.defaults,
                                 repr=bbrepr)

1838

1839

class Let(object):
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if not kw:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        # evaluate every bound spec against the target, then publish all
        # of the results into the scope in one update
        evaluated = {name: scope[glom](target, subspec, scope)
                     for name, subspec in self._binding.items()}
        scope.update(evaluated)
        return target

    def __repr__(self):
        return format_invocation(self.__class__.__name__,
                                 kwargs=self._binding, repr=bbrepr)

1863

1864

class Auto(object):
    """
    Switch to Auto mode (the default)

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # set the mode in the scope before evaluating the wrapped spec
        scope[MODE] = AUTO
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)

1884

1885

class _AbstractIterable(_AbstractIterableBase):
    """Abstract type matching any class with a callable ``__iter__``,
    with ``str`` and ``bytes`` deliberately excluded."""
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))

1893

1894

1895class _ObjStyleKeysMeta(type):

1896 def __instancecheck__(cls, C):

1897 return hasattr(C, "__dict__") and hasattr(C.__dict__, "keys")

1898

1899

class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Target type for objects whose keys are the names in their ``__dict__``."""
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        # the live keys view of the instance namespace
        return obj.__dict__.keys()

1907

1908

1909def _get_sequence_item(target, index):

1910 return target[int(index)]

1911

1912

# handlers are 3-arg callables, with args (target, spec, scope):
# the target being processed comes first, followed by the spec
# that is applied to it and the current scope

def _handle_dict(target, spec, scope):
    """Build a new mapping of the same type as *spec* by evaluating
    each value of *spec* as a subspec against *target*.

    Entries whose value evaluates to SKIP are left out. Keys that are
    Spec or TType instances are themselves evaluated against *target*
    before being used as output keys.
    """
    ret = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for field, subspec in spec.items():
        val = scope[glom](target, subspec, scope)
        if val is not SKIP:
            if type(field) in (Spec, TType):
                field = scope[glom](target, field, scope)
            ret[field] = val
    return ret

1926

1927

def _handle_list(target, spec, scope):
    """Apply the first element of *spec* to every item produced by
    iterating *target*, collecting the results in a list.

    The 'iterate' handler is looked up in the scope's TargetRegistry.
    A SKIP result omits the item; a STOP result ends the iteration.
    The scope's Path is extended with the item index for each item.

    Raises:
        TypeError: if calling the iterate handler on *target* fails.
    """
    subspec = spec[0]
    registry = scope[TargetRegistry]
    iterate = registry.get_handler('iterate', target, path=scope[Path])
    try:
        iterator = iterate(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))
    collected = []
    base_path = scope[Path]
    for idx, item in enumerate(iterator):
        scope[Path] = base_path + [idx]
        val = scope[glom](item, subspec, scope)
        if val is STOP:
            break
        if val is not SKIP:
            collected.append(val)
    return collected

1947

1948

def _handle_tuple(target, spec, scope):
    """Evaluate the subspecs in *spec* one after another, feeding each
    result in as the target of the next.

    A SKIP result keeps the previous value and moves on; a STOP result
    ends the chain and returns the value so far. Before each step the
    scope is re-chained through chain_child().
    """
    result = target
    for step in spec:
        scope = chain_child(scope)
        outcome = scope[glom](result, step, scope)
        if outcome is STOP:
            break
        if outcome is SKIP:
            continue
        result = outcome
        if not isinstance(step, list):
            # record the step (by __name__ when it has one) on the path
            scope[Path] += [getattr(step, '__name__', step)]
    return result

1962

1963

class Pipe(object):
    """Evaluate specs one after the other, passing the result of
    the previous evaluation in as the target of the next spec:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        # kept as the tuple of positional arguments, in call order
        self.steps = steps

    def glomit(self, target, scope):
        """Run the stored steps through the tuple handler."""
        steps = self.steps
        return _handle_tuple(target, steps, scope)

    def __repr__(self):
        name = self.__class__.__name__
        return name + bbrepr(self.steps)

1982

1983

class TargetRegistry(object):
    '''
    responsible for registration of target types for iteration
    and attribute walking
    '''
    def __init__(self, register_default_types=True):
        # op name -> OrderedDict mapping registered type -> handler (or False)
        self._op_type_map = {}
        self._op_type_tree = {}  # see _register_fuzzy_type for details
        # (type, op name) -> resolved handler; reset by register()
        self._type_cache = {}

        self._op_auto_map = OrderedDict()  # op name to function that returns handler function

        self._register_builtin_ops()

        if register_default_types:
            self._register_default_types()
        return

    def get_handler(self, op, obj, path=None, raise_exc=True):
        """for an operation and object **instance**, obj, return the
        closest-matching handler function, raising UnregisteredTarget
        if no handler can be found for *obj* (or False if
        raise_exc=False)

        A negative result cached by an earlier ``raise_exc=False`` call
        still raises when the same lookup is repeated with
        ``raise_exc=True``.
        """
        obj_type = type(obj)
        cache_key = (obj_type, op)
        if cache_key in self._type_cache:
            ret = self._type_cache[cache_key]
        else:
            ret = False
            type_map = self.get_type_map(op)
            if type_map:
                try:
                    ret = type_map[obj_type]
                except KeyError:
                    # no exact match, fall back to the closest registered
                    # supertype recorded in the op's type tree
                    type_tree = self._op_type_tree.get(op, {})
                    closest = self._get_closest_type(obj, type_tree=type_tree)
                    if closest is not None:
                        ret = type_map[closest]
            # as before, a lookup that is about to raise is not cached
            if ret is not False or not raise_exc:
                self._type_cache[cache_key] = ret

        # checked outside the cache-miss branch so a cached False
        # cannot bypass raise_exc
        if ret is False and raise_exc:
            raise UnregisteredTarget(op, obj_type, type_map=self.get_type_map(op), path=path)
        return ret

    def get_type_map(self, op):
        """Return the type-to-handler map for *op*, or a new empty
        OrderedDict if *op* has no map."""
        try:
            return self._op_type_map[op]
        except KeyError:
            return OrderedDict()

    def _get_closest_type(self, obj, type_tree):
        """Walk *type_tree* and return the deepest type that *obj* is
        an instance of, taking the first matching branch at each level.
        Returns None when nothing at this level matches."""
        default = None
        for cur_type, sub_tree in type_tree.items():
            if isinstance(obj, cur_type):
                sub_type = self._get_closest_type(obj, type_tree=sub_tree)
                ret = cur_type if sub_type is None else sub_type
                return ret
        return default

    def _register_default_types(self):
        """Register the handlers for the builtin target types."""
        self.register(object)
        self.register(dict, get=operator.getitem)
        self.register(dict, keys=dict.keys)
        self.register(list, get=_get_sequence_item)
        self.register(tuple, get=_get_sequence_item)
        self.register(OrderedDict, get=operator.getitem)
        self.register(OrderedDict, keys=OrderedDict.keys)
        self.register(_AbstractIterable, iterate=iter)
        self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)

    def _register_fuzzy_type(self, op, new_type, _type_tree=None):
        """Build a "type tree", an OrderedDict mapping registered types to
        their subtypes

        The type tree's invariant is that a key in the mapping is a
        valid parent type of all its children.

        Order is preserved such that non-overlapping parts of the
        subtree take precedence by which was most recently added.
        """
        if _type_tree is None:
            try:
                _type_tree = self._op_type_tree[op]
            except KeyError:
                _type_tree = self._op_type_tree[op] = OrderedDict()

        registered = False
        # iterate over a snapshot, the tree is mutated inside the loop
        for cur_type, sub_tree in list(_type_tree.items()):
            if issubclass(cur_type, new_type):
                # an existing entry is a subtype: re-home it under new_type
                sub_tree = _type_tree.pop(cur_type)  # mutation for recursion brevity
                try:
                    _type_tree[new_type][cur_type] = sub_tree
                except KeyError:
                    _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
                registered = True
            elif issubclass(new_type, cur_type):
                # new_type is a subtype of an existing entry: recurse into it
                _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
                registered = True
        if not registered:
            _type_tree[new_type] = OrderedDict()
        return _type_tree

    def register(self, target_type, **kwargs):
        """Register handlers for *target_type*.

        Keyword arguments other than ``exact`` map operation names to
        handlers. Operations not named explicitly keep an existing
        handler for the type, or get one from the operation's auto
        function. Unless ``exact`` is truthy, the type is also added to
        each operation's type tree so instances of subtypes match.

        Raises:
            TypeError: if *target_type* is not a type, if an auto
                function fails, or if a handler is neither callable
                nor False.
        """
        if not isinstance(target_type, type):
            raise TypeError('register expected a type, not an instance: %r' % (target_type,))
        exact = kwargs.pop('exact', None)
        new_op_map = dict(kwargs)

        for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
            cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())

            if op_name in new_op_map:
                handler = new_op_map[op_name]
            elif target_type in cur_type_map:
                handler = cur_type_map[target_type]
            else:
                try:
                    handler = self._op_auto_map[op_name](target_type)
                except Exception as e:
                    raise TypeError('error while determining support for operation'
                                    ' "%s" on target type: %s (got %r)'
                                    % (op_name, target_type.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            new_op_map[op_name] = handler

        for op_name, handler in new_op_map.items():
            self._op_type_map[op_name][target_type] = handler

        if not exact:
            for op_name in new_op_map:
                self._register_fuzzy_type(op_name, target_type)

        self._type_cache = {}  # reset type cache

        return

    def register_op(self, op_name, auto_func=None, exact=False):
        """add operations beyond the builtins ('get' and 'iterate' at the time
        of writing).

        auto_func is a function that when passed a type, returns a
        handler associated with op_name if it's supported, or False if
        it's not.

        See glom.core.register_op() for the global version used by
        extensions.
        """
        if not isinstance(op_name, str):
            raise TypeError('expected op_name to be a text name, not: %r' % (op_name,))
        if auto_func is None:
            auto_func = lambda t: False
        elif not callable(auto_func):
            raise TypeError('expected auto_func to be callable, not: %r' % (auto_func,))

        # determine support for any previously known types
        known_types = {t for m in self._op_type_map.values() for t in m}
        type_map = self._op_type_map.get(op_name, OrderedDict())
        type_tree = self._op_type_tree.get(op_name, OrderedDict())
        for t in sorted(known_types, key=lambda t: t.__name__):
            if t in type_map:
                continue
            try:
                handler = auto_func(t)
            except Exception as e:
                raise TypeError('error while determining support for operation'
                                ' "%s" on target type: %s (got %r)'
                                % (op_name, t.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            type_map[t] = handler

        if not exact:
            for t in known_types:
                self._register_fuzzy_type(op_name, t, _type_tree=type_tree)

        self._op_type_map[op_name] = type_map
        self._op_type_tree[op_name] = type_tree
        self._op_auto_map[op_name] = auto_func

    def _register_builtin_ops(self):
        """Register the builtin 'iterate' and 'get' operations."""
        def _get_iterable_handler(type_obj):
            return iter if callable(getattr(type_obj, '__iter__', None)) else False

        self.register_op('iterate', _get_iterable_handler)
        self.register_op('get', lambda _: getattr)

2176

2177

# Module-wide base scope: glom() derives each call's scope from it via
# new_child(), and Glommer copies it at construction time. It starts out
# empty and is populated further down, once _glom and TargetRegistry exist.
_DEFAULT_SCOPE = ChainMap({})

2179

2180

def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.

    Raises:
       TypeError: if unexpected keyword arguments are passed.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # default/skip_exc fall back on each other: skip_exc alone implies
    # default=None, default alone implies skip_exc=GlomError, and with
    # neither set nothing is skipped (empty tuple)
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    # per-call root scope, layered on top of the module-level default scope
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    # the root scope is its own parent and its own root
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # caller-provided scope entries are applied last
    scope.update(kwargs.pop('scope', {}))
    err = None
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        # with glom_debug set, the original exception propagates untouched
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised here, outside the except block above (see the comment on
    # truncating the stack trace)
    if err:
        raise err
    return ret

2292

2293

def chain_child(scope):
    """
    used for specs like Auto(tuple), Switch(), etc
    that want to chain their child scopes together

    returns a new scope that can be passed to
    the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))
    """
    if LAST_CHILD_SCOPE in scope.maps[0]:
        # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
        # this would have some interesting consequences for scoping
        # of tuples
        nxt_in_chain = scope[LAST_CHILD_SCOPE]
        own_map = nxt_in_chain.maps[0]
        own_map[NO_PYFRAME] = True
        # previous failed branches are forgiven as the
        # scope is re-wired into a new stack
        del own_map[CHILD_ERRORS][:]
        return nxt_in_chain
    return scope  # no children yet, nothing to do

2315

2316

# set holding the type of an unbound builtin method (str.__len__)
unbound_methods = {type(str.__len__)}  #, type(Ref.glomit)])

2318

2319

2320def _has_callable_glomit(obj):

2321 glomit = getattr(obj, 'glomit', None)

2322 return callable(glomit) and not isinstance(obj, type)

2323

2324

def _glom(target, spec, scope):
    """Evaluate *spec* against *target* in a fresh child of *scope*.

    The child scope records the target, the spec and its parent, and
    inherits MODE and MIN_MODE from the parent's first map. Dispatch is
    in this order: TType specs go to _t_eval, objects with a callable
    ``glomit`` have it called, and anything else is passed to the
    MIN_MODE function if set, otherwise the MODE function.

    On any exception the failing scope and the error are recorded for
    later error reporting, then the exception is re-raised.
    """
    parent = scope
    pmap = parent.maps[0]
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    # let the parent find its most recently created child
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # maps[1] is the parent's first map, since new_child() put this
        # scope's own map in front of the parent's maps
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # walk up through scopes flagged NO_PYFRAME (set by
            # chain_child) and record the same error on each of them
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise

2357

2358

def AUTO(target, spec, scope):
    """Default mode: interpret *spec* according to its Python type.

    Strings are treated as paths, dicts, lists and tuples go to their
    respective handlers, and any other callable is called with
    *target*.

    Raises:
        TypeError: if *spec* is none of the supported kinds.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        return _t_eval(target, Path.from_text(spec).path_t, scope)
    if isinstance(spec, dict):
        return _handle_dict(target, spec, scope)
    if isinstance(spec, list):
        return _handle_list(target, spec, scope)
    if isinstance(spec, tuple):
        return _handle_tuple(target, spec, scope)
    if isinstance(spec, basestring):
        # string subclasses miss the exact-type shortcut above
        return Path.from_text(spec).glomit(target, scope)
    if callable(spec):
        return spec(target)

    raise TypeError('expected spec to be dict, list, tuple, callable, string,'
                    ' or other Spec-like type, not: %r' % (spec,))

2375

2376

# Populate the default scope now that its contents are defined: the
# `glom` key holds the recursive evaluator that specs call as
# scope[glom](...), and the TargetRegistry key holds the registry used
# by the module-level register() and register_op().
_DEFAULT_SCOPE.update({
    glom: _glom,
    TargetRegistry: TargetRegistry(register_default_types=True),
})

2381

2382

def register(target_type, **kwargs):
    """Register *target_type* so :meth:`~Glommer.glom()` will
    know how to handle instances of that type as targets.

    Here's an example of adding basic iterable support for Django's ORM:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())



    Args:
       target_type (type): A type expected to appear in a glom()
          call target
       get (callable): A function which takes a target object and
          a name, acting as a default accessor. Defaults to
          :func:`getattr`.
       iterate (callable): A function which takes a target object
          and returns an iterator. Defaults to :func:`iter` if
          *target_type* appears to be iterable.
       exact (bool): Whether or not to match instances of subtypes
          of *target_type*.

    .. note::

       The module-level :func:`register()` function affects the
       module-level :func:`glom()` function's behavior. If this
       global effect is undesirable for your application, or
       you're implementing a library, consider instantiating a
       :class:`Glommer` instance, and using the
       :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
       methods instead.

    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)

2424

2425

def register_op(op_name, **kwargs):
    """For extension authors needing to add operations beyond the builtin
    'get', 'iterate', 'keys', 'assign', and 'delete' to the default scope.
    See TargetRegistry for more details.
    """
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)

2433

2434

class Glommer(object):
    """The :class:`Glommer` type mostly serves to encapsulate type
    registration context so that advanced uses of glom don't need to
    worry about stepping on each other.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

    >>> glommer = Glommer()
    >>> glommer.glom({}, 'a.b.c', default='d')
    'd'
    >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
    3

    Instances also provide :meth:`~Glommer.register()` method for
    localized control over type handling.

    Args:
       register_default_types (bool): Whether or not to enable the
          handling behaviors of the default :func:`glom()`. These
          default actions include dict access, list and iterable
          iteration, and generic object attribute access. Defaults to
          True.

    """
    def __init__(self, **kwargs):
        use_defaults = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # this "freezes" the scope in at the time of construction
        frozen = ChainMap(dict(base_scope))
        # each Glommer gets its own registry, replacing the copied one
        frozen[TargetRegistry] = TargetRegistry(register_default_types=use_defaults)
        self.scope = frozen

    def register(self, target_type, **kwargs):
        """Register *target_type* so :meth:`~Glommer.glom()` will
        know how to handle instances of that type as targets.

        Args:
           target_type (type): A type expected to appear in a glom()
              call target
           get (callable): A function which takes a target object and
              a name, acting as a default accessor. Defaults to
              :func:`getattr`.
           iterate (callable): A function which takes a target object
              and returns an iterator. Defaults to :func:`iter` if
              *target_type* appears to be iterable.
           exact (bool): Whether or not to match instances of subtypes
              of *target_type*.

        .. note::

           The module-level :func:`register()` function affects the
           module-level :func:`glom()` function's behavior. If this
           global effect is undesirable for your application, or
           you're implementing a library, consider instantiating a
           :class:`Glommer` instance, and using the
           :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
           methods instead.

        """
        exact = kwargs.pop('exact', False)
        registry = self.scope[TargetRegistry]
        registry.register(target_type, exact=exact, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Run the module-level glom() with this instance's scope."""
        return glom(target, spec, scope=self.scope, **kwargs)

2501

2502

class Fill(object):
    """A specifier type which switches to glom into "fill-mode". For the
    spec contained within the Fill, glom will only interpret explicit
    specifier types (including T objects). Whereas the default mode
    has special interpretations for each of these builtins, fill-mode
    takes a lighter touch, making Fill great for "filling out" Python
    literals, like tuples, dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    As you can see, glom's usual built-in tuple item chaining behavior
    has switched into a simple tuple constructor.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        # the literal/spec to fill out once fill-mode is active
        self.spec = spec

    def glomit(self, target, scope):
        """Set the scope's mode to FILL, then evaluate the wrapped spec."""
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shorthand for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        # an unset spec renders as an empty argument list
        if self.spec is None:
            inner = ''
        else:
            inner = bbrepr(self.spec)
        return '%s(%s)' % (self.__class__.__name__, inner)

2536

2537

def FILL(target, spec, scope):
    """Fill mode: rebuild container literals, evaluating their contents.

    Exact-type dicts, lists, tuples, sets and frozensets are rebuilt
    with every element (and, for dicts, every key) evaluated as a
    spec. Any other callable is called with *target*, and anything
    else is returned as-is.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def recurse(val):
        return scope[glom](target, val, scope)

    spec_type = type(spec)
    if spec_type is dict:
        return {recurse(key): recurse(val) for key, val in spec.items()}
    if spec_type in (list, tuple, set, frozenset):
        result = [recurse(val) for val in spec]
        return result if spec_type is list else spec_type(result)
    if callable(spec):
        return spec(target)
    return spec

2553

2554class _ArgValuator(object):

2555 def __init__(self):

2556 self.cache = {}

2557

2558 def mode(self, target, spec, scope):

2559 """

2560 similar to FILL, but without function calling;

2561 useful for default, scope assignment, call/invoke, etc

2562 """

2563 recur = lambda val: scope[glom](target, val, scope)

2564 result = spec

2565 if type(spec) in (list, dict): # can contain themselves

2566 if id(spec) in self.cache:

2567 return self.cache[id(spec)]

2568 result = self.cache[id(spec)] = type(spec)()

2569 if type(spec) is dict:

2570 result.update({recur(key): recur(val) for key, val in spec.items()})

2571 else:

2572 result.extend([recur(val) for val in spec])

2573 if type(spec) in (tuple, set, frozenset): # cannot contain themselves

2574 result = type(spec)([recur(val) for val in spec])

2575 return result

2576

2577

def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    The scope's MIN_MODE is temporarily replaced with a fresh
    _ArgValuator mode for the evaluation, then restored to its
    previous value, including when the evaluation raises.
    """
    mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        # restore even on error, so a caller that handles the exception
        # does not keep evaluating in argument mode
        scope[MIN_MODE] = mode