Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/core.py: 58%

1"""*glom gets results.*

3The ``glom`` package has one central entrypoint,

4:func:`glom.glom`. Everything else in the package revolves around that

5one function. Sometimes, big things come in small packages.

7A couple of conventional terms you'll see repeated many times below:

9* **target** - glom is built to work on any data, so we simply

10 refer to the object being accessed as the *"target"*

11* **spec** - *(aka "glomspec", short for specification)* The

12 accompanying template used to specify the structure of the return

13 value.

15Now that you know the terms, let's take a look around glom's powerful

16semantics.

18"""

21import os

22import sys

23import pdb

24import copy

25import warnings

26import weakref

27import operator

28from abc import ABCMeta

29from pprint import pprint

30import string

31from collections import OrderedDict

32import traceback

34from face.helpers import get_wrap_width

35from boltons.typeutils import make_sentinel

36from boltons.iterutils import is_iterable

37#from boltons.funcutils import format_invocation

39basestring = str

40_AbstractIterableBase = ABCMeta('_AbstractIterableBase', (object,), {})

41from collections import ChainMap

42from reprlib import Repr, recursive_repr

# Debug mode is enabled by the GLOM_DEBUG environment variable unless it is
# unset, empty, "0" or "false" (compared case-insensitively, whitespace ignored).
GLOM_DEBUG = os.getenv('GLOM_DEBUG', '').strip().lower() not in ('', '0', 'false')

47TRACE_WIDTH = max(get_wrap_width(max_width=110), 50) # min width

49PATH_STAR = True

50# should * and ** be interpreted as parallel traversal in Path.from_text()?

51# Changed to True in 23.1, this option to disable will go away soon

53_type_type = type

55_MISSING = make_sentinel('_MISSING')

56SKIP = make_sentinel('SKIP')

57SKIP.__doc__ = """

58The ``SKIP`` singleton can be returned from a function or included

59via a :class:`~glom.Val` to cancel assignment into the output

60object.

62>>> target = {'a': 'b'}

63>>> spec = {'a': lambda t: t['a'] if t['a'] == 'a' else SKIP}

64>>> glom(target, spec)

65{}

66>>> target = {'a': 'a'}

67>>> glom(target, spec)

68{'a': 'a'}

70Mostly used to drop keys from dicts (as above) or filter objects from

71lists.

73.. note::

75 SKIP was known as OMIT in versions 18.3.1 and prior. Versions 19+

76 will remove the OMIT alias entirely.

77"""

78OMIT = SKIP # backwards compat, remove in 19+

80STOP = make_sentinel('STOP')

81STOP.__doc__ = """

82The ``STOP`` singleton can be used to halt iteration of a list or

83execution of a tuple of subspecs.

85>>> target = range(10)

86>>> spec = [lambda x: x if x < 5 else STOP]

87>>> glom(target, spec)

88[0, 1, 2, 3, 4]

89"""

91LAST_CHILD_SCOPE = make_sentinel('LAST_CHILD_SCOPE')

92LAST_CHILD_SCOPE.__doc__ = """

93Marker that can be used by parents to keep track of the last child

94scope executed. Useful for "lifting" results out of child scopes

95for scopes that want to chain the scopes of their children together

96similar to tuple.

97"""

99NO_PYFRAME = make_sentinel('NO_PYFRAME')

100NO_PYFRAME.__doc__ = """

101Used internally to mark scopes which are no longer wrapped

102in a recursive glom() call, so that they can be cleaned up correctly

103in case of exceptions

104"""

105

106MODE = make_sentinel('MODE')

107

108MIN_MODE = make_sentinel('MIN_MODE')

109

110CHILD_ERRORS = make_sentinel('CHILD_ERRORS')

111CHILD_ERRORS.__doc__ = """

112``CHILD_ERRORS`` is used by glom internals to keep track of

113failed child branches of the current scope.

114"""

115

116CUR_ERROR = make_sentinel('CUR_ERROR')

117CUR_ERROR.__doc__ = """

118``CUR_ERROR`` is used by glom internals to keep track of

119thrown exceptions.

120"""

121

122_PKG_DIR_PATH = os.path.dirname(os.path.abspath(__file__))

123

class GlomError(Exception):
    """Root of the exception hierarchy raised by :func:`glom` processing
    logic.

    By default, exceptions raised from within functions passed to glom
    (e.g., ``len``, ``sum``, any ``lambda``) will not be wrapped in a
    GlomError.
    """
    @classmethod
    def wrap(cls, exc):
        """Return a GlomError-flavored copy of *exc*, or *exc* itself if
        it cannot be re-created from its ``args``.

        The copy is an instance of a dynamically-created type deriving from
        both the original exception type and GlomError, so it can be caught
        as either.
        """
        # TODO: need to test this against a wide array of exception types
        # this approach to wrapping errors works for exceptions
        # defined in pure-python as well as C
        orig_type = type(exc)
        if issubclass(GlomError, orig_type):
            # orig_type is already an ancestor of GlomError; don't list it twice
            bases = (GlomError,)
        else:
            bases = (orig_type, GlomError)
        wrapper_type = type(f"GlomError.wrap({orig_type.__name__})", bases, {})
        try:
            wrapped = wrapper_type(*exc.args)
            wrapped.__wrapped = exc
        except Exception:  # maybe exception can't be re-created
            return exc
        return wrapped

    def _set_wrapped(self, exc):
        """Remember *exc* as the original exception behind this error."""
        self.__wrapped = exc

    def _finalize(self, scope):
        """Capture the trailing lines of the active traceback plus *scope*,
        for use when the error is rendered by ``__str__``."""
        # careful when changing how this functionality works; pytest seems to mess with
        # the traceback module or sys.exc_info(). we saw different stacks when originally
        # developing this in June 2020.
        etype, evalue, _ = sys.exc_info()
        tb_lines = traceback.format_exc().strip().splitlines()
        # count lines from the bottom up to the innermost line mentioning
        # the glom package directory; one more line is dropped on a match
        keep = len(tb_lines)
        for seen, tb_line in enumerate(reversed(tb_lines)):
            if _PKG_DIR_PATH in tb_line:
                keep = seen - 1
                break
        self._tb_lines = tb_lines[-keep:]
        # if the first line is trying to put a caret at a byte-code location on a line that
        # isn't being displayed, skip it
        if set(self._tb_lines[0]) <= {' ', '^', '~'}:
            self._tb_lines = self._tb_lines[1:]
        self._scope = scope

    def __str__(self):
        cached = getattr(self, '_finalized_str', None)
        if cached:
            return cached
        if getattr(self, '_scope', None) is not None:
            self._target_spec_trace = format_target_spec_trace(self._scope, self.__wrapped)
            lines = ["error raised while processing, details below.",
                     " Target-spec trace (most recent last):",
                     self._target_spec_trace,
                     *self._tb_lines]
            self._finalized_str = "\n".join(lines)
            return self._finalized_str

        # else, not finalized: prefer a subclass-provided get_message()
        message_fn = getattr(self, 'get_message', super().__str__)
        return message_fn()

187

188

def _unpack_stack(scope, only_errors=True):
    """
    Flatten a scope chain into ``[[scope, spec, target, error, [children]]]``.

    This is a convenience helper for printing stacks.

    only_errors=True means ignore branches which may still be hanging around
    which were not involved in the stack trace of the error

    only_errors=False could be useful for debugger / introspection (similar
    to traceback.print_stack())
    """
    frames = []
    cur = scope.maps[0]
    while LAST_CHILD_SCOPE in cur:
        child = cur[LAST_CHILD_SCOPE]
        branches = cur[CHILD_ERRORS]
        if branches == [child]:
            branches = []  # if there's only one branch, count it as linear
        frames.append([cur, cur[Spec], cur[T], cur.get(CUR_ERROR), branches])

        # NB: comparing by identity is necessary to avoid a
        # nondeterministic bug in abc's __eq__ see #189 for details
        if any(branch is child for branch in branches):
            break  # if child already covered by branches, stop the linear descent

        cur = child.maps[0]
    else:  # if break executed above, cur scope was already added
        frames.append([cur, cur[Spec], cur[T], cur.get(CUR_ERROR), []])
    # push errors "down" to where they were first raised / first observed
    for upper, lower in zip(frames, frames[1:]):
        if upper[3] == lower[3]:
            upper[3] = None
    if only_errors:  # trim the stack to the last error
        # leave at least 1 to not break formatting func below
        # TODO: make format_target_spec_trace() tolerate an "empty" stack cleanly
        while len(frames) > 1 and frames[-1][3] is None:
            frames.pop()
    return frames

229

230

def _format_trace_value(value, maxlen):
    """Return the bbrepr of *value*, shortened to at most *maxlen*
    characters with a ``...`` suffix (including ``len(value)`` when the
    value supports ``len``)."""
    text = bbrepr(value).replace("\\'", "'")
    if len(text) <= maxlen:
        return text
    try:
        suffix = '... (len=%s)' % len(value)
    except Exception:
        # value has no usable len(); fall back to a bare ellipsis
        suffix = '...'
    return text[:maxlen - len(suffix)] + suffix

240

241

def format_target_spec_trace(scope, root_error, width=TRACE_WIDTH, depth=0, prev_target=_MISSING, last_branch=True):
    """
    Render a scope as a short multi-line summary of targets, specs and
    errors, recursing into failed branches with increasing *depth*.
    """
    indent = " " + "|" * depth
    tick = "| " if depth else "- "

    def labelled(label, value, marker=None):
        # "<indent><marker><label>: <value>", with the value trimmed to fit *width*
        prefix = indent + (marker or tick) + label + ": "
        return prefix + _format_trace_value(value, width - len(prefix))

    def recurse(child_scope, last=False):
        # prev_target is read at call time, so branches see the latest target
        return format_target_spec_trace(child_scope, root_error, width, depth + 1, prev_target, last)

    def error_line(exc):
        exc_text = "".join(traceback.format_exception_only(type(exc), exc))[:-1]
        return indent + tick + exc_text

    segments = []
    for _frame_scope, spec, target, error, branches in _unpack_stack(scope):
        if target is not prev_target:
            segments.append(labelled("Target", target))
        prev_target = target
        if branches:
            segments.append(labelled("Spec", spec, "+ "))
            for branch in branches[:-1]:
                segments.append(recurse(branch))
            segments.append(recurse(branches[-1], last_branch))
        else:
            segments.append(labelled("Spec", spec))
        last_line_error = error is not None and error is not root_error
        if last_line_error:
            segments.append(error_line(error))
    if depth:  # \ on first line, X on last line
        def remark(line, mark):
            return line[:depth + 1] + mark + line[depth + 2:]
        segments[0] = remark(segments[0], "\\")
        if not last_branch or last_line_error:
            segments[-1] = remark(segments[-1], "X")
    return "\n".join(segments)

280

281

282# TODO: not used (yet)

def format_oneline_trace(scope):
    """
    Render a scope as a single-line summary
    (shortest summary possible)
    """
    # the goal here is to do a kind of delta-compression --
    # if the target is the same, don't repeat it
    pieces = []
    prev_target = _MISSING
    for frame_scope, spec, target, error, branches in _unpack_stack(scope, only_errors=False):
        pieces.append('/')
        if type(spec) in (TType, Path):
            pieces.append(bbrepr(spec))
        else:
            pieces.append(type(spec).__name__)
        if target != prev_target:
            pieces.extend(['!', type(target).__name__])
        if Path in frame_scope:
            path_text = '->'.join([str(p) for p in frame_scope[Path]])
            pieces.extend(['<', path_text, '>'])
        prev_target = target

    return "".join(pieces)

308

309

class PathAccessError(GlomError, AttributeError, KeyError, IndexError):
    """This :exc:`GlomError` subtype represents a failure to access an
    attribute as dictated by the spec. The most commonly-seen error
    when using glom, it maintains a copy of the original exception and
    produces a readable error message for easy debugging.

    If you see this error, you may want to:

       * Check the target data is accurate using :class:`~glom.Inspect`
       * Catch the exception and return a semantically meaningful error message
       * Use :class:`glom.Coalesce` to specify a default
       * Use the top-level ``default`` kwarg on :func:`~glom.glom()`

    In any case, be glad you got this error and not the one it was
    wrapping!

    Args:
       exc (Exception): The error that arose when we tried to access
          *path*. Typically an instance of KeyError, AttributeError,
          IndexError, or TypeError, and sometimes others.
       path (Path): The full Path glom was in the middle of accessing
          when the error occurred.
       part_idx (int): The index of the part of the *path* that caused
          the error.

    >>> target = {'a': {'b': None}}
    >>> glom(target, 'a.b.c')
    Traceback (most recent call last):
    ...
    PathAccessError: could not access 'c', part 2 of Path('a', 'b', 'c'), got error: ...

    """
    def __init__(self, exc, path, part_idx):
        self.exc, self.path, self.part_idx = exc, path, part_idx

    def get_message(self):
        """Describe which part of the path failed and the underlying error."""
        failed_part = Path(self.path).values()[self.part_idx]
        details = (failed_part, self.part_idx, self.path, self.exc)
        return 'could not access %r, part %r of %r, got error: %r' % details

    def __repr__(self):
        cn = type(self).__name__
        return f'{cn}({self.exc!r}, {self.path!r}, {self.part_idx!r})'

355

356

class PathAssignError(GlomError):
    """This :exc:`GlomError` subtype is raised when an assignment fails,
    stemming from an :func:`~glom.assign` call or other
    :class:`~glom.Assign` usage.

    One example would be assigning to an out-of-range position in a list::

      >>> assign(["short", "list"], Path(5), 'too far')  # doctest: +SKIP
      Traceback (most recent call last):
      ...
      PathAssignError: could not assign 5 on object at Path(), got error: IndexError(...

    Other assignment failures could be due to assigning to an
    ``@property`` or exception being raised inside a ``__setattr__()``.

    """
    def __init__(self, exc, path, dest_name):
        self.exc, self.path, self.dest_name = exc, path, dest_name

    def get_message(self):
        """Describe the destination, its path, and the underlying error."""
        details = (self.dest_name, self.path, self.exc)
        return 'could not assign %r on object at %r, got error: %r' % details

    def __repr__(self):
        cn = type(self).__name__
        return f'{cn}({self.exc!r}, {self.path!r}, {self.dest_name!r})'

385

386

class CoalesceError(GlomError):
    """This :exc:`GlomError` subtype is raised from within a
    :class:`Coalesce` spec's processing, when none of the subspecs
    match and no default is provided.

    The exception object itself keeps track of several values which
    may be useful for processing:

    Args:
       coal_obj (Coalesce): The original failing spec, see
          :class:`Coalesce`'s docs for details.
       skipped (list): A list of ignored values and exceptions, in the
          order that their respective subspecs appear in the original
          *coal_obj*.
       path: Like many GlomErrors, this exception knows the path at
          which it occurred.

    >>> target = {}
    >>> glom(target, Coalesce('a', 'b'))
    Traceback (most recent call last):
    ...
    CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

    .. note::

       Coalesce is a *branching* specifier type, so as of v20.7.0, its
       exception messages feature an error tree. See
       :ref:`branched-exceptions` for details on how to interpret these
       exceptions.

    """
    def __init__(self, coal_obj, skipped, path):
        self.coal_obj, self.skipped, self.path = coal_obj, skipped, path

    def __repr__(self):
        cn = type(self).__name__
        return f'{cn}({self.coal_obj!r}, {self.skipped!r}, {self.path!r})'

    def get_message(self):
        """Summarize the subspecs tried, what each produced, and any
        non-default skip settings on the Coalesce."""
        coal = self.coal_obj
        missed_specs = tuple(coal.subspecs)
        skipped_vals = []
        for val in self.skipped:
            type_name = val.__class__.__name__
            if isinstance(val, coal.skip_exc):
                # a caught exception: show its type name as-is
                skipped_vals.append(type_name)
            else:
                # a value rejected by the skip setting
                skipped_vals.append('<skipped %s>' % type_name)
        msg = ('no valid values found. Tried %r and got (%s)'
               % (missed_specs, ', '.join(skipped_vals)))
        if coal.skip is not _MISSING:
            msg += f', skip set to {coal.skip!r}'
        if coal.skip_exc is not GlomError:
            msg += f', skip_exc set to {coal.skip_exc!r}'
        if self.path is not None:
            msg += f' (at path {self.path!r})'
        return msg

442

443

class BadSpec(GlomError, TypeError):
    """Raised when a spec structure is malformed, e.g., when a specifier
    type is invalid for the current mode.

    Derives from both :exc:`GlomError` and :exc:`TypeError`, so it can be
    caught as either."""

447

448

class UnregisteredTarget(GlomError):
    """This :class:`GlomError` subtype is raised when a spec calls for an
    unsupported action on a target type. For instance, trying to
    iterate on an non-iterable target:

    >>> glom(object(), ['a.b.c'])
    Traceback (most recent call last):
    ...
    UnregisteredTarget: target type 'object' not registered for 'iterate', expected one of registered types: (...)

    It should be noted that this is a pretty uncommon occurrence in
    production glom usage. See the :ref:`setup-and-registration`
    section for details on how to avoid this error.

    An UnregisteredTarget takes and tracks a few values:

    Args:
       op (str): The name of the operation being performed ('get' or 'iterate')
       target_type (type): The type of the target being processed.
       type_map (dict): A mapping of target types that do support this operation
       path: The path at which the error occurred.

    """
    def __init__(self, op, target_type, type_map, path):
        self.op, self.target_type = op, target_type
        self.type_map, self.path = type_map, path
        super().__init__(op, target_type, type_map, path)

    def __repr__(self):
        cn = type(self).__name__
        # <type %r> is because Python 3 inexplicably changed the type
        # repr from <type *> to <class *>
        fields = (cn, self.op, self.target_type.__name__, self.type_map, self.path)
        return '%s(%r, <type %r>, %r, %r)' % fields

    def get_message(self):
        """Explain which operation is unsupported for the target type and
        list the type names that do have a handler registered."""
        if not self.type_map:
            return ("glom() called without registering any types for operation '%s'. see"
                    " glom.register() or Glommer's constructor for details." % (self.op,))
        # only types mapped to a truthy handler count as registered
        reg_types = sorted([t.__name__ for t, handler in self.type_map.items() if handler])
        reg_types_str = '(%s)' % ', '.join(reg_types)
        msg = ("target type %r not registered for '%s', expected one of"
               " registered types: %s" % (self.target_type.__name__, self.op, reg_types_str))
        if self.path:
            msg += f' (at {self.path!r})'
        return msg

497

498

if getattr(__builtins__, '__dict__', None) is not None:
    # pypy's __builtins__ is a module, as is CPython's REPL, but at
    # normal execution time it's a dict?
    __builtins__ = __builtins__.__dict__


# id(builtin object) -> the name it is bound to in builtins
_BUILTIN_ID_NAME_MAP = {id(obj): name for name, obj in __builtins__.items()}


class _BBRepr(Repr):
    """A better repr for builtins, when the built-in repr isn't
    roundtrippable.
    """
    def __init__(self):
        super().__init__()
        # turn up all the length limits very high
        for attr, current in list(vars(self).items()):
            if isinstance(current, int):
                setattr(self, attr, 1024)

    def repr1(self, x, level):
        text = super().repr1(x, level)
        if text.startswith('<'):
            # angle-bracket reprs can't be evaluated back; use the
            # builtin's name instead when x is a known builtin object
            return _BUILTIN_ID_NAME_MAP.get(id(x), text)
        return text


bbrepr = recursive_repr()(_BBRepr().repr)

529

530

class _BBReprFormatter(string.Formatter):
    """
    A string formatter whose ``{!r}`` conversion goes through bbrepr
    instead of repr; every other conversion is left to the base class.
    """
    def convert_field(self, value, conversion):
        if conversion != 'r':
            return super().convert_field(value, conversion)
        return bbrepr(value).replace("\\'", "'")


bbformat = _BBReprFormatter().format

543

544

545# TODO: push this back up to boltons with repr kwarg

def format_invocation(name='', args=(), kwargs=None, **kw):
    """Render a Python-style call expression from a name, positional
    arguments, and keyword arguments.

    *kwargs* may be a dict (rendered in sorted key order) or an iterable
    of ``(key, value)`` pairs (rendered in the given order). The only
    extra keyword accepted is ``repr``, the callable used to render each
    value (defaults to ``bbrepr``).

    >>> print(format_invocation('func', args=(1, 2), kwargs={'c': 3}))
    func(1, 2, c=3)
    >>> print(format_invocation('a_func', args=(1,)))
    a_func(1)
    >>> print(format_invocation('kw_func', kwargs=[('a', 1), ('b', 2)]))
    kw_func(a=1, b=2)

    """
    _repr = kw.pop('repr', bbrepr)
    if kw:
        raise TypeError('unexpected keyword args: %r' % ', '.join(kw.keys()))
    kwargs = kwargs or {}
    positional_text = ', '.join([_repr(arg) for arg in args])
    if isinstance(kwargs, dict):
        pairs = [(key, kwargs[key]) for key in sorted(kwargs)]
    else:
        pairs = kwargs
    keyword_text = ', '.join([f'{key}={_repr(val)}' for key, val in pairs])

    # join only the non-empty halves so no stray ", " appears
    all_args_text = ', '.join([text for text in (positional_text, keyword_text) if text])

    return f'{name}({all_args_text})'

575

576

class Path:
    """Path objects specify explicit paths when the default
    ``'a.b.c'``-style general access syntax won't work or isn't
    desirable. Use this to wrap ints, datetimes, and other valid
    keys, as well as strings with dots that shouldn't be expanded.

    >>> target = {'a': {'b': 'c', 'd.e': 'f', 2: 3}}
    >>> glom(target, Path('a', 2))
    3
    >>> glom(target, Path('a', 'd.e'))
    'f'

    Paths can be used to join together other Path objects, as
    well as :data:`~glom.T` objects:

    >>> Path(T['a'], T['b'])
    T['a']['b']
    >>> Path(Path('a', 'b'), Path('c', 'd'))
    Path('a', 'b', 'c', 'd')

    Paths also support indexing and slicing, with each access
    returning a new Path object:

    >>> path = Path('a', 'b', 1, 2)
    >>> path[0]
    Path('a')
    >>> path[-2:]
    Path(1, 2)

    Integer indexes outside ``range(-len(path), len(path))`` raise
    :exc:`IndexError`.

    To build a Path object from a string, use :meth:`Path.from_text()`.
    This is the default behavior when the top-level :func:`~glom.glom`
    function gets a string spec.
    """
    def __init__(self, *path_parts):
        if not path_parts:
            self.path_t = T
            return
        if isinstance(path_parts[0], TType):
            # a leading T-style object becomes the root of the path
            path_t = path_parts[0]
            offset = 1
        else:
            path_t = T
            offset = 0
        for part in path_parts[offset:]:
            if isinstance(part, Path):
                part = part.path_t
            if isinstance(part, TType):
                # splice the (op, arg) pairs of the sub-path onto ours
                sub_parts = part.__ops__
                if sub_parts[0] is not T:
                    raise ValueError('path segment must be path from T, not %r'
                                     % sub_parts[0])
                i = 1
                while i < len(sub_parts):
                    path_t = _t_child(path_t, sub_parts[i], sub_parts[i + 1])
                    i += 2
            else:
                # plain values are recorded with the 'P' (path) operation
                path_t = _t_child(path_t, 'P', part)
        self.path_t = path_t

    # parsed-text cache, kept separately for each PATH_STAR setting
    _CACHE = {True: {}, False: {}}
    _MAX_CACHE = 10000
    _STAR_WARNED = False

    @classmethod
    def from_text(cls, text):
        """Make a Path from .-delimited text:

        >>> Path.from_text('a.b.c')
        Path('a', 'b', 'c')

        This is the default behavior when :func:`~glom.glom` gets a string spec.
        """
        def create():
            segs = text.split('.')
            if PATH_STAR:
                segs = [
                    _T_STAR if seg == '*' else
                    _T_STARSTAR if seg == '**' else seg
                    for seg in segs]
            elif not cls._STAR_WARNED:
                if '*' in segs or '**' in segs:
                    warnings.warn(
                        "'*' and '**' have changed behavior in glom version 23.1."
                        " Recommend switch to T['*'] or T['**'].")
                    cls._STAR_WARNED = True
            return cls(*segs)

        cache = cls._CACHE[PATH_STAR]  # remove this when PATH_STAR is default
        if text not in cache:
            if len(cache) > cls._MAX_CACHE:
                # cache is full: build the Path without storing it
                return create()
            cache[text] = create()
        return cache[text]

    def glomit(self, target, scope):
        # The entrypoint for the Path extension
        return _t_eval(target, self.path_t, scope)

    def __len__(self):
        # __ops__ is (root, op1, arg1, op2, arg2, ...)
        return (len(self.path_t.__ops__) - 1) // 2

    def __eq__(self, other):
        if type(other) is Path:
            return self.path_t.__ops__ == other.path_t.__ops__
        elif type(other) is TType:
            return self.path_t.__ops__ == other.__ops__
        return False

    def __ne__(self, other):
        return not self == other

    def values(self):
        """
        Returns a tuple of values referenced in this path.

        >>> Path(T.a.b, 'c', T['d']).values()
        ('a', 'b', 'c', 'd')
        """
        cur_t_path = self.path_t.__ops__
        return cur_t_path[2::2]

    def items(self):
        """
        Returns a tuple of (operation, value) pairs.

        >>> Path(T.a.b, 'c', T['d']).items()
        (('.', 'a'), ('.', 'b'), ('P', 'c'), ('[', 'd'))

        """
        cur_t_path = self.path_t.__ops__
        return tuple(zip(cur_t_path[1::2], cur_t_path[2::2]))

    def startswith(self, other):
        """Return whether this path begins with *other*, which may be a
        string (treated as a single path part), a Path, or a T object.

        Raises:
            TypeError: if *other* is none of those types.
        """
        if isinstance(other, basestring):
            other = Path(other)
        if isinstance(other, Path):
            other = other.path_t
        if not isinstance(other, TType):
            raise TypeError('can only check if Path starts with string, Path or T')
        o_path = other.__ops__
        return self.path_t.__ops__[:len(o_path)] == o_path

    def from_t(self):
        '''return the same path but starting from T'''
        t_path = self.path_t.__ops__
        if t_path[0] is S:
            new_t = TType()
            new_t.__ops__ = (T,) + t_path[1:]
            return Path(new_t)
        return self

    def __getitem__(self, i):
        """Return a new Path holding the part(s) selected by the index or
        slice *i*.

        Raises:
            IndexError: if *i* is an integer outside the path's range.
        """
        cur_t_path = self.path_t.__ops__
        try:
            # slice objects expose start/stop/step; ints raise AttributeError
            step = i.step
            start = i.start if i.start is not None else 0
            stop = i.stop

            # translate part indexes into offsets within __ops__
            start = (start * 2) + 1 if start >= 0 else (start * 2) + len(cur_t_path)
            if stop is not None:
                stop = (stop * 2) + 1 if stop >= 0 else (stop * 2) + len(cur_t_path)
        except AttributeError:
            step = 1
            start = (i * 2) + 1 if i >= 0 else (i * 2) + len(cur_t_path)
            # ">=" (not ">"): an offset equal to len(__ops__) is one past
            # the last part and must be rejected, otherwise path[len(path)]
            # silently returns an empty Path
            if start < 0 or start >= len(cur_t_path):
                raise IndexError('Path index out of range')
            stop = ((i + 1) * 2) + 1 if i >= 0 else ((i + 1) * 2) + len(cur_t_path)

        new_t = TType()
        new_path = cur_t_path[start:stop]
        if step is not None and step != 1:
            # apply the step to (op, arg) pairs, then flatten them again
            new_path = tuple(zip(new_path[::2], new_path[1::2]))[::step]
            new_path = sum(new_path, ())
        new_t.__ops__ = (cur_t_path[0],) + new_path
        return Path(new_t)

    def __repr__(self):
        return _format_path(self.path_t.__ops__[1:])

755

756

757def _format_path(t_path):

758 path_parts, cur_t_path = [], []

759 i = 0

760 while i < len(t_path):

761 op, arg = t_path[i], t_path[i + 1]

762 i += 2

763 if op == 'P':

764 if cur_t_path:

765 path_parts.append(cur_t_path)

766 cur_t_path = []

767 path_parts.append(arg)

768 else:

769 cur_t_path.append(op)

770 cur_t_path.append(arg)

771 if path_parts and cur_t_path:

772 path_parts.append(cur_t_path)

773

774 if path_parts or not cur_t_path:

775 return 'Path(%s)' % ', '.join([_format_t(part)

776 if type(part) is list else repr(part)

777 for part in path_parts])

778 return _format_t(cur_t_path)

779

780

class Spec:
    """Spec objects serve three purposes, here they are, roughly ordered
    by utility:

      1. As a form of compiled or "curried" glom call, similar to
         Python's built-in :func:`re.compile`.
      2. A marker as an object as representing a spec rather than a
         literal value in certain cases where that might be ambiguous.
      3. A way to update the scope within another Spec.

    In the second usage, Spec objects are the complement to
    :class:`~glom.Val`, wrapping a value and marking that it
    should be interpreted as a glom spec, rather than a literal value.
    This is useful in places where it would be interpreted as a value
    by default. (Such as T[key], Call(func) where key and func are
    assumed to be literal values and not specs.)

    Args:
        spec: The glom spec.
        scope (dict): additional values to add to the scope when
          evaluating this Spec

    """
    def __init__(self, spec, scope=None):
        self.spec = spec
        self.scope = scope if scope else {}

    def glom(self, target, **kw):
        """Run this Spec against *target*, forwarding *kw* to the glom
        call. A ``scope`` keyword is layered over this Spec's own scope."""
        merged = dict(self.scope)
        merged.update(kw.get('scope', {}))
        kw['scope'] = ChainMap(merged)
        # the scope may supply its own glom callable under the `glom` key
        runner = merged.get(glom, glom)
        return runner(target, self.spec, **kw)

    def glomit(self, target, scope):
        scope.update(self.scope)
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        cn = type(self).__name__
        if not self.scope:
            return f'{cn}({bbrepr(self.spec)})'
        return f'{cn}({bbrepr(self.spec)}, scope={self.scope!r})'

824

825

826class Coalesce:

827 """Coalesce objects specify fallback behavior for a list of

828 subspecs.

829

830 Subspecs are passed as positional arguments, and keyword arguments

831 control defaults. Each subspec is evaluated in turn, and if none

832 match, a :exc:`CoalesceError` is raised, or a default is returned,

833 depending on the options used.

834

835 .. note::

836

837 This operation may seem very familar if you have experience with

838 `SQL`_ or even `C# and others`_.

839

840

841 In practice, this fallback behavior's simplicity is only surpassed

842 by its utility:

843

844 >>> target = {'c': 'd'}

845 >>> glom(target, Coalesce('a', 'b', 'c'))

846 'd'

847

848 glom tries to get ``'a'`` from ``target``, but gets a

849 KeyError. Rather than raise a :exc:`~glom.PathAccessError` as usual,

850 glom *coalesces* into the next subspec, ``'b'``. The process

851 repeats until it gets to ``'c'``, which returns our value,

852 ``'d'``. If our value weren't present, we'd see:

853

854 >>> target = {}

855 >>> glom(target, Coalesce('a', 'b'))

856 Traceback (most recent call last):

857 ...

858 CoalesceError: no valid values found. Tried ('a', 'b') and got (PathAccessError, PathAccessError) ...

859

860 Same process, but because ``target`` is empty, we get a

861 :exc:`CoalesceError`.

862

863 .. note::

864

865 Coalesce is a *branching* specifier type, so as of v20.7.0, its

866 exception messages feature an error tree. See

867 :ref:`branched-exceptions` for details on how to interpret these

868 exceptions.

869

870

871 If we want to avoid an exception, and we know which value we want

872 by default, we can set *default*:

873

874 >>> target = {}

875 >>> glom(target, Coalesce('a', 'b', 'c'), default='d-fault')

876 'd-fault'

877

878 ``'a'``, ``'b'``, and ``'c'`` weren't present so we got ``'d-fault'``.

879

880 Args:

881

882 subspecs: One or more glommable subspecs

883 default: A value to return if no subspec results in a valid value

884 default_factory: A callable whose result will be returned as a default

885 skip: A value, tuple of values, or predicate function

886 representing values to ignore

887 skip_exc: An exception or tuple of exception types to catch and

888 move on to the next subspec. Defaults to :exc:`GlomError`, the

889 parent type of all glom runtime exceptions.

890

891 If all subspecs produce skipped values or exceptions, a

892 :exc:`CoalesceError` will be raised. For more examples, check out

893 the :doc:`tutorial`, which makes extensive use of Coalesce.

894

895 .. _SQL: https://en.wikipedia.org/w/index.php?title=Null_(SQL)&oldid=833093792#COALESCE

896 .. _C# and others: https://en.wikipedia.org/w/index.php?title=Null_coalescing_operator&oldid=839493322#C#

897

898 """

899 def __init__(self, *subspecs, **kwargs):

900 self.subspecs = subspecs

901 self._orig_kwargs = dict(kwargs)

902 self.default = kwargs.pop('default', _MISSING)

903 self.default_factory = kwargs.pop('default_factory', _MISSING)

904 if self.default and self.default_factory:

905 raise ValueError('expected one of "default" or "default_factory", not both')

906 self.skip = kwargs.pop('skip', _MISSING)

907 if self.skip is _MISSING:

908 self.skip_func = lambda v: False

909 elif callable(self.skip):

910 self.skip_func = self.skip

911 elif isinstance(self.skip, tuple):

912 self.skip_func = lambda v: v in self.skip

913 else:

914 self.skip_func = lambda v: v == self.skip

915 self.skip_exc = kwargs.pop('skip_exc', GlomError)

916 if kwargs:

917 raise TypeError(f'unexpected keyword args: {sorted(kwargs.keys())!r}')

918

def glomit(self, target, scope):
    """Return the first subspec result that is neither skipped nor an error.

    Each subspec is evaluated against *target* in order. Results for
    which ``skip_func`` is true, and exceptions matching ``skip_exc``,
    are collected and the next subspec is tried. If nothing qualifies,
    the default (or the default factory's result) is returned; with
    neither configured, ``CoalesceError`` is raised with everything
    that was skipped.
    """
    rejected = []
    for candidate in self.subspecs:
        try:
            result = scope[glom](target, candidate, scope)
            if not self.skip_func(result):
                return result
            rejected.append(result)
        except self.skip_exc as exc:
            rejected.append(exc)

    # every subspec was skipped or failed: fall back
    if self.default is not _MISSING:
        return arg_val(target, self.default, scope)
    if self.default_factory is not _MISSING:
        return self.default_factory()
    raise CoalesceError(self, rejected, scope[Path])

938

def __repr__(self):
    """Format as an invocation of the class with the original arguments."""
    return format_invocation(
        self.__class__.__name__,
        self.subspecs,
        self._orig_kwargs,
        repr=bbrepr,
    )

942

943

class Inspect:
    """The :class:`~glom.Inspect` specifier type provides a way to get
    visibility into glom's evaluation of a specification, enabling
    debugging of those tricky problems that may arise with unexpected
    data.

    :class:`~glom.Inspect` can be inserted into an existing spec in one of two
    ways. First, as a wrapper around the spec in question, or second,
    as an argument-less placeholder wherever a spec could be.

    :class:`~glom.Inspect` supports several modes, controlled by
    keyword arguments. Its default, no-argument mode, simply echos the
    state of the glom at the point where it appears:

      >>> target = {'a': {'b': {}}}
      >>> val = glom(target, Inspect('a.b'))  # wrapping a spec
      ---
      path: ['a.b']
      target: {'a': {'b': {}}}
      output: {}
      ---

    Debugging behavior aside, :class:`~glom.Inspect` has no effect on
    values in the target, spec, or result.

    Args:
       echo (bool): Whether to print the path, target, and output of
         each inspected glom. Defaults to True.
       recursive (bool): Whether or not the Inspect should be applied
         at every level, at or below the spec that it wraps. Defaults
         to False.
       breakpoint (bool): This flag controls whether a debugging prompt
         should appear before evaluating each inspected spec. Can also
         take a callable. Defaults to False.
       post_mortem (bool): This flag controls whether exceptions
         should be caught and interactively debugged with :mod:`pdb` on
         inspected specs.

    All arguments above are keyword-only to avoid overlap with a
    wrapped spec.

    Raises:
       TypeError: If *breakpoint* or *post_mortem* is truthy but neither
         ``True`` nor callable, or if an unrecognized keyword argument
         is passed.

    .. note::

       Just like ``pdb.set_trace()``, be careful about leaving stray
       ``Inspect()`` instances in production glom specs.

    """
    def __init__(self, *a, **kw):
        # with no positional argument, inspect the current target as-is
        self.wrapped = a[0] if a else Path()
        self.recursive = kw.pop('recursive', False)
        self.echo = kw.pop('echo', True)
        breakpoint = kw.pop('breakpoint', False)
        if breakpoint is True:
            breakpoint = pdb.set_trace
        if breakpoint and not callable(breakpoint):
            raise TypeError('breakpoint expected bool or callable, not: %r' % breakpoint)
        self.breakpoint = breakpoint
        post_mortem = kw.pop('post_mortem', False)
        if post_mortem is True:
            post_mortem = pdb.post_mortem
        if post_mortem and not callable(post_mortem):
            raise TypeError('post_mortem expected bool or callable, not: %r' % post_mortem)
        self.post_mortem = post_mortem
        # Reject leftovers so that a misspelt option (e.g. ``recusive=True``)
        # fails loudly instead of being silently ignored.
        if kw:
            raise TypeError(f'unexpected keyword args: {sorted(kw.keys())!r}')

    def __repr__(self):
        return '<INSPECT>'

    def glomit(self, target, scope):
        # stash the real handler under Inspect,
        # and replace the child handler with a trace callback
        scope[Inspect] = scope[glom]
        scope[glom] = self._trace
        return scope[glom](target, self.wrapped, scope)

    def _trace(self, target, spec, scope):
        """Evaluate *spec* with the stashed handler, echoing/debugging around it."""
        if not self.recursive:
            # non-recursive: restore the real handler for anything below
            scope[glom] = scope[Inspect]
        if self.echo:
            print('---')
            # TODO: switch from scope[Path] to the Target-Spec format trace above
            # ... but maybe be smart about only printing deltas instead of the whole
            # thing
            print('path: ', scope[Path] + [spec])
            print('target:', target)
        if self.breakpoint:
            # TODO: real debugger here?
            self.breakpoint()
        try:
            ret = scope[Inspect](target, spec, scope)
        except Exception:
            if self.post_mortem:
                self.post_mortem()
            raise
        if self.echo:
            print('output:', ret)
            print('---')
        return ret

1041

1042

class Call:
    """:class:`Call` specifies when a target should be passed to a function,
    *func*.

    :class:`Call` is similar to :func:`~functools.partial` in that
    it is no more powerful than ``lambda`` or other functions, but
    it is designed to be more readable, with a better ``repr``.

    Args:
       func (callable): a function or other callable to be called with
          the target

    :class:`Call` combines well with :attr:`~glom.T` to construct objects. For
    instance, to generate a dict and then pass it to a constructor:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    This does the same as ``glom(target, lambda target:
    ExampleClass(**target))``, but it's easy to see which one reads
    better.

    .. note::

       ``Call`` is mostly for functions. Use a :attr:`~glom.T` object
       if you need to call a method.

    .. warning::

       :class:`Call` has a successor with a fuller-featured API, new
       in 19.10.0: the :class:`Invoke` specifier type.
    """
    def __init__(self, func=None, args=None, kwargs=None):
        # with no func given, the target itself is what gets called
        func = T if func is None else func
        if not (callable(func) or isinstance(func, (Spec, TType))):
            raise TypeError('expected func to be a callable or T'
                            ' expression, not: %r' % (func,))
        self.func = func
        self.args = () if args is None else args
        self.kwargs = {} if kwargs is None else kwargs

    def glomit(self, target, scope):
        'run against the current target'
        def resolve(spec):
            return arg_val(target, spec, scope)

        # resolve in the order: function, positional args, keyword args
        func = resolve(self.func)
        args = resolve(self.args)
        kwargs = resolve(self.kwargs)
        return func(*args, **kwargs)

    def __repr__(self):
        cn = self.__class__.__name__
        return f'{cn}({bbrepr(self.func)}, args={self.args!r}, kwargs={self.kwargs!r})'

1100

1101

def _is_spec(obj, strict=False):
    """Report whether *obj* should be treated as a glom spec.

    ``TType`` instances always count. With *strict* set, only objects
    whose exact type is ``Spec`` count otherwise; without it, the
    decision is delegated to ``_has_callable_glomit``.
    """
    # a little util for codifying the spec type checking in glom
    if isinstance(obj, TType):
        return True
    if not strict:
        return _has_callable_glomit(obj)  # pragma: no cover
    return type(obj) is Spec

1110

1111

class Invoke:
    """Specifier type designed for easy invocation of callables from glom.

    Args:
       func (callable): A function or other callable object.

    ``Invoke`` is similar to :func:`functools.partial`, but with the
    ability to set up a "templated" call which interleaves constants and
    glom specs.

    For example, the following creates a spec which can be used to
    check if targets are integers:

    >>> is_int = Invoke(isinstance).specs(T).constants(int)
    >>> glom(5, is_int)
    True

    And this composes like any other glom spec:

    >>> target = [7, object(), 9]
    >>> glom(target, [is_int])
    [True, False, True]

    Another example, mixing positional and keyword arguments:

    >>> spec = Invoke(sorted).specs(T).constants(key=int, reverse=True)
    >>> target = ['10', '5', '20', '1']
    >>> glom(target, spec)
    ['20', '10', '5', '1']

    Invoke also helps with evaluating zero-argument functions:

    >>> glom(target={}, spec=Invoke(int))
    0

    (A trivial example, but from timestamps to UUIDs, zero-arg calls do come up!)

    .. note::

       ``Invoke`` is mostly for functions, object construction, and callable
       objects. For calling methods, consider the :attr:`~glom.T` object.

    """
    def __init__(self, func):
        if not (callable(func) or _is_spec(func, strict=True)):
            raise TypeError('expected func to be a callable or Spec instance,'
                            ' not: %r' % (func,))
        self.func = func
        # flat tuple of (op, args, kwargs) triples, in the order added
        self._args = ()
        # a registry of every known kwarg to its freshest value as set
        # by the methods below. the **kw dict is used as a unique marker.
        self._cur_kwargs = {}

    @classmethod
    def specfunc(cls, spec):
        """Creates an :class:`Invoke` instance where the function is
        indicated by a spec.

        >>> spec = Invoke.specfunc('func').constants(5)
        >>> glom({'func': range}, (spec, list))
        [0, 1, 2, 3, 4]

        """
        return cls(Spec(spec))

    def constants(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword argument values stored for passing to the
        underlying function.

        >>> spec = Invoke(T).constants(5)
        >>> glom(range, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(T).constants(2).constants(10, 2)
        >>> glom(range, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> round_2 = Invoke(round).constants(ndigits=2).specs(T)
        >>> glom(3.14159, round_2)
        3.14

        :meth:`~Invoke.constants()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.
        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('C', a, kw)
        # each keyword named in this call now maps to this call's kw dict
        clone._cur_kwargs = {**self._cur_kwargs, **dict.fromkeys(kw, kw)}
        return clone

    def specs(self, *a, **kw):
        """Returns a new :class:`Invoke` spec, with the provided positional
        and keyword arguments stored to be interpreted as specs, with
        the results passed to the underlying function.

        >>> spec = Invoke(range).specs('value')
        >>> glom({'value': 5}, (spec, list))
        [0, 1, 2, 3, 4]

        Subsequent positional arguments are appended:

        >>> spec = Invoke(range).specs('start').specs('end', 'step')
        >>> target = {'start': 2, 'end': 10, 'step': 2}
        >>> glom(target, (spec, list))
        [2, 4, 6, 8]

        Keyword arguments also work as one might expect:

        >>> multiply = lambda x, y: x * y
        >>> times_3 = Invoke(multiply).constants(y=3).specs(x='value')
        >>> glom({'value': 5}, times_3)
        15

        :meth:`~Invoke.specs()` and other :class:`Invoke`
        methods may be called multiple times, just remember that every
        call returns a new spec.

        """
        clone = self.__class__(self.func)
        clone._args = self._args + ('S', a, kw)
        # each keyword named in this call now maps to this call's kw dict
        clone._cur_kwargs = {**self._cur_kwargs, **dict.fromkeys(kw, kw)}
        return clone

    def star(self, args=None, kwargs=None):
        """Returns a new :class:`Invoke` spec, with *args* and/or *kwargs*
        specs set to be "starred" or "star-starred" (respectively)

        >>> spec = Invoke(zip).star(args='lists')
        >>> target = {'lists': [[1, 2], [3, 4], [5, 6]]}
        >>> list(glom(target, spec))
        [(1, 3, 5), (2, 4, 6)]

        Args:
           args (spec): A spec to be evaluated and "starred" into the
             underlying function.
           kwargs (spec): A spec to be evaluated and "star-starred" into
             the underlying function.

        One or both of the above arguments should be set.

        The :meth:`~Invoke.star()`, like other :class:`Invoke`
        methods, may be called multiple times. The *args* and *kwargs*
        will be stacked in the order in which they are provided.
        """
        if args is None and kwargs is None:
            raise TypeError('expected one or both of args/kwargs to be passed')
        clone = self.__class__(self.func)
        clone._args = self._args + ('*', args, kwargs)
        clone._cur_kwargs = dict(self._cur_kwargs)
        return clone

    def __repr__(self):
        base_fname = self.__class__.__name__
        fname_map = {'C': 'constants', 'S': 'specs', '*': 'star'}
        if type(self.func) is Spec:
            base_fname += '.specfunc'
            head_args = (self.func.spec,)
        else:
            head_args = (self.func,)
        chunks = [format_invocation(base_fname, head_args, repr=bbrepr)]

        # walk the flat _args tuple one (op, args, kwargs) triple at a time
        ops = self._args
        for op, args, _kwargs in zip(ops[0::3], ops[1::3], ops[2::3]):
            fname = fname_map[op]
            if op in ('C', 'S'):
                # only show keywords that this call is still the latest to set
                kwargs = [(k, v) for k, v in _kwargs.items()
                          if self._cur_kwargs[k] is _kwargs]
            else:
                kwargs = {}
                if args:
                    kwargs['args'] = args
                if _kwargs:
                    kwargs['kwargs'] = _kwargs
                args = ()

            chunks.append('.' + format_invocation(fname, args, kwargs, repr=bbrepr))

        return ''.join(chunks)

    def glomit(self, target, scope):
        """Assemble the stored arguments against *target* and call the function."""
        positional = []
        keyword = {}

        def recurse(spec):
            return scope[glom](target, spec, scope)

        func = recurse(self.func) if _is_spec(self.func, strict=True) else self.func

        ops = self._args
        for op, args, kwargs in zip(ops[0::3], ops[1::3], ops[2::3]):
            if op == 'C':
                positional.extend(args)
                for k, v in kwargs.items():
                    # skip keywords overridden by a later call
                    if self._cur_kwargs[k] is kwargs:
                        keyword[k] = v
            elif op == 'S':
                positional.extend([recurse(arg) for arg in args])
                for k, v in kwargs.items():
                    if self._cur_kwargs[k] is kwargs:
                        keyword[k] = recurse(v)
            elif op == '*':
                if args is not None:
                    positional.extend(recurse(args))
                if kwargs is not None:
                    keyword.update(recurse(kwargs))

        return func(*positional, **keyword)

1322

1323

class Ref:
    """Name a part of a spec and refer to it elsewhere in the same spec,
    useful for trees and other self-similar data structures.

    Args:
       name (str): The name of the spec to reference.
       subspec: Pass a spec to name it *name*, or leave unset to refer
          to an already-named spec.
    """
    def __init__(self, name, subspec=_MISSING):
        self.name = name
        self.subspec = subspec

    def glomit(self, target, scope):
        """Register or look up the named subspec in *scope*, then evaluate it."""
        key = (Ref, self.name)
        resolved = self.subspec
        if resolved is _MISSING:
            # pure reference: the spec must already be registered in scope
            resolved = scope[key]
        else:
            # definition: register it so nested references can find it
            scope[key] = resolved
        return scope[glom](target, resolved, scope)

    def __repr__(self):
        if self.subspec is _MISSING:
            inner = bbrepr(self.name)
        else:
            # repr the pair as a tuple, then strip its surrounding parens
            inner = bbrepr((self.name, self.subspec))[1:-1]
        return "Ref(" + inner + ")"

1351

1352

class TType:
    """``T``, short for "target". A singleton object that enables
    object-oriented expression of a glom specification.

    .. note::

       ``T`` is a singleton, and does not need to be constructed.

    Basically, think of ``T`` as your data's stunt double. Everything
    that you do to ``T`` will be recorded and executed during the
    :func:`glom` call. Take this example:

    >>> spec = T['a']['b']['c']
    >>> target = {'a': {'b': {'c': 'd'}}}
    >>> glom(target, spec)
    'd'

    So far, we've relied on the ``'a.b.c'``-style shorthand for
    access, or used the :class:`~glom.Path` objects, but if you want
    to explicitly do attribute and key lookups, look no further than
    ``T``.

    But T doesn't stop with unambiguous access. You can also call
    methods and perform almost any action you would with a normal
    object:

    >>> spec = ('a', (T['b'].items(), list))  # reviewed below
    >>> glom(target, spec)
    [('c', 'd')]

    A ``T`` object can go anywhere in the spec. As seen in the example
    above, we access ``'a'``, use a ``T`` to get ``'b'`` and iterate
    over its ``items``, turning them into a ``list``.

    You can even use ``T`` with :class:`~glom.Call` to construct objects:

    >>> class ExampleClass(object):
    ...    def __init__(self, attr):
    ...        self.attr = attr
    ...
    >>> target = {'attr': 3.14}
    >>> glom(target, Call(ExampleClass, kwargs=T)).attr
    3.14

    On a further note, while ``lambda`` works great in glom specs, and
    can be very handy at times, ``T`` and :class:`~glom.Call`
    eliminate the need for the vast majority of ``lambda`` usage with
    glom.

    Unlike ``lambda`` and other functions, ``T`` roundtrips
    beautifully and transparently:

    >>> T['a'].b['c']('success')
    T['a'].b['c']('success')

    ``T``-related access errors raise a :exc:`~glom.PathAccessError`
    during the :func:`~glom.glom` call.

    .. note::

       While ``T`` is clearly useful, powerful, and here to stay, its
       semantics are still being refined. Currently, operations beyond
       method calls and attribute/item access are considered
       experimental and should not be relied upon.

    .. note::

       ``T`` attributes starting with __ are reserved to avoid
       colliding with many built-in Python behaviors, current and
       future. The ``T.__()`` method is available for cases where
       they are needed. For example, ``T.__('class__')`` is
       equivalent to accessing the ``__class__`` attribute.

    """
    # the only per-instance state: a tuple of (root, op, arg, op, arg, ...)
    __slots__ = ('__ops__',)

    # -- access and call recording ------------------------------------

    def __getattr__(self, name):
        if name.startswith('__'):
            raise AttributeError('T instances reserve dunder attributes.'
                                 ' To access the "{name}" attribute, use'
                                 ' T.__("{d_name}")'.format(name=name, d_name=name[2:]))
        return _t_child(self, '.', name)

    def __getitem__(self, item):
        return _t_child(self, '[', item)

    def __call__(self, *args, **kwargs):
        if self is S:
            # calling the bare scope root is the S(var=spec) assignment form
            if args:
                raise TypeError(f'S() takes no positional arguments, got: {args!r}')
            if not kwargs:
                raise TypeError('S() expected at least one kwarg, got none')
            # TODO: typecheck kwarg vals?
        return _t_child(self, '(', (args, kwargs))

    # -- star traversal -----------------------------------------------

    def __star__(self):
        return _t_child(self, 'x', None)

    def __starstar__(self):
        return _t_child(self, 'X', None)

    def __stars__(self):
        """how many times the result will be wrapped in extra lists"""
        # ops sit at the odd indexes of __ops__ (index 0 is the root)
        recorded_ops = self.__ops__[1::2]
        return recorded_ops.count('x') + recorded_ops.count('X')

    # -- binary operators (each recorded under a one-character op code) --

    def __add__(self, arg):
        return _t_child(self, '+', arg)

    def __sub__(self, arg):
        return _t_child(self, '-', arg)

    def __mul__(self, arg):
        return _t_child(self, '*', arg)

    def __floordiv__(self, arg):
        return _t_child(self, '#', arg)

    def __truediv__(self, arg):
        return _t_child(self, '/', arg)

    __div__ = __truediv__

    def __mod__(self, arg):
        return _t_child(self, '%', arg)

    def __pow__(self, arg):
        return _t_child(self, ':', arg)

    def __and__(self, arg):
        return _t_child(self, '&', arg)

    def __or__(self, arg):
        return _t_child(self, '|', arg)

    def __xor__(self, arg):
        return _t_child(self, '^', arg)

    # -- unary operators ----------------------------------------------

    def __invert__(self):
        return _t_child(self, '~', None)

    def __neg__(self):
        return _t_child(self, '_', None)

    def __(self, name):
        # escape hatch for attributes whose names start with a double underscore
        return _t_child(self, '.', '__' + name)

    def __repr__(self):
        ops = self.__ops__
        return _format_t(ops[1:], ops[0])

    # -- pickling: the root singleton is swapped for its one-letter name --

    def __getstate__(self):
        ops = self.__ops__
        root_name = {T: 'T', S: 'S', A: 'A'}[ops[0]]
        return (root_name,) + ops[1:]

    def __setstate__(self, state):
        root = {'T': T, 'S': S, 'A': A}[state[0]]
        self.__ops__ = (root,) + state[1:]

1510

1511

def _t_child(parent, operation, arg):
    """Return a new ``TType`` whose ops are *parent*'s plus ``(operation, arg)``.

    Raises:
        BadSpec: If *parent* is rooted at ``A`` and *operation* is not
            one of ``'.'``, ``'['`` or ``'P'``.
    """
    parent_ops = parent.__ops__
    # whitelist rather than blacklist assignment friendly operations
    # TODO: error type?
    if parent_ops[0] is A and operation not in ('.', '[', 'P'):
        raise BadSpec("operation not allowed on A assignment path")
    child = TType()
    child.__ops__ = parent_ops + (operation, arg)
    return child

1521

1522

def _s_first_magic(scope, key, _t):
    """
    enable S.a to do S['a'] or S['a'].val as a special
    case for accessing user defined string variables
    """
    failure = None
    try:
        found = scope[key]
    except KeyError as exc:
        # always only one level depth, hence 0
        failure = PathAccessError(exc, Path(_t), 0)
    # raised after the handler has finished, not from inside it
    if failure:
        raise failure
    return found

1536

1537

def _t_eval(target, _t, scope):
    """Evaluate the operations recorded on the ``TType`` *_t*.

    ``_t.__ops__`` is a tuple ``(root, op, arg, op, arg, ...)``. The
    walk starts from *target* when the root is ``T`` and from *scope*
    when the root is ``S`` or ``A``, then applies each ``(op, arg)``
    pair in order.

    Args:
        target: The current glom target.
        _t: The ``TType`` instance to evaluate.
        scope: The current glom scope.

    Returns:
        The value reached at the end of the op path. For ``A``-rooted
        paths and the ``S(var=spec)`` form, *target* is returned
        unchanged after the assignment is performed.

    Raises:
        BadSpec: If an ``A`` path has no destination to assign to.
        ValueError: If the root is none of ``T``, ``S``, ``A``.
        PathAccessError: If an access or arithmetic step fails.
    """
    t_path = _t.__ops__
    i = 1
    fetch_till = len(t_path)
    root = t_path[0]
    if root is T:
        cur = target
    elif root is S or root is A:
        # A is basically the same as S, but last step is assign
        if root is A:
            fetch_till -= 2
            if fetch_till < 1:
                raise BadSpec('cannot assign without destination')
        cur = scope
        if fetch_till > 1 and t_path[1] in ('.', 'P'):
            cur = _s_first_magic(cur, t_path[2], _t)
            i += 2
        elif root is S and fetch_till > 1 and t_path[1] == '(':
            # S(var='spec') style assignment
            _, kwargs = t_path[2]
            scope.update({
                k: arg_val(target, v, scope) for k, v in kwargs.items()})
            return target

    else:
        raise ValueError('TType instance with invalid root')  # pragma: no cover
    # errors are stored here and raised after their handler has finished
    pae = None
    while i < fetch_till:
        op, arg = t_path[i], t_path[i + 1]
        arg = arg_val(target, arg, scope)
        if op == '.':
            try:
                cur = getattr(cur, arg)
            except AttributeError as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == '[':
            try:
                cur = cur[arg]
            except (KeyError, IndexError, TypeError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op == 'P':
            # Path type stuff (fuzzy match)
            get = scope[TargetRegistry].get_handler('get', cur, path=t_path[2:i+2:2])
            try:
                cur = get(cur, arg)
            except Exception as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        elif op in 'xX':
            nxt = []
            get_handler = scope[TargetRegistry].get_handler
            if op == 'x':  # increases arity of cur each time through
                # TODO: so many try/except -- could scope[TargetRegistry] stuff be cached on type?
                _extend_children(nxt, cur, get_handler)
            elif op == 'X':
                sofar = set()
                _extend_children(nxt, cur, get_handler)
                # nxt grows while it is iterated, so descendants get
                # visited too; ids guard against expanding an item twice
                for item in nxt:
                    if id(item) not in sofar:
                        sofar.add(id(item))
                        _extend_children(nxt, item, get_handler)
                nxt.insert(0, cur)
            # handle the rest of the t_path in recursive calls
            cur = []
            todo = TType()
            todo.__ops__ = (root,) + t_path[i+2:]
            for child in nxt:
                try:
                    cur.append(_t_eval(child, todo, scope))
                except PathAccessError:
                    pass
            break  # we handled the rest in recursive call, break loop
        elif op == '(':
            args, kwargs = arg
            scope[Path] += t_path[2:i+2:2]
            cur = scope[glom](
                target, Call(cur, args, kwargs), scope)
            # call with target rather than cur,
            # because it is probably more intuitive
            # if args to the call "reset" their path
            # e.g. "T.a" should mean the same thing
            # in both of these specs: T.a and T.b(T.a)
        else:  # arithmetic operators
            try:
                if op == '+':
                    cur = cur + arg
                elif op == '-':
                    cur = cur - arg
                elif op == '*':
                    cur = cur * arg
                elif op == '#':
                    # '#' is the op code recorded by TType.__floordiv__;
                    # without this branch ``T // n`` left cur unchanged
                    cur = cur // arg
                elif op == '/':
                    cur = cur / arg
                elif op == '%':
                    cur = cur % arg
                elif op == ':':
                    cur = cur ** arg
                elif op == '&':
                    cur = cur & arg
                elif op == '|':
                    cur = cur | arg
                elif op == '^':
                    cur = cur ^ arg
                elif op == '~':
                    cur = ~cur
                elif op == '_':
                    cur = -cur
            except (TypeError, ZeroDivisionError) as e:
                pae = PathAccessError(e, Path(_t), i // 2)
        if pae:
            raise pae
        i += 2
    if root is A:
        # the final (op, arg) pair names where the target gets stored
        op, arg = t_path[-2:]
        if cur is scope:
            op = '['  # all assignment on scope is setitem
        _assign_op(dest=cur, op=op, arg=arg, val=target, path=_t, scope=scope)
        return target  # A should not change the target
    return cur

1657

1658

1659def _assign_op(dest, op, arg, val, path, scope):

1660 """helper method for doing the assignment on a T operation"""

1661 if op == '[':

1662 dest[arg] = val

1663 elif op == '.':

1664 setattr(dest, arg, val)

1665 elif op == 'P':

1666 _assign = scope[TargetRegistry].get_handler('assign', dest)

1667 try:

1668 _assign(dest, arg, val)

1669 except Exception as e:

1670 raise PathAssignError(e, path, arg)

1671 else: # pragma: no cover

1672 raise ValueError('unsupported T operation for assignment')

1673

1674

1675def _extend_children(children, item, get_handler):

1676 try: # dict or obj-like

1677 keys = get_handler('keys', item)

1678 get = get_handler('get', item)

1679 except UnregisteredTarget:

1680 try:

1681 iterate = get_handler('iterate', item)

1682 except UnregisteredTarget:

1683 pass

1684 else:

1685 try: # list-like

1686 children.extend(iterate(item))

1687 except Exception:

1688 pass

1689 else:

1690 try:

1691 for key in keys(item):

1692 try:

1693 children.append(get(item, key))

1694 except Exception:

1695 pass

1696 except Exception:

1697 pass

1698

1699

# The three root singletons. Each one's op tuple starts with the
# singleton itself, which is how the root of an op path is recognized.
T = TType()  # target aka Mr. T aka "this"
T.__ops__ = (T,)

S = TType()  # like T, but means grab stuff from Scope, not Target
S.__ops__ = (S,)

A = TType()  # like S, but shorthand to assign target to scope
A.__ops__ = (A,)

_T_STAR = T.__star__()  # helper constant for Path.from_text
_T_STARSTAR = T.__starstar__()  # helper constant for Path.from_text

UP = make_sentinel('UP')
ROOT = make_sentinel('ROOT')

1713

1714

1715def _format_slice(x):

1716 if type(x) is not slice:

1717 return bbrepr(x)

1718 fmt = lambda v: "" if v is None else bbrepr(v)

1719 if x.step is None:

1720 return fmt(x.start) + ":" + fmt(x.stop)

1721 return fmt(x.start) + ":" + fmt(x.stop) + ":" + fmt(x.step)

1722

1723

def _format_t(path, root=T):
    """Render a ``TType`` op path as the expression that would produce it.

    Args:
        path: The flat ``(op, arg, op, arg, ...)`` sequence, without
            the root.
        root: The root singleton (``T``, ``S`` or ``A``).

    Returns:
        The formatted string. If a ``'P'`` op is encountered, the whole
        path is formatted by ``_format_path`` instead.
    """
    prepr = [{T: 'T', S: 'S', A: 'A'}[root]]
    i = 0
    while i < len(path):
        op, arg = path[i], path[i + 1]
        if op == '.':
            prepr.append('.' + arg)
        elif op == '[':
            if type(arg) is tuple:
                index = ", ".join([_format_slice(x) for x in arg])
            else:
                index = _format_slice(arg)
            prepr.append(f"[{index}]")
        elif op == '(':
            args, kwargs = arg
            prepr.append(format_invocation(args=args, kwargs=kwargs, repr=bbrepr))
        elif op == 'P':
            return _format_path(path)
        elif op == 'x':
            prepr.append(".__star__()")
        elif op == 'X':
            prepr.append(".__starstar__()")
        elif op in ('_', '~'):  # unary arithmetic operators
            # parenthesize what came before if it already contains an operator
            if any([o in path[:i] for o in '+-/%:&|^~_']):
                prepr = ['('] + prepr + [')']
            prepr = ['-' if op == '_' else op] + prepr
        else:  # binary arithmetic operators
            formatted_arg = bbrepr(arg)
            if type(arg) is TType:
                arg_path = arg.__ops__
                if any([o in arg_path for o in '+-/%:&|^~_']):
                    formatted_arg = '(' + formatted_arg + ')'
            # ':' and '#' are internal op codes for ** and //; print the
            # real operator so the result reads as valid Python
            symbol = {':': '**', '#': '//'}.get(op, op)
            prepr.append(' ' + symbol + ' ')
            prepr.append(formatted_arg)
        i += 2
    return "".join(prepr)

1760

1761

class Val:
    """Val objects are specs which evaluate to the wrapped *value*.

    >>> target = {'a': {'b': 'c'}}
    >>> spec = {'a': 'a.b', 'readability': Val('counts')}
    >>> pprint(glom(target, spec))
    {'a': 'c', 'readability': 'counts'}

    Instead of accessing ``'counts'`` as a key like it did with
    ``'a.b'``, :func:`~glom.glom` just unwrapped the Val and
    included the value.

    :class:`~glom.Val` takes one argument, the value to be returned.

    .. note::

       :class:`Val` was named ``Literal`` in versions of glom before
       20.7.0. An alias has been preserved for backwards
       compatibility, but reprs have changed.

    """
    def __init__(self, value):
        self.value = value

    def glomit(self, target, scope):
        # neither target nor scope is consulted: the stored value is the result
        return self.value

    def __repr__(self):
        return f'{self.__class__.__name__}({bbrepr(self.value)})'


Literal = Val  # backwards compat for pre-20.7.0

1795

1796

class ScopeVars:
    """This is the runtime partner of :class:`Vars` -- this is what
    actually lives in the scope and stores runtime values.

    While not part of the importable API of glom, it's half expected
    that some folks may write specs to populate and export scopes, at
    which point this type makes it easy to access values by attribute
    access or by converting to a dict.

    """
    def __init__(self, base, defaults):
        # merge into a fresh dict (defaults win on key clashes), then
        # install it as the instance namespace
        merged = dict(base)
        merged.update(defaults)
        self.__dict__ = merged

    def __iter__(self):
        # yields (name, value) pairs, which is what makes dict(instance) work
        return iter(self.__dict__.items())

    def __repr__(self):
        return f"{self.__class__.__name__}({bbrepr(self.__dict__)})"

1816

1817

class Vars:
    """
    :class:`Vars` is a helper that can be used with **S** in order to
    store shared mutable state.

    Takes the same arguments as :class:`dict()`.

    Arguments here should be thought of the same way as default arguments
    to a function. Each time the spec is evaluated, the same arguments
    will be referenced; so, think carefully about mutable data structures.
    """
    def __init__(self, base=(), **kw):
        # the result is discarded: this call only checks that *base*
        # is something dict() accepts
        dict(base)
        self.base = base
        self.defaults = kw

    def glomit(self, target, spec):
        # builds a new ScopeVars from the stored arguments on each evaluation
        return ScopeVars(self.base, self.defaults)

    def __repr__(self):
        positional = (self.base,) if self.base else ()
        return format_invocation(self.__class__.__name__,
                                 args=positional,
                                 kwargs=self.defaults,
                                 repr=bbrepr)

1843

1844

class Let:
    """
    Deprecated, kept for backwards compat. Use S(x='y') instead.

    >>> target = {'data': {'val': 9}}
    >>> spec = (Let(value=T['data']['val']), {'val': S['value']})
    >>> glom(target, spec)
    {'val': 9}

    """
    def __init__(self, **kw):
        if not kw:
            raise TypeError('expected at least one keyword argument')
        self._binding = kw

    def glomit(self, target, scope):
        # evaluate every binding first, then publish them to the scope together
        evaluated = {}
        for name, subspec in self._binding.items():
            evaluated[name] = scope[glom](target, subspec, scope)
        scope.update(evaluated)
        return target

    def __repr__(self):
        return format_invocation(
            self.__class__.__name__, kwargs=self._binding, repr=bbrepr)

1868

1869

class Auto:
    """
    Switch to Auto mode (the default)

    TODO: this seems like it should be a sub-class of class Spec() --
    if Spec() could help define the interface for new "modes" or dialects
    that would also help make match mode feel less duct-taped on
    """
    def __init__(self, spec=None):
        self.spec = spec

    def glomit(self, target, scope):
        # set the mode in this scope, then evaluate the wrapped spec under it
        scope[MODE] = AUTO
        return scope[glom](target, self.spec, scope)

    def __repr__(self):
        inner = bbrepr(self.spec) if self.spec is not None else ''
        return f'{self.__class__.__name__}({inner})'

1889

1890

class _AbstractIterable(_AbstractIterableBase):
    """Abstract type matched by classes with a callable ``__iter__``,
    with ``str`` and ``bytes`` explicitly excluded."""
    __metaclass__ = ABCMeta

    @classmethod
    def __subclasshook__(cls, C):
        # str and bytes have __iter__ but are not treated as iterables here
        return C not in (str, bytes) and callable(getattr(C, "__iter__", None))

1898

1899

1900class _ObjStyleKeysMeta(type):

1901 def __instancecheck__(cls, C):

1902 return hasattr(C, "__dict__") and hasattr(C.__dict__, "keys")

1903

1904

class _ObjStyleKeys(_ObjStyleKeysMeta('_AbstractKeys', (object,), {})):
    """Type used for objects whose keys are the names in their ``__dict__``."""
    __metaclass__ = _ObjStyleKeysMeta

    @staticmethod
    def get_keys(obj):
        # the keys of an object are the names stored in its instance __dict__
        return obj.__dict__.keys()

1912

1913

1914def _get_sequence_item(target, index):

1915 return target[int(index)]

1916

1917

# handlers are 3-arg callables. The module-level handlers below take
# their arguments as (target, spec, scope). When a handler is a method
# of the spec type, the spec comes first (as ``self``) for convenience.

def _handle_dict(target, spec, scope):
    """Evaluate each value of the dict *spec* against *target* and collect
    the results into a new mapping of the same type as *spec*.

    Entries whose value evaluates to SKIP are left out. Keys that are
    Spec or T objects are themselves evaluated against *target* (after
    their value has been evaluated).
    """
    ret = type(spec)()  # TODO: works for dict + ordereddict, but sufficient for all?
    for key, subspec in spec.items():
        value = scope[glom](target, subspec, scope)
        if value is not SKIP:
            if type(key) in (Spec, TType):
                key = scope[glom](target, key, scope)
            ret[key] = value
    return ret

1931

1932

def _handle_list(target, spec, scope):
    """Apply the first element of the list *spec* to every item produced
    by iterating *target*, returning the results as a list.

    SKIP results are dropped; a STOP result ends the iteration early.
    Raises TypeError if the registered 'iterate' handler fails on *target*.
    """
    subspec = spec[0]
    registry = scope[TargetRegistry]
    iterate = registry.get_handler('iterate', target, path=scope[Path])
    try:
        iterator = iterate(target)
    except Exception as e:
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e))
    results = []
    prefix = scope[Path]
    for index, item in enumerate(iterator):
        # each item is evaluated with its index appended to the path
        scope[Path] = prefix + [index]
        outcome = scope[glom](item, subspec, scope)
        if outcome is STOP:
            break
        if outcome is not SKIP:
            results.append(outcome)
    return results

1952

1953

def _handle_tuple(target, spec, scope):
    """Evaluate the steps of *spec* in sequence, feeding each result in as
    the target of the next step, and return the final result.

    A step that yields SKIP leaves the running result untouched; a step
    that yields STOP ends the chain with the result so far.
    """
    current = target
    for step in spec:
        # chain each step onto the scope left behind by the previous one
        scope = chain_child(scope)
        outcome = scope[glom](current, step, scope)
        if outcome is STOP:
            break
        if outcome is SKIP:
            continue
        current = outcome
        if not isinstance(step, list):
            scope[Path] += [getattr(step, '__name__', step)]
    return current

1967

1968

class Pipe:
    """Evaluate specs one after the other, passing the result of
    the previous evaluation in as the target of the next spec:

    >>> glom({'a': {'b': -5}}, Pipe('a', 'b', abs))
    5

    Same behavior as ``Auto(tuple(steps))``, but useful for explicit
    usage in other modes.
    """
    def __init__(self, *steps):
        # kept as the tuple of positional arguments, in call order
        self.steps = steps

    def glomit(self, target, scope):
        """Run the steps through the same chaining used for tuple specs."""
        return _handle_tuple(target, self.steps, scope)

    def __repr__(self):
        name = self.__class__.__name__
        rendered_steps = bbrepr(self.steps)
        return name + rendered_steps

1987

1988

class TargetRegistry:
    '''
    responsible for registration of target types for iteration
    and attribute walking
    '''
    def __init__(self, register_default_types=True):
        self._op_type_map = {}  # op name -> OrderedDict of type -> handler (or False)
        self._op_type_tree = {}  # see _register_fuzzy_type for details
        self._type_cache = {}  # (type, op name) -> handler resolved by get_handler

        self._op_auto_map = OrderedDict()  # op name to function that returns handler function

        self._register_builtin_ops()

        if register_default_types:
            self._register_default_types()

    def get_handler(self, op, obj, path=None, raise_exc=True):
        """for an operation and object **instance**, obj, return the
        closest-matching handler function, raising UnregisteredTarget
        if no handler can be found for *obj* (or False if
        raise_exc=False)

        Lookups are cached per (type, op). The raise_exc check is applied
        after the cache lookup, so a False result cached by an earlier
        ``raise_exc=False`` call still raises for a later
        ``raise_exc=True`` call.
        """
        obj_type = type(obj)
        cache_key = (obj_type, op)
        try:
            ret = self._type_cache[cache_key]
        except KeyError:
            ret = False
            type_map = self.get_type_map(op)
            if type_map:
                try:
                    # exact type match first
                    ret = type_map[obj_type]
                except KeyError:
                    # fall back to the closest registered parent type
                    type_tree = self._op_type_tree.get(op, {})
                    closest = self._get_closest_type(obj, type_tree=type_tree)
                    if closest is not None:
                        ret = type_map[closest]
            self._type_cache[cache_key] = ret

        if ret is False and raise_exc:
            raise UnregisteredTarget(op, obj_type, type_map=self.get_type_map(op), path=path)
        return ret

    def get_type_map(self, op):
        """Return the type -> handler map for *op*, or an empty
        OrderedDict if *op* has no map."""
        try:
            return self._op_type_map[op]
        except KeyError:
            return OrderedDict()

    def _get_closest_type(self, obj, type_tree):
        """Walk *type_tree* depth-first and return the most deeply nested
        type that *obj* is an instance of, or None if nothing matches.
        Among siblings, the first match in iteration order wins."""
        for cur_type, sub_tree in type_tree.items():
            if isinstance(obj, cur_type):
                sub_type = self._get_closest_type(obj, type_tree=sub_tree)
                return cur_type if sub_type is None else sub_type
        return None

    def _register_default_types(self):
        self.register(object)
        self.register(dict, get=operator.getitem)
        self.register(dict, keys=dict.keys)
        self.register(list, get=_get_sequence_item)
        self.register(tuple, get=_get_sequence_item)
        self.register(OrderedDict, get=operator.getitem)
        self.register(OrderedDict, keys=OrderedDict.keys)
        self.register(_AbstractIterable, iterate=iter)
        self.register(_ObjStyleKeys, keys=_ObjStyleKeys.get_keys)

    def _register_fuzzy_type(self, op, new_type, _type_tree=None):
        """Build a "type tree", an OrderedDict mapping registered types to
        their subtypes

        The type tree's invariant is that a key in the mapping is a
        valid parent type of all its children.

        Order is preserved such that non-overlapping parts of the
        subtree take precedence by which was most recently added.
        """
        if _type_tree is None:
            try:
                _type_tree = self._op_type_tree[op]
            except KeyError:
                _type_tree = self._op_type_tree[op] = OrderedDict()

        registered = False
        # iterate over a snapshot, the tree is mutated inside the loop
        for cur_type, sub_tree in list(_type_tree.items()):
            if issubclass(cur_type, new_type):
                sub_tree = _type_tree.pop(cur_type)  # mutation for recursion brevity
                try:
                    _type_tree[new_type][cur_type] = sub_tree
                except KeyError:
                    _type_tree[new_type] = OrderedDict({cur_type: sub_tree})
                registered = True
            elif issubclass(new_type, cur_type):
                _type_tree[cur_type] = self._register_fuzzy_type(op, new_type, _type_tree=sub_tree)
                registered = True
        if not registered:
            _type_tree[new_type] = OrderedDict()
        return _type_tree

    def register(self, target_type, **kwargs):
        """Register handlers for *target_type*.

        Each keyword other than ``exact`` maps an op name to its handler
        (a callable, or False for "unsupported"). Ops known to the
        registry but not passed in keep their existing handler for this
        type, or get one from the op's auto function. When ``exact`` is
        falsy (the default) the type is also added to each op's type
        tree so that instances of subtypes match.

        Raises:
            TypeError: if *target_type* is not a type, an auto function
                fails, or a handler is neither callable nor False.
        """
        if not isinstance(target_type, type):
            raise TypeError(f'register expected a type, not an instance: {target_type!r}')
        exact = kwargs.pop('exact', None)
        new_op_map = dict(kwargs)

        for op_name in sorted(set(self._op_auto_map.keys()) | set(new_op_map.keys())):
            cur_type_map = self._op_type_map.setdefault(op_name, OrderedDict())

            if op_name in new_op_map:
                handler = new_op_map[op_name]
            elif target_type in cur_type_map:
                handler = cur_type_map[target_type]
            else:
                try:
                    handler = self._op_auto_map[op_name](target_type)
                except Exception as e:
                    raise TypeError('error while determining support for operation'
                                    ' "%s" on target type: %s (got %r)'
                                    % (op_name, target_type.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            new_op_map[op_name] = handler

        for op_name, handler in new_op_map.items():
            self._op_type_map[op_name][target_type] = handler

        if not exact:
            for op_name in new_op_map:
                self._register_fuzzy_type(op_name, target_type)

        self._type_cache = {}  # reset type cache

    def register_op(self, op_name, auto_func=None, exact=False):
        """add operations beyond the builtins ('get' and 'iterate' at the time
        of writing).

        auto_func is a function that when passed a type, returns a
        handler associated with op_name if it's supported, or False if
        it's not.

        See glom.core.register_op() for the global version used by
        extensions.
        """
        if not isinstance(op_name, str):
            raise TypeError(f'expected op_name to be a text name, not: {op_name!r}')
        if auto_func is None:
            auto_func = lambda t: False
        elif not callable(auto_func):
            raise TypeError(f'expected auto_func to be callable, not: {auto_func!r}')

        # determine support for any previously known types
        known_types = set().union(*self._op_type_map.values())
        type_map = self._op_type_map.get(op_name, OrderedDict())
        type_tree = self._op_type_tree.get(op_name, OrderedDict())
        for t in sorted(known_types, key=lambda t: t.__name__):
            if t in type_map:
                continue
            try:
                handler = auto_func(t)
            except Exception as e:
                raise TypeError('error while determining support for operation'
                                ' "%s" on target type: %s (got %r)'
                                % (op_name, t.__name__, e))
            if handler is not False and not callable(handler):
                raise TypeError('expected handler for op "%s" to be'
                                ' callable or False, not: %r' % (op_name, handler))
            type_map[t] = handler

        if not exact:
            for t in known_types:
                self._register_fuzzy_type(op_name, t, _type_tree=type_tree)

        self._op_type_map[op_name] = type_map
        self._op_type_tree[op_name] = type_tree
        self._op_auto_map[op_name] = auto_func

        # results cached before this op existed (e.g. a False from a
        # raise_exc=False lookup) may no longer be accurate
        self._type_cache = {}

    def _register_builtin_ops(self):
        def _get_iterable_handler(type_obj):
            return iter if callable(getattr(type_obj, '__iter__', None)) else False

        self.register_op('iterate', _get_iterable_handler)
        self.register_op('get', lambda _: getattr)

2181

2182

# Module-level base scope: glom() builds its per-call scope as a child of
# this ChainMap, and Glommer copies it when no scope is passed. It starts
# empty and is populated further down, once _glom and TargetRegistry exist.
_DEFAULT_SCOPE = ChainMap({})

2184

2185

def glom(target, spec, **kwargs):
    """Access or construct a value from a given *target* based on the
    specification declared by *spec*.

    Accessing nested data, aka deep-get:

    >>> target = {'a': {'b': 'c'}}
    >>> glom(target, 'a.b')
    'c'

    Here the *spec* was just a string denoting a path,
    ``'a.b'``. As simple as it should be. You can also use
    :mod:`glob`-like wildcard selectors:

    >>> target = {'a': [{'k': 'v1'}, {'k': 'v2'}]}
    >>> glom(target, 'a.*.k')
    ['v1', 'v2']

    In addition to ``*``, you can also use ``**`` for recursive access:

    >>> target = {'a': [{'k': 'v3'}, {'k': 'v4'}], 'k': 'v0'}
    >>> glom(target, '**.k')
    ['v0', 'v3', 'v4']

    The next example shows how to use nested data to
    access many fields at once, and make a new nested structure.

    Constructing, or restructuring more-complicated nested data:

    >>> target = {'a': {'b': 'c', 'd': 'e'}, 'f': 'g', 'h': [0, 1, 2]}
    >>> spec = {'a': 'a.b', 'd': 'a.d', 'h': ('h', [lambda x: x * 2])}
    >>> output = glom(target, spec)
    >>> pprint(output)
    {'a': 'c', 'd': 'e', 'h': [0, 2, 4]}

    ``glom`` also takes a keyword-argument, *default*. When set,
    if a ``glom`` operation fails with a :exc:`GlomError`, the
    *default* will be returned, very much like
    :meth:`dict.get()`:

    >>> glom(target, 'a.xx', default='nada')
    'nada'

    The *skip_exc* keyword argument controls which errors should
    be ignored.

    >>> glom({}, lambda x: 100.0 / len(x), default=0.0, skip_exc=ZeroDivisionError)
    0.0

    Args:
       target (object): the object on which the glom will operate.
       spec (object): Specification of the output object in the form
         of a dict, list, tuple, string, other glom construct, or
         any composition of these.
       default (object): An optional default to return in the case
         an exception, specified by *skip_exc*, is raised.
       skip_exc (Exception): An optional exception or tuple of
         exceptions to ignore and return *default* (None if
         omitted). If *skip_exc* and *default* are both not set,
         glom raises errors through.
       scope (dict): Additional data that can be accessed
         via S inside the glom-spec. Read more: :ref:`scope`.
       path (list): Optional initial value stored under ``Path`` in
         the scope. Defaults to an empty list.
       inspector: Optional value stored under ``Inspect`` in the
         scope. Defaults to None.
       glom_debug (bool): When true, exceptions are re-raised as-is
         instead of being wrapped. Defaults to the module-level
         ``GLOM_DEBUG`` setting.

    Raises:
       TypeError: if any other keyword argument is passed.

    It's a small API with big functionality, and glom's power is
    only surpassed by its intuitiveness. Give it a whirl!

    """
    # TODO: check spec up front
    # passing skip_exc without default implies default=None; passing
    # default without skip_exc implies skip_exc=GlomError
    default = kwargs.pop('default', None if 'skip_exc' in kwargs else _MISSING)
    skip_exc = kwargs.pop('skip_exc', () if default is _MISSING else GlomError)
    glom_debug = kwargs.pop('glom_debug', GLOM_DEBUG)
    # root scope for this call, layered on top of the module defaults
    scope = _DEFAULT_SCOPE.new_child({
        Path: kwargs.pop('path', []),
        Inspect: kwargs.pop('inspector', None),
        MODE: AUTO,
        MIN_MODE: None,
        CHILD_ERRORS: [],
        'globals': ScopeVars({}, {}),
    })
    # the root scope is its own parent and its own root
    scope[UP] = scope
    scope[ROOT] = scope
    scope[T] = target
    # caller-supplied scope entries are applied last, so they can override
    # the entries set above
    scope.update(kwargs.pop('scope', {}))
    err = None
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    try:
        try:
            ret = _glom(target, spec, scope)
        except skip_exc:
            if default is _MISSING:
                raise
            ret = default  # should this also be arg_val'd?
    except Exception as e:
        if glom_debug:
            raise
        if isinstance(e, GlomError):
            # need to change id or else py3 seems to not let us truncate the
            # stack trace with the explicit "raise err" below
            err = copy.copy(e)
            err._set_wrapped(e)
        else:
            err = GlomError.wrap(e)
        if isinstance(err, GlomError):
            err._finalize(scope[LAST_CHILD_SCOPE])
        else:  # wrapping failed, fall back to default behavior
            raise

    # raised outside the except block on purpose, see the comment above
    if err:
        raise err
    return ret

2297

2298

def chain_child(scope):
    """
    used for specs like Auto(tuple), Switch(), etc
    that want to chain their child scopes together

    returns a new scope that can be passed to
    the next recursive glom call, e.g.

    scope[glom](target, spec, chain_child(scope))
    """
    own_map = scope.maps[0]
    if LAST_CHILD_SCOPE in own_map:
        # NOTE: an option here is to drill down on LAST_CHILD_SCOPE;
        # this would have some interesting consequences for scoping
        # of tuples
        successor = scope[LAST_CHILD_SCOPE]
        successor_map = successor.maps[0]
        successor_map[NO_PYFRAME] = True
        # previous failed branches are forgiven as the
        # scope is re-wired into a new stack
        del successor_map[CHILD_ERRORS][:]
        return successor
    # no children yet, nothing to do
    return scope

2320

2321

# Set holding the type of ``str.__len__`` as looked up on the class.
# NOTE(review): no use of this name is visible in this part of the file.
unbound_methods = {type(str.__len__)}  #, type(Ref.glomit)])

2323

2324

2325def _has_callable_glomit(obj):

2326 glomit = getattr(obj, 'glomit', None)

2327 return callable(glomit) and not isinstance(obj, type)

2328

2329

def _glom(target, spec, scope):
    """Evaluate *spec* against *target* in a new child of *scope*.

    The child scope records the target, spec and parent, starts with an
    empty CHILD_ERRORS list, and copies MODE and MIN_MODE from the
    parent's first map. It is also stored on the parent as
    LAST_CHILD_SCOPE. Any exception is recorded on the scopes involved
    and then re-raised unchanged.
    """
    parent = scope
    pmap = parent.maps[0]
    scope = scope.new_child({
        T: target,
        Spec: spec,
        UP: parent,
        CHILD_ERRORS: [],
        MODE: pmap[MODE],
        MIN_MODE: pmap[MIN_MODE],
    })
    pmap[LAST_CHILD_SCOPE] = scope

    try:
        if type(spec) is TType:  # must go first, due to callability
            scope[MIN_MODE] = None  # None is tombstone
            return _t_eval(target, spec, scope)
        elif _has_callable_glomit(spec):
            scope[MIN_MODE] = None
            return spec.glomit(target, scope)

        # plain spec: MIN_MODE takes precedence over MODE when it is set
        return (scope.maps[0][MIN_MODE] or scope.maps[0][MODE])(target, spec, scope)
    except Exception as e:
        # maps[1] is the parent's first map (new_child prepends one map)
        scope.maps[1][CHILD_ERRORS].append(scope)
        scope.maps[0][CUR_ERROR] = e
        if NO_PYFRAME in scope.maps[1]:
            # the parent was flagged NO_PYFRAME (see chain_child): record
            # the error on each flagged ancestor as well
            cur_scope = scope[UP]
            while NO_PYFRAME in cur_scope.maps[0]:
                cur_scope.maps[1][CHILD_ERRORS].append(cur_scope)
                cur_scope.maps[0][CUR_ERROR] = e
                cur_scope = cur_scope[UP]
        raise

2362

2363

def AUTO(target, spec, scope):
    """Default mode: dispatch on the type of *spec*.

    Strings are treated as paths, dicts/lists/tuples go to their
    handlers, and any other callable is called with *target*.
    Raises TypeError for anything else.
    """
    if type(spec) is str:  # shortcut to make deep-get use case faster
        path_t = Path.from_text(spec).path_t
        return _t_eval(target, path_t, scope)
    if isinstance(spec, dict):
        return _handle_dict(target, spec, scope)
    if isinstance(spec, list):
        return _handle_list(target, spec, scope)
    if isinstance(spec, tuple):
        return _handle_tuple(target, spec, scope)
    if isinstance(spec, basestring):
        # only reached by str subclasses; exact str was handled above
        return Path.from_text(spec).glomit(target, scope)
    if callable(spec):
        return spec(target)

    raise TypeError('expected spec to be dict, list, tuple, callable, string,'
                    ' or other Spec-like type, not: %r' % (spec,))

2380

2381

# Populate the base scope: the glom function object is the key under which
# the recursive evaluator (_glom) is stored, which is why specs recurse via
# scope[glom](...). The TargetRegistry class is the key for the default,
# fully-populated registry instance.
_DEFAULT_SCOPE.update({
    glom: _glom,
    TargetRegistry: TargetRegistry(register_default_types=True),
})

2386

2387

def register(target_type, **kwargs):
    """Register *target_type* so :meth:`~Glommer.glom()` will
    know how to handle instances of that type as targets.

    Here's an example of adding basic iterable support for Django's ORM:

    .. code-block:: python

        import glom
        import django.db.models

        glom.register(django.db.models.Manager, iterate=lambda m: m.all())
        glom.register(django.db.models.QuerySet, iterate=lambda qs: qs.all())



    Args:
       target_type (type): A type expected to appear in a glom()
          call target
       get (callable): A function which takes a target object and
          a name, acting as a default accessor. Defaults to
          :func:`getattr`.
       iterate (callable): A function which takes a target object
          and returns an iterator. Defaults to :func:`iter` if
          *target_type* appears to be iterable.
       exact (bool): Whether or not to match instances of subtypes
          of *target_type*.

    .. note::

       The module-level :func:`register()` function affects the
       module-level :func:`glom()` function's behavior. If this
       global effect is undesirable for your application, or
       you're implementing a library, consider instantiating a
       :class:`Glommer` instance, and using the
       :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
       methods instead.

    """
    # delegate to the registry stored in the module-level default scope
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register(target_type, **kwargs)

2429

2430

def register_op(op_name, **kwargs):
    """For extension authors needing to add operations beyond the builtin
    'get', 'iterate', 'keys', 'assign', and 'delete' to the default scope.
    See TargetRegistry for more details.
    """
    # delegate to the registry stored in the module-level default scope
    default_registry = _DEFAULT_SCOPE[TargetRegistry]
    default_registry.register_op(op_name, **kwargs)

2438

2439

class Glommer:
    """The :class:`Glommer` type mostly serves to encapsulate type
    registration context so that advanced uses of glom don't need to
    worry about stepping on each other.

    Glommer objects are lightweight and, once instantiated, provide
    a :func:`glom()` method:

    >>> glommer = Glommer()
    >>> glommer.glom({}, 'a.b.c', default='d')
    'd'
    >>> Glommer().glom({'vals': list(range(3))}, ('vals', len))
    3

    Instances also provide :meth:`~Glommer.register()` method for
    localized control over type handling.

    Args:
       register_default_types (bool): Whether or not to enable the
          handling behaviors of the default :func:`glom()`. These
          default actions include dict access, list and iterable
          iteration, and generic object attribute access. Defaults to
          True.

    """
    def __init__(self, **kwargs):
        use_default_types = kwargs.pop('register_default_types', True)
        base_scope = kwargs.pop('scope', _DEFAULT_SCOPE)

        # copying into a plain dict "freezes" the scope in at the time
        # of construction
        frozen_scope = ChainMap(dict(base_scope))
        # every Glommer gets its own registry, independent of the base scope's
        frozen_scope[TargetRegistry] = TargetRegistry(
            register_default_types=use_default_types)
        self.scope = frozen_scope

    def register(self, target_type, **kwargs):
        """Register *target_type* so :meth:`~Glommer.glom()` will
        know how to handle instances of that type as targets.

        Args:
           target_type (type): A type expected to appear in a glom()
              call target
           get (callable): A function which takes a target object and
              a name, acting as a default accessor. Defaults to
              :func:`getattr`.
           iterate (callable): A function which takes a target object
              and returns an iterator. Defaults to :func:`iter` if
              *target_type* appears to be iterable.
           exact (bool): Whether or not to match instances of subtypes
              of *target_type*.

        .. note::

           The module-level :func:`register()` function affects the
           module-level :func:`glom()` function's behavior. If this
           global effect is undesirable for your application, or
           you're implementing a library, consider instantiating a
           :class:`Glommer` instance, and using the
           :meth:`~Glommer.register()` and :meth:`Glommer.glom()`
           methods instead.

        """
        exact = kwargs.pop('exact', False)
        own_registry = self.scope[TargetRegistry]
        own_registry.register(target_type, exact=exact, **kwargs)

    def glom(self, target, spec, **kwargs):
        """Call the module-level glom() with this instance's scope."""
        return glom(target, spec, scope=self.scope, **kwargs)

2506

2507

class Fill:
    """A specifier type which switches to glom into "fill-mode". For the
    spec contained within the Fill, glom will only interpret explicit
    specifier types (including T objects). Whereas the default mode
    has special interpretations for each of these builtins, fill-mode
    takes a lighter touch, making Fill great for "filling out" Python
    literals, like tuples, dicts, sets, and lists.

    >>> target = {'data': [0, 2, 4]}
    >>> spec = Fill((T['data'][2], T['data'][0]))
    >>> glom(target, spec)
    (4, 0)

    As you can see, glom's usual built-in tuple item chaining behavior
    has switched into a simple tuple constructor.

    (Sidenote for Lisp fans: Fill is like glom's quasi-quoting.)

    """
    def __init__(self, spec=None):
        # the spec to evaluate once the mode has been switched
        self.spec = spec

    def glomit(self, target, scope):
        """Set the scope's mode to FILL, then evaluate the wrapped spec
        against *target* in that scope."""
        scope[MODE] = FILL
        evaluate = scope[glom]
        return evaluate(target, self.spec, scope)

    def fill(self, target):
        """Shortcut for ``glom(target, self)``."""
        return glom(target, self)

    def __repr__(self):
        # an unset spec is rendered as an empty argument list
        inner = bbrepr(self.spec) if self.spec is not None else ''
        return f'{self.__class__.__name__}({inner})'

2541

2542

def FILL(target, spec, scope):
    """Fill mode: rebuild dict/list/tuple/set/frozenset literals with each
    member evaluated against *target*, call any other callable with
    *target*, and return everything else unchanged.

    Only exact builtin container types are rebuilt; subclasses fall
    through to the callable / pass-through handling.
    """
    # TODO: register an operator or two for the following to allow
    # extension. This operator can probably be shared with the
    # upcoming traversal/remap feature.
    def fill_member(val):
        return scope[glom](target, val, scope)

    kind = type(spec)
    if kind is dict:
        return {fill_member(key): fill_member(val) for key, val in spec.items()}
    if kind in (list, tuple, set, frozenset):
        filled = [fill_member(val) for val in spec]
        return filled if kind is list else kind(filled)
    return spec(target) if callable(spec) else spec

2558

2559class _ArgValuator:

2560 def __init__(self):

2561 self.cache = {}

2562

2563 def mode(self, target, spec, scope):

2564 """

2565 similar to FILL, but without function calling;

2566 useful for default, scope assignment, call/invoke, etc

2567 """

2568 recur = lambda val: scope[glom](target, val, scope)

2569 result = spec

2570 if type(spec) in (list, dict): # can contain themselves

2571 if id(spec) in self.cache:

2572 return self.cache[id(spec)]

2573 result = self.cache[id(spec)] = type(spec)()

2574 if type(spec) is dict:

2575 result.update({recur(key): recur(val) for key, val in spec.items()})

2576 else:

2577 result.extend([recur(val) for val in spec])

2578 if type(spec) in (tuple, set, frozenset): # cannot contain themselves

2579 result = type(spec)([recur(val) for val in spec])

2580 return result

2581

2582

def arg_val(target, arg, scope):
    """
    evaluate an argument to find its value
    (arg_val phonetically similar to "eval" -- evaluate as an arg)

    *arg* is evaluated against *target* with the scope's MIN_MODE
    temporarily set to a fresh _ArgValuator mode. The previous MIN_MODE
    is put back afterwards, including when the evaluation raises.
    """
    previous_min_mode = scope[MIN_MODE]
    scope[MIN_MODE] = _ArgValuator().mode
    try:
        return scope[glom](target, arg, scope)
    finally:
        # restore on the error path too, so a caller that catches the
        # exception and keeps using this scope does not inherit arg mode
        scope[MIN_MODE] = previous_min_mode