# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/reduction.py: 66%
# Shortcuts on this page:
#   r m x     toggle line displays
#   j k       next/prev highlighted chunk
#   0 (zero)  top of page
#   1 (one)   first highlighted chunk
import operator
import itertools
from pprint import pprint

from boltons.typeutils import make_sentinel

from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE
from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG
# Module-private sentinel object, created with boltons' ``make_sentinel``.
_MISSING = make_sentinel('_MISSING')
# Python 2/3 compatibility shim: ``basestring`` only exists on Python 2.
# When the name lookup fails (Python 3), alias it to ``str`` so that
# ``isinstance(x, basestring)`` checks below work on both versions.
try:
    basestring
except NameError:
    basestring = str
class FoldError(GlomError):
    """Error raised when Fold() is called on non-iterable
    targets, and possibly other uses in the future."""
class Fold(object):
    """The `Fold` specifier type is glom's building block for reducing
    iterables in data, implementing the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, similar to Python's built-in
    :func:`reduce`.

    Args:
       subspec: A spec representing the target to fold, which must be
          an iterable, or otherwise registered to 'iterate' (with
          :func:`~glom.register`).
       init (callable): A function or type which will be invoked to
          initialize the accumulator value.
       op (callable): A function to call on the accumulator value and
          every value, the result of which will become the new
          accumulator value. Defaults to :func:`operator.iadd`.

    Usage is as follows:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    Note the required ``spec`` and ``init`` arguments. ``op`` is
    optional, but here must be used because the :class:`set` and
    :class:`frozenset` types do not work with addition.

    While :class:`~glom.Fold` is powerful, :class:`~glom.Flatten` and
    :class:`~glom.Sum` are subtypes with more convenient defaults for
    day-to-day use.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        """Store the fold parameters.

        Raises:
           TypeError: if *op* or *init* is not callable.
        """
        self.subspec = subspec
        self.init = init
        self.op = op
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (self.__class__.__name__, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (self.__class__.__name__, init))

    def glomit(self, target, scope):
        """Evaluate this spec against *target* within *scope*.

        Raises:
           FoldError: if iterating the target raises UnregisteredTarget.
        """
        is_agg = False
        # In GROUP mode, if no aggregator has been recorded in the scope
        # yet, this spec records itself as the current aggregator and
        # accumulates one target per call instead of iterating.
        if scope[MODE] is GROUP and scope.get(CUR_AGG) is None:
            scope[CUR_AGG] = self
            is_agg = True

        # T is the identity spec, so only evaluate non-trivial subspecs.
        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if is_agg:
            return self._agg(target, scope[ACC_TREE])
        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        """Reduce *iterator* to one value, starting from ``self.init()``
        and rebinding the accumulator to ``self.op(acc, value)`` for
        each value."""
        ret, op = self.init(), self.op

        for v in iterator:
            ret = op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Fold the single *target* into the accumulator stored in
        *tree* under this spec instance, creating it with ``self.init()``
        on first use, and return the updated accumulator."""
        if self not in tree:
            tree[self] = self.init()
        tree[self] = self.op(tree[self], target)
        return tree[self]

    def __repr__(self):
        cn = self.__class__.__name__
        kwargs = {'init': self.init}
        # Only show *op* when it differs from the default.
        if self.op is not operator.iadd:
            kwargs['op'] = self.op
        return format_invocation(cn, (self.subspec,), kwargs, repr=bbrepr)
class Sum(Fold):
    """The `Sum` specifier type is used to aggregate integers and other
    numericals using addition, much like the :func:`sum()` builtin.

       >>> glom(range(5), Sum())
       10

    Note that this specifier takes a callable *init* parameter like
    its friends, so to change the start value, be sure to wrap it in a
    callable::

       >>> glom(range(5), Sum(init=lambda: 5.0))
       15.0

    To "sum" lists and other iterables, see the :class:`Flatten`
    spec. For other objects, see the :class:`Fold` specifier type.

    """
    def __init__(self, subspec=T, init=int):
        # The operation is fixed to in-place addition; only the subspec
        # and the initial-value factory are configurable.
        super(Sum, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        cn = self.__class__.__name__
        # Omit arguments that are still at their defaults.
        args = () if self.subspec is T else (self.subspec,)
        kwargs = {'init': self.init} if self.init is not int else {}
        return format_invocation(cn, args, kwargs, repr=bbrepr)
class Count(Fold):
    """
    takes a count of how many values occurred

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # Start from int() == 0 and add one per value; the value itself
        # is ignored by the op.
        super(Count, self).__init__(
            subspec=T, init=int, op=lambda cur, val: cur + 1)

    def __repr__(self):
        return '%s()' % self.__class__.__name__
class Flatten(Fold):
    """The `Flatten` specifier type is used to combine iterables. By
    default it flattens an iterable of iterables into a single list
    containing items from all iterables.

    >>> target = [[1], [2, 3]]
    >>> glom(target, Flatten())
    [1, 2, 3]

    You can also set *init* to ``"lazy"``, which returns a generator
    instead of a list. Use this to avoid making extra lists and other
    collections during intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        # "lazy" is a special marker rather than a callable: record the
        # flag and hand the base class a valid callable init.
        if init == 'lazy':
            self.lazy = True
            init = list
        else:
            self.lazy = False
        super(Flatten, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        """Chain the iterables lazily when ``self.lazy`` is set;
        otherwise defer to the base ``Fold._fold``."""
        if self.lazy:
            return itertools.chain.from_iterable(iterator)
        return super(Flatten, self)._fold(iterator)

    def __repr__(self):
        cn = self.__class__.__name__
        # Omit arguments that are still at their defaults; show the
        # original "lazy" marker rather than the substituted ``list``.
        args = () if self.subspec is T else (self.subspec,)
        kwargs = {}
        if self.lazy:
            kwargs['init'] = 'lazy'
        elif self.init is not list:
            kwargs['init'] = self.init
        return format_invocation(cn, args, kwargs, repr=bbrepr)
def flatten(target, **kwargs):
    """At its most basic, ``flatten()`` turns an iterable of iterables
    into a single list. But it has a few arguments which give it more
    power:

    Args:

       init (callable): A function or type which gives the initial
          value of the return. The value must support addition. Common
          values might be :class:`list` (the default), :class:`tuple`,
          or even :class:`int`. You can also pass ``init="lazy"`` to
          get a generator.
       levels (int): A positive integer representing the number of
          nested levels to flatten. Defaults to 1.
       spec: The glomspec to fetch before flattening. This defaults to
          the root level of the object.

    Usage is straightforward.

      >>> target = [[1, 2], [3], [4]]
      >>> flatten(target)
      [1, 2, 3, 4]

    Because integers themselves support addition, we actually have two
    levels of flattening possible, to get back a single integer sum:

      >>> flatten(target, init=int, levels=2)
      10

    However, flattening a non-iterable like an integer will raise an
    exception:

      >>> target = 10
      >>> flatten(target)
      Traceback (most recent call last):
      ...
      FoldError: can only Flatten on iterable targets, not int type (...)

    By default, ``flatten()`` will add a mix of iterables together,
    making it a more-robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick most experienced Python
    programmers are familiar with using:

      >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
      >>> sum(list_of_iterables, [])
      Traceback (most recent call last):
      ...
      TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

      >>> flatten(list_of_iterables)
      [0, 1, 2, 3, 4, 5]

    The ``flatten()`` function is a convenient wrapper around the
    :class:`Flatten` specifier type. For embedding in larger specs,
    and more involved flattening, see :class:`Flatten` and its base,
    :class:`Fold`.

    Raises:
       TypeError: if unexpected keyword arguments are passed.
       ValueError: if *levels* is negative.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    # With zero levels the target is returned as-is; note that *spec*
    # is not applied in this case.
    if levels == 0:
        return target
    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)
    # Fetch with the subspec, flatten lazily for every level but the
    # last, then build the final value with the requested *init*.
    spec = (subspec,)
    spec += (Flatten(init="lazy"),) * (levels - 1)
    spec += (Flatten(init=init),)

    return glom(target, spec)
class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
          instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
          performs a merge of the second into the first. Can also be
          the string name of a method to fetch on the instance created
          from *init*. Defaults to ``"update"``.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.

    """
    def __init__(self, subspec=T, init=dict, op=None):
        """Resolve *op* (callable or method name) and initialize the fold.

        Raises:
           ValueError: if *op* is not callable, or names a method that
              the type produced by *init* does not provide.
        """
        if op is None:
            op = 'update'
        # Keep what the caller asked for: when the method lookup below
        # fails, ``op`` becomes None and the error message would
        # otherwise lose the requested name.
        requested_op = op
        if isinstance(op, basestring):
            # Look the method up on the type of a throwaway instance so
            # it is called unbound as op(accumulator, value).
            test_init = init()
            op = getattr(type(test_init), op, None)
        if not callable(op):
            raise ValueError('expected callable "op" arg or an "init" with an .update()'
                             ' method not %r and %r' % (requested_op, init))
        super(Merge, self).__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        """Merge every value from *iterator* into a fresh ``self.init()``
        instance and return that instance."""
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op

        for v in iterator:
            op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Merge the single *target* in-place into the accumulator
        stored in *tree* under this spec instance, creating it with
        ``self.init()`` on first use, and return the accumulator."""
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        self.op(acc, target)
        return acc
def merge(target, **kwargs):
    """By default, ``merge()`` turns an iterable of mappings into a
    single, merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. A new mapping is created and none of
    the passed mappings are modified.

    >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
    >>> res = merge(target)
    >>> pprint(res)
    {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    The start state can be customized with the *init* keyword
    argument, as well as the update operation, with the *op* keyword
    argument. For more on those customizations, see the :class:`Merge`
    spec.

    A *spec* keyword argument (defaulting to ``T``) is also accepted
    and passed to :class:`Merge` as its subspec.

    Raises:
       TypeError: if unexpected keyword arguments are passed.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', dict)
    op = kwargs.pop('op', None)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    spec = Merge(subspec, init, op)
    return glom(target, spec)