Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/reduction.py: 66%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import operator
2import itertools
3from pprint import pprint
5from boltons.typeutils import make_sentinel
7from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE
8from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG
# Module-level sentinel object created via boltons' make_sentinel.
# NOTE(review): not referenced anywhere in the visible part of this
# module -- presumably kept for importers; confirm before removing.
_MISSING = make_sentinel('_MISSING')


# Compatibility shim: if the name ``basestring`` is not defined (the
# lookup raises NameError), alias it to ``str``. Merge.__init__ relies on
# this name for its ``isinstance(op, basestring)`` check.
try:
    basestring
except NameError:
    basestring = str
class FoldError(GlomError):
    """Raised when :class:`Fold` (or one of its subtypes) is applied to
    a target that cannot be iterated. Other fold-related failures may
    reuse this type in the future.
    """
class Fold:
    """The `Fold` specifier type is glom's building block for reducing
    iterables in data. It implements the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, similar to Python's built-in
    :func:`reduce`.

    Args:
       subspec: A spec representing the target to fold, which must be
          an iterable, or otherwise registered to 'iterate' (with
          :func:`~glom.register`).
       init (callable): A function or type which will be invoked to
          initialize the accumulator value.
       op (callable): A function to call on the accumulator value and
          every value, the result of which will become the new
          accumulator value. Defaults to :func:`operator.iadd`.

    Usage is as follows:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    Note the required ``subspec`` and ``init`` arguments. ``op`` is
    optional, but here must be used because the :class:`set` and
    :class:`frozenset` types do not work with addition.

    While :class:`~glom.Fold` is powerful, :class:`~glom.Flatten` and
    :class:`~glom.Sum` are subtypes with more convenient defaults for
    day-to-day use.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        # Validate in the same order as always: op first, then init.
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (self.__class__.__name__, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (self.__class__.__name__, init))
        self.subspec = subspec
        self.init = init
        self.op = op

    def glomit(self, target, scope):
        """Evaluate this spec against *target* within *scope*.

        In GROUP mode, when no aggregator has been claimed yet, this
        instance registers itself as the current aggregator and feeds
        the target into the accumulator tree. Otherwise the target is
        iterated and folded directly.

        Raises:
            FoldError: if the target is not registered as iterable.
        """
        aggregating = scope[MODE] is GROUP and scope.get(CUR_AGG) is None
        if aggregating:
            scope[CUR_AGG] = self

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if aggregating:
            return self._agg(target, scope[ACC_TREE])

        try:
            folded = self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))
        return folded

    def _fold(self, iterator):
        """Reduce *iterator* to a single value, starting from ``init()``."""
        acc = self.init()
        combine = self.op

        for item in iterator:
            # rebind: op may return a new object rather than mutate in place
            acc = combine(acc, item)

        return acc

    def _agg(self, target, tree):
        """Fold one *target* value into this spec's slot of *tree*."""
        if self not in tree:
            tree[self] = self.init()
        updated = self.op(tree[self], target)
        tree[self] = updated
        return updated

    def __repr__(self):
        # op is only shown when it differs from the default
        extra = {} if self.op is operator.iadd else {'op': self.op}
        kwargs = dict(init=self.init, **extra)
        return format_invocation(
            self.__class__.__name__, (self.subspec,), kwargs, repr=bbrepr)
class Sum(Fold):
    """The `Sum` specifier type aggregates integers and other numeric
    values by addition, much like the :func:`sum()` builtin.

    >>> glom(range(5), Sum())
    10

    Like its sibling specifiers, *init* must be a callable. To change
    the start value, wrap it in a callable::

    >>> glom(range(5), Sum(init=lambda: 5.0))
    15.0

    To "sum" lists and other iterables, see the :class:`Flatten`
    spec. For other objects, see the :class:`Fold` specifier type.

    """
    def __init__(self, subspec=T, init=int):
        # Sum is a Fold whose operation is always in-place addition.
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        # Only non-default arguments appear in the repr.
        args = ()
        if self.subspec is not T:
            args = (self.subspec,)
        kwargs = {}
        if self.init is not int:
            kwargs = {'init': self.init}
        return format_invocation(
            self.__class__.__name__, args, kwargs, repr=bbrepr)
class Count(Fold):
    """
    Counts how many values occurred in the target.

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # Start from int() and add one per value; the value itself is
        # ignored by the op.
        super().__init__(
            subspec=T,
            init=int,
            op=lambda cur, val: cur + 1,
        )

    def __repr__(self):
        # Count takes no arguments, so the repr is just the class name.
        return '%s()' % (self.__class__.__name__,)
class Flatten(Fold):
    """The `Flatten` specifier type combines iterables. By default it
    turns an iterable of iterables into a single list holding the
    items of every inner iterable.

    >>> target = [[1], [2, 3]]
    >>> glom(target, Flatten())
    [1, 2, 3]

    Passing ``init="lazy"`` makes the spec return a generator-like
    iterator instead of a list. Use this to avoid building extra lists
    and other collections during intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        # "lazy" is a marker, not a callable: remember it and hand the
        # base class a real callable so its validation passes.
        self.lazy = bool(init == 'lazy')
        if self.lazy:
            init = list
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        """Chain lazily when requested, otherwise fold as usual."""
        if not self.lazy:
            return super()._fold(iterator)
        return itertools.chain.from_iterable(iterator)

    def __repr__(self):
        # Only non-default arguments appear in the repr.
        args = ()
        if self.subspec is not T:
            args = (self.subspec,)

        kwargs = {}
        if self.lazy:
            kwargs['init'] = 'lazy'
        elif self.init is not list:
            kwargs['init'] = self.init

        return format_invocation(
            self.__class__.__name__, args, kwargs, repr=bbrepr)
def flatten(target, **kwargs):
    """At its most basic, ``flatten()`` turns an iterable of iterables
    into a single list. A few keyword arguments give it more power:

    Args:

       init (callable): A function or type which gives the initial
          value of the return. The value must support addition. Common
          values might be :class:`list` (the default), :class:`tuple`,
          or even :class:`int`. You can also pass ``init="lazy"`` to
          get a generator.
       levels (int): A positive integer representing the number of
          nested levels to flatten. Defaults to 1.
       spec: The glomspec to fetch before flattening. This defaults to
          the root level of the object.

    Usage is straightforward.

      >>> target = [[1, 2], [3], [4]]
      >>> flatten(target)
      [1, 2, 3, 4]

    Because integers themselves support addition, we actually have two
    levels of flattening possible, to get back a single integer sum:

      >>> flatten(target, init=int, levels=2)
      10

    However, flattening a non-iterable like an integer will raise an
    exception:

      >>> target = 10
      >>> flatten(target)
      Traceback (most recent call last):
      ...
      FoldError: can only Flatten on iterable targets, not int type (...)

    By default, ``flatten()`` will add a mix of iterables together,
    making it a more-robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick most experienced Python
    programmers are familiar with using:

      >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
      >>> sum(list_of_iterables, [])
      Traceback (most recent call last):
      ...
      TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

      >>> flatten(list_of_iterables)
      [0, 1, 2, 3, 4, 5]

    The ``flatten()`` function is a convenient wrapper around the
    :class:`Flatten` specifier type. For embedding in larger specs,
    and more involved flattening, see :class:`Flatten` and its base,
    :class:`Fold`.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        # anything left over was not a recognized option
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    if levels == 0:
        return target
    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)

    # Every level but the last is flattened lazily so no intermediate
    # collections are built; only the final step uses the caller's init.
    lazy_step = Flatten(init="lazy")
    final_step = Flatten(init=init)
    spec = (subspec,) + (lazy_step,) * (levels - 1) + (final_step,)

    return glom(target, spec)
class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
          instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
          performs a merge of the second into the first. Can also be
          the string name of a method to fetch on the instance created
          from *init*. Defaults to ``"update"``.

    Raises:
       ValueError: if *op* is neither callable nor the name of a
          callable attribute on the type of the object *init* creates.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.

    """
    def __init__(self, subspec=T, init=dict, op=None):
        if op is None:
            op = 'update'
        # Remember what the caller asked for: op is rebound below, and
        # the error message should report the request, not the lookup
        # result (which is None when the method does not exist).
        requested_op = op
        if isinstance(op, basestring):
            # Resolve the method name on the type of a throwaway
            # instance, yielding an unbound two-argument callable.
            test_init = init()
            op = getattr(type(test_init), op, None)
        if not callable(op):
            raise ValueError('expected callable "op" arg or an "init" with an .update()'
                             ' method not %r and %r' % (requested_op, init))
        super().__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        """Merge every item of *iterator* into a fresh ``init()`` object."""
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op

        for v in iterator:
            op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Merge one *target* into this spec's accumulator in *tree*."""
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        # op mutates acc in place; its return value is ignored
        self.op(acc, target)
        return acc
def merge(target, **kwargs):
    """By default, ``merge()`` turns an iterable of mappings into a
    single, merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. A new mapping is created and none of
    the passed mappings are modified.

    >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
    >>> res = merge(target)
    >>> pprint(res)
    {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    The start state can be customized with the *init* keyword
    argument, as well as the update operation, with the *op* keyword
    argument. For more on those customizations, see the :class:`Merge`
    spec.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', dict)
    op = kwargs.pop('op', None)
    if kwargs:
        # anything left over was not a recognized option
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    return glom(target, Merge(subspec=subspec, init=init, op=op))