Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/grouping.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Group mode
3"""
4from __future__ import division
6import random
8from boltons.typeutils import make_sentinel
10from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING
# Scope key for Group mode's accumulator tree: Group.glomit stores a fresh
# dict under it and GROUP / Limit.glomit read and re-point it.
ACC_TREE = make_sentinel('ACC_TREE')
ACC_TREE.__doc__ = """
tree of accumulators for aggregation;
structure roughly corresponds to the result,
but is not 1:1; instead the main purpose is to ensure
data is kept until the Group() finishes executing
"""

# Scope key naming the spec currently aggregating; Group.glomit resets it
# to None for its sub-specs.
CUR_AGG = make_sentinel('CUR_AGG')
CUR_AGG.__doc__ = """
the spec which is currently performing aggregation --
useful for specs that want to work in either "aggregate"
mode, or "spec" mode depending on if they are in Group mode
or not; this sentinel in the Scope allows a spec to decide
if it is "closest" to the Group and so should behave
like an aggregate, or if it is further away and so should
have normal spec behavior.
"""
def target_iter(target, scope):
    """Return an iterator over *target* via the registered 'iterate' handler.

    The handler is looked up on the ``TargetRegistry`` held in *scope*,
    using the scope's current ``Path``.  Any exception raised while calling
    the handler is replaced by a ``TypeError`` naming the target's type,
    the current path and the underlying error.
    """
    registry = scope[TargetRegistry]
    handler = registry.get_handler('iterate', target, path=scope[Path])

    try:
        return handler(target)
    except Exception as e:
        type_name = target.__class__.__name__
        current_path = Path(*scope[Path])
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (type_name, current_path, e))
class Group(object):
    """Spec that folds an iterable target into one grouped result --
    roughly a recursive, glom-flavoured boltons.iterutils.bucketize.

    Inside a Group spec, dicts form the "branches" and the leaves are
    either lists or Aggregation objects.  An Aggregation object is any
    object exposing the method agg(target, accumulator), where target
    is the current target and accumulator is a dict maintained by
    Group mode.

    For example, this maps each parity to the last value seen with it::

       >>> glom(range(10), Group({T % 2: T}))
       {0: 8, 1: 9}

    And this builds a `"bucketized"
    <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.bucketize>`_
    map of even and odd numbers::

       >>> glom(range(10), Group({T % 2: [T]}))
       {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]}

    Contrast with Iter(): Iter() turns an iterable target into an
    iterable result, whereas Group() turns an iterable target into a
    single result.

    """
    def __init__(self, spec):
        self.spec = spec

    def glomit(self, target, scope):
        # switch the scope into Group mode with a clean accumulator tree
        scope[MODE] = GROUP
        scope[CUR_AGG] = None  # reset aggregation tripwire for sub-specs
        scope[ACC_TREE] = {}

        # base case: if the spec stops on the very first item, fall back
        # to an empty container of the spec's own type (or None)
        # TODO: something smarter
        spec = self.spec
        spec_type = type(spec)
        result = spec_type() if spec_type in (dict, list) else None

        for item in target_iter(target, scope):
            previous = result
            result = scope[glom](item, spec, scope)
            if result is STOP:
                # STOP is a signal, not a value: report the prior result
                return previous
        return result

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.spec)
def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatch on *spec*:

    * object with a callable ``agg`` attribute: returns
      ``spec.agg(target, tree)`` where ``tree`` is ``scope[ACC_TREE]``
    * any other callable: returns ``spec(target)``
    * dict: evaluates every key spec / value spec pair against *target*
      and stores results in an accumulator dict, which is returned; returns
      STOP once no pair wants more values
    * list: evaluates every value spec and appends non-SKIP results to an
      accumulator list, which is returned; returns STOP as soon as any
      value spec returns STOP

    Raises BadSpec for any other spec type, and for a dict nested
    directly inside a list.
    """
    # re-enter glom on a sub-spec with the same target and scope
    recurse = lambda spec: scope[glom](target, spec, scope)
    tree = scope[ACC_TREE]  # current accumulator support structure
    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    elif callable(spec):
        return spec(target)
    _spec_type = type(spec)
    if _spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))
    # the accumulator for this dict/list spec lives in the tree under the
    # spec object's id, and is created empty on first use
    _spec_id = id(spec)
    try:
        acc = tree[_spec_id]  # current accumulator
    except KeyError:
        acc = tree[_spec_id] = _spec_type()
    if _spec_type is dict:
        # stays True only if every pair is already stopped or stops now
        done = True
        for keyspec, valspec in spec.items():
            # a pair that previously returned STOP is marked in the tree
            # under its keyspec and is skipped from then on
            if tree.get(keyspec, None) is STOP:
                continue
            key = recurse(keyspec)
            if key is SKIP:
                done = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                # NOTE(review): a key whose value spec has only ever
                # returned SKIP is not in acc, so its sub-tree is recreated
                # here -- confirm that is intended
                tree[key] = {}
            # point the scope at the per-key sub-tree before evaluating the
            # value spec; presumably nested Group-mode calls read it from
            # scope[ACC_TREE] as this function does -- verify in core
            scope[ACC_TREE] = tree[key]
            result = recurse(valspec)
            if result is STOP:
                tree[keyspec] = STOP
                continue
            done = False  # SKIP or returning a value means we still want more vals
            if result is not SKIP:
                acc[key] = result
        if done:
            return STOP
        return acc
    elif _spec_type is list:
        for valspec in spec:
            if type(valspec) is dict:
                # doesn't make sense due to arity mismatch.  did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            result = recurse(valspec)
            if result is STOP:
                return STOP
            if result is not SKIP:
                acc.append(result)
        return acc
    # both accepted spec types return above, so this is a defensive fallback
    raise ValueError("{} not a valid spec type for Group mode".format(_spec_type))  # pragma: no cover
class First(object):
    """
    aggregator that keeps only the first value it is given

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        # presence of self in the tree means a value was already returned
        already_seen = self in tree
        if already_seen:
            return STOP
        tree[self] = STOP
        return target

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)
class Avg(object):
    """
    aggregator producing the running arithmetic mean of all values;
    a non-numeric value raises an exception

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        try:
            running = tree[self]
        except KeyError:
            # state layout is [sum, count]
            running = [0.0, 0]
            tree[self] = running
        running[0] += target
        running[1] += 1
        total, count = running
        return total / count

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)
class Max(object):
    """
    aggregator tracking the largest value seen so far;
    values that cannot be compared raise an exception

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        # keep the stored value unless this is the first value or a new high
        if self in tree and not (target > tree[self]):
            return tree[self]
        tree[self] = target
        return tree[self]

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)
class Min(object):
    """
    aggregator tracking the smallest value seen so far;
    values that cannot be compared raise an exception

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        # keep the stored value unless this is the first value or a new low
        if self in tree and not (target < tree[self]):
            return tree[self]
        tree[self] = target
        return tree[self]

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)
class Sample(object):
    """aggregator that keeps a random sample of the values

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    Compared with :func:`random.sample`, this accepts an
    arbitrarily-sized, potentially-very-long streaming input and still
    returns a fixed-size output.  Results are not streamed out, though,
    so the streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        if self not in tree:
            # state layout is [number of values seen, reservoir list]
            tree[self] = [0, []]
        state = tree[self]
        seen_so_far, reservoir = state
        if len(reservoir) >= self.size:
            # reservoir is full: overwrite a slot only if the draw lands in it
            slot = random.randint(0, seen_so_far)
            if slot < self.size:
                reservoir[slot] = target
        else:
            reservoir.append(target)
        state[0] += 1
        return reservoir

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.size)
class Limit(object):
    """
    Caps how many values are handed on to the sub-accumulator

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    The default output is an untransformed list; pass the subspec kwarg
    to change that:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    Limits may also be nested inside other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        self.n = n
        # each instance without an explicit subspec gets its own [T] list
        self.subspec = [T] if subspec is _MISSING else subspec

    def glomit(self, target, scope):
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        if self not in tree:
            # state layout is [number of calls so far, sub-tree for subspec]
            tree[self] = [0, {}]
        state = tree[self]
        scope[ACC_TREE] = state[1]
        state[0] += 1
        if state[0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.n, self.subspec)