Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/grouping.py: 28%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1"""
2Group mode
3"""
5import random
7from boltons.typeutils import make_sentinel
9from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING
# Scope key for the accumulator tree: Group.glomit() stores a fresh dict
# under it and GROUP() reads it back as the current accumulator structure.
ACC_TREE = make_sentinel('ACC_TREE')
ACC_TREE.__doc__ = """
tree of accumulators for aggregation;
structure roughly corresponds to the result,
but is not 1:1; instead the main purpose is to ensure
data is kept until the Group() finishes executing
"""
# Scope key for the spec currently aggregating; Group.glomit() resets it
# to None each time a Group starts executing.
CUR_AGG = make_sentinel('CUR_AGG')
CUR_AGG.__doc__ = """
the spec which is currently performing aggregation --
useful for specs that want to work in either "aggregate"
mode, or "spec" mode depending on if they are in Group mode
or not; this sentinel in the Scope allows a spec to decide
if it is "closest" to the Group and so should behave
like an aggregate, or if it is further away and so should
have normal spec behavior.
"""
def target_iter(target, scope):
    """Return an iterator over *target* using its registered handler.

    The ``'iterate'`` handler is obtained from ``scope[TargetRegistry]``
    via ``get_handler``, which is also given the current ``scope[Path]``.

    Args:
        target: the object to iterate over.
        scope: the glom scope; must provide ``TargetRegistry`` and ``Path``.

    Returns:
        Whatever the ``'iterate'`` handler returns for *target*.

    Raises:
        TypeError: if calling the handler on *target* raises any
            ``Exception``. The original exception is attached as the
            explicit cause so the real failure stays visible in tracebacks.
    """
    iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])

    try:
        iterator = iterate(target)
    except Exception as e:
        # Chain explicitly: the TypeError is a translation of ``e``, not an
        # unrelated error that happened while handling it.
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e)) from e
    return iterator
class Group:
    """Aggregate an iterable target into a single result.

    Group supports nested grouping operations -- think of it as a
    glom-style, recursive boltons.iterutils.bucketize.

    The "branches" of a Group spec are dicts. The leaves are lists or
    Aggregation objects, where an Aggregation object is any object that
    defines the method ``agg(target, accumulator)``: ``target`` is the
    current target and ``accumulator`` is a dict maintained by Group mode.

    For example, here each key keeps the last value that mapped to it,
    giving the last even and the last odd number::

       >>> glom(range(10), Group({T % 2: T}))
       {0: 8, 1: 9}

    And here we create a `"bucketized"
    <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.bucketize>`_
    map of even and odd numbers::

       >>> glom(range(10), Group({T % 2: [T]}))
       {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]}

    Unlike Iter(), which converts an iterable target into an iterable
    result, Group() converts an iterable target into a single result.

    """
    def __init__(self, spec):
        self.spec = spec

    def glomit(self, target, scope):
        """Run ``self.spec`` over every item of *target* in Group mode.

        Returns the result produced for the last item processed. If the
        spec returns STOP for an item, the result from the item before it
        is returned instead. If *target* yields nothing, the result is an
        empty dict/list when the spec is exactly a dict/list, else None.
        """
        scope[MODE] = GROUP
        scope[CUR_AGG] = None  # reset aggregation tripwire for sub-specs
        scope[ACC_TREE] = {}

        # handle the basecase where the spec stops immediately
        # TODO: something smarter
        spec_type = type(self.spec)
        result = spec_type() if spec_type in (dict, list) else None

        for item in target_iter(target, scope):
            previous = result
            result = scope[glom](item, self.spec, scope)
            if result is STOP:
                # the STOP marker itself is never handed back to the caller
                return previous
        return result

    def __repr__(self):
        return f'{self.__class__.__name__}({self.spec!r})'
def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatch order:

    * a spec with a callable ``agg`` attribute is called as
      ``spec.agg(target, tree)``, where ``tree`` is ``scope[ACC_TREE]``;
    * any other callable spec is called as ``spec(target)``;
    * a spec that is exactly a ``dict`` or ``list`` is processed
      recursively, with its accumulator stored in the tree under
      ``id(spec)``;
    * anything else raises ``BadSpec``.

    Returns the accumulator (or the aggregator/callable result), or STOP
    once a dict or list spec signals that it wants no more values.
    """
    def recurse(subspec):
        return scope[glom](target, subspec, scope)

    tree = scope[ACC_TREE]  # current accumulator support structure

    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    if callable(spec):
        return spec(target)

    _spec_type = type(spec)
    if _spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))

    # the accumulator for this spec lives in the tree, keyed by the
    # spec's identity; create it on first use
    acc_key = id(spec)
    if acc_key not in tree:
        tree[acc_key] = _spec_type()
    acc = tree[acc_key]  # current accumulator

    if _spec_type is dict:
        exhausted = True  # stays True only if no branch wants more values
        for keyspec, valspec in spec.items():
            # branches that already stopped are marked under their keyspec
            if tree.get(keyspec) is STOP:
                continue
            key = recurse(keyspec)
            if key is SKIP:
                exhausted = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                # NOTE(review): membership is tested against acc, not tree,
                # so a subtree whose valspec has only returned SKIP/STOP so
                # far is replaced here -- confirm this is intended.
                tree[key] = {}
            # sub-specs of this branch accumulate in the key's own subtree
            scope[ACC_TREE] = tree[key]
            result = recurse(valspec)
            if result is STOP:
                tree[keyspec] = STOP
                continue
            exhausted = False  # SKIP or returning a value means we still want more vals
            if result is not SKIP:
                acc[key] = result
        return STOP if exhausted else acc

    if _spec_type is list:
        for valspec in spec:
            if type(valspec) is dict:
                # doesn't make sense due to arity mismatch. did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            result = recurse(valspec)
            if result is STOP:
                return STOP
            if result is not SKIP:
                acc.append(result)
        return acc

    raise ValueError(f"{_spec_type} not a valid spec type for Group mode")  # pragma: no cover
class First:
    """
    Aggregator that keeps only the first value it is given.

    The first call to agg() records a marker in the accumulator tree and
    returns the target; every later call returns STOP.

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        # a marker under ``self`` means a value has already been produced
        if self in tree:
            return STOP
        tree[self] = STOP
        return target

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
class Avg:
    """
    Aggregator returning the running numerical average of the values
    seen so far; a non-numeric value raises an exception.

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        # per-aggregator state in the tree is [sum, count]
        if self not in tree:
            tree[self] = [0.0, 0]
        running = tree[self]
        running[0] += target
        running[1] += 1
        total, count = running
        return total / count

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
class Max:
    """
    Aggregator returning the largest value seen so far;
    values that cannot be compared raise an exception.

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        # keep the stored maximum unless this is the first value or a larger one
        if self in tree and not target > tree[self]:
            return tree[self]
        tree[self] = target
        return target

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
class Min:
    """
    Aggregator returning the smallest value seen so far;
    values that cannot be compared raise an exception.

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        # keep the stored minimum unless this is the first value or a smaller one
        if self in tree and not target < tree[self]:
            return tree[self]
        tree[self] = target
        return target

    def __repr__(self):
        class_name = self.__class__.__name__
        return '%s()' % class_name
class Sample:
    """Aggregator that keeps a random sample of at most ``size`` values.

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    The advantage of this over :func:`random.sample` is that this can
    take an arbitrarily-sized, potentially-very-long streaming input
    and returns a fixed-size output. Note that this does not stream
    results out, so your streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        # per-aggregator state in the tree is [number seen, sample list]
        if self not in tree:
            tree[self] = [0, []]
        state = tree[self]
        seen, reservoir = state
        if len(reservoir) < self.size:
            # still filling the reservoir: every value is kept
            reservoir.append(target)
        else:
            # pick a slot among all positions seen so far (inclusive);
            # the value is kept only if the slot lands inside the reservoir
            slot = random.randint(0, seen)
            if slot < self.size:
                reservoir[slot] = target
        state[0] += 1
        return reservoir

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}({self.size!r})'
class Limit:
    """
    Limits the number of values passed to sub-accumulator

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    To override the default untransformed list output, set the subspec kwarg:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    You can even nest Limits in other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        self.n = n
        # with no subspec given, values are collected untransformed in a list
        self.subspec = [T] if subspec is _MISSING else subspec

    def glomit(self, target, scope):
        """Pass *target* to the subspec until ``n`` values have been seen.

        Raises BadSpec when the scope is not in Group mode. Returns STOP
        once more than ``n`` values have been counted; otherwise returns
        the result of evaluating the subspec against *target*.
        """
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        # per-Limit state in the tree is [values seen, subtree for the subspec]
        if self not in tree:
            tree[self] = [0, {}]
        state = tree[self]
        scope[ACC_TREE] = state[1]
        state[0] += 1
        if state[0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        class_name = self.__class__.__name__
        return f'{class_name}({self.n!r}, {self.subspec!r})'