Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/grouping.py: 28%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

157 statements  

1""" 

2Group mode 

3""" 

4 

5import random 

6 

7from boltons.typeutils import make_sentinel 

8 

9from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING 

10 

11 

# Sentinel used as a scope key: Group.glomit() stores a fresh dict under it
# and GROUP()/Limit.glomit() read and re-point it while descending.
ACC_TREE = make_sentinel('ACC_TREE')
ACC_TREE.__doc__ = """
tree of accumulators for aggregation;
structure roughly corresponds to the result,
but is not 1:1; instead the main purpose is to ensure
data is kept until the Group() finishes executing
"""

# Sentinel used as a scope key: Group.glomit() resets it to None on entry.
CUR_AGG = make_sentinel('CUR_AGG')
CUR_AGG.__doc__ = """
the spec which is currently performing aggregation --
useful for specs that want to work in either "aggregate"
mode, or "spec" mode depending on if they are in Group mode
or not; this sentinel in the Scope allows a spec to decide
if it is "closest" to the Group and so should behave
like an aggregate, or if it is further away and so should
have normal spec behavior.
"""

30 

31 

def target_iter(target, scope):
    """Return an iterator over *target* using its registered 'iterate' handler.

    The handler is looked up through ``scope[TargetRegistry]`` for the
    ``'iterate'`` operation, passing ``scope[Path]`` as the current path.

    Raises:
        TypeError: if calling the handler on *target* raises any
            ``Exception``. The message names the target's type, the
            current path and the original error, and the original error
            is chained as the direct cause.
    """
    iterate = scope[TargetRegistry].get_handler('iterate', target, path=scope[Path])

    try:
        iterator = iterate(target)
    except Exception as e:
        # Chain explicitly so the traceback marks the handler failure as the
        # cause of this TypeError rather than as an incidental context.
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (target.__class__.__name__, Path(*scope[Path]), e)) from e
    return iterator

41 

42 

class Group:
    """Reduce an iterable target to a single, possibly nested, result.

    Supports nesting grouping operations -- think of a glom-style
    recursive boltons.iterutils.bucketize.

    The "branches" of a Group spec are dicts; the leaves are lists or
    Aggregation objects. An Aggregation object is any object that
    defines the method ``agg(target, accumulator)``, where *target* is
    the current target and *accumulator* is a dict maintained by Group
    mode.

    For example, here we get a map of the last even and the last odd
    value::

      >>> glom(range(10), Group({T % 2: T}))
      {0: 8, 1: 9}

    And here we create a `"bucketized"
    <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.bucketize>`_
    map of even and odd numbers::

      >>> glom(range(10), Group({T % 2: [T]}))
      {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]}

    Unlike Iter(), Group() converts an iterable target into a single
    result; Iter() converts an iterable target into an iterable result.

    """
    def __init__(self, spec):
        self.spec = spec

    def glomit(self, target, scope):
        """Evaluate ``self.spec`` once per item of *target* in Group mode.

        Returns the result of the last evaluation, or the one before it
        if an evaluation returned STOP. If nothing was evaluated, the
        result is an empty dict/list for a dict/list spec and None
        otherwise.
        """
        scope[MODE] = GROUP
        scope[CUR_AGG] = None  # reset aggregation tripwire for sub-specs
        scope[ACC_TREE] = {}

        # Base case for when the spec stops immediately (or the target is
        # empty): an empty container of the spec's own type, else None.
        # TODO: something smarter
        spec_type = type(self.spec)
        result = spec_type() if spec_type in (dict, list) else None

        for item in target_iter(target, scope):
            previous = result
            result = scope[glom](item, self.spec, scope)
            if result is STOP:
                return previous
        return result

    def __repr__(self):
        return f'{self.__class__.__name__}({self.spec!r})'

96 

97 

def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatches on *spec*:

    * an object with a callable ``agg`` attribute: returns
      ``spec.agg(target, tree)``, where ``tree`` is ``scope[ACC_TREE]``
    * any other callable: returns ``spec(target)``
    * a dict or list (exact type): each sub-spec is evaluated against
      *target* and the results are collected in an accumulator of the
      same type, stored in the tree under ``id(spec)``
    * anything else: raises BadSpec

    For a dict spec, returns STOP when no key spec asked for more values
    on this call (this includes an empty dict spec), otherwise the
    accumulator dict. For a list spec, returns STOP as soon as any
    sub-spec returns STOP, otherwise the accumulator list.
    """
    # evaluate a sub-spec against the same target in the same scope
    recurse = lambda spec: scope[glom](target, spec, scope)
    tree = scope[ACC_TREE]  # current accumulator support structure
    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    elif callable(spec):
        return spec(target)
    _spec_type = type(spec)
    # exact type check: dict/list subclasses are rejected here
    if _spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))
    # The accumulator for this spec is stored in the tree keyed by the spec
    # object's id(), and created empty on first use.
    _spec_id = id(spec)
    try:
        acc = tree[_spec_id]  # current accumulator
    except KeyError:
        acc = tree[_spec_id] = _spec_type()
    if _spec_type is dict:
        done = True  # stays True only if no keyspec wants more values
        for keyspec, valspec in spec.items():
            # a keyspec previously marked STOP in the tree is never
            # evaluated again
            if tree.get(keyspec, None) is STOP:
                continue
            key = recurse(keyspec)
            if key is SKIP:
                done = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                # NOTE(review): acc[key] is only assigned when the value
                # result is not SKIP, so a key whose valspec has only ever
                # returned SKIP gets its subtree replaced by a fresh dict
                # here on the next item -- confirm this is intended.
                # NOTE(review): computed keys, keyspec STOP markers and
                # id(spec) entries all share this one dict -- confirm
                # collisions cannot occur in practice.
                tree[key] = {}
            # sub-specs evaluated for this key see the key's own subtree
            scope[ACC_TREE] = tree[key]
            result = recurse(valspec)
            if result is STOP:
                tree[keyspec] = STOP
                continue
            done = False  # SKIP or returning a value means we still want more vals
            if result is not SKIP:
                acc[key] = result
        if done:
            return STOP
        return acc
    elif _spec_type is list:
        for valspec in spec:
            if type(valspec) is dict:
                # doesn't make sense due to arity mismatch.  did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            result = recurse(valspec)
            if result is STOP:
                return STOP
            if result is not SKIP:
                acc.append(result)
        return acc
    # unreachable given the type check above; kept as a defensive guard
    raise ValueError(f"{_spec_type} not a valid spec type for Group mode")  # pragma: no cover

156 

157 

class First:
    """
    Aggregator that yields the first value it is given and then stops.

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Return *target* on the first call for this tree, STOP afterwards."""
        if self in tree:
            return STOP
        # the marker's mere presence records that a value was already taken
        tree[self] = STOP
        return target

    def __repr__(self):
        return f'{self.__class__.__name__}()'

175 

176 

class Avg:
    """
    Aggregator that keeps a running arithmetic mean of the values seen;
    raises exception on non-numeric value

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Add *target* to the running total in *tree* and return the mean."""
        if self not in tree:
            # per-aggregator state, format is [sum, count]
            tree[self] = [0.0, 0]
        state = tree[self]
        state[0] += target
        state[1] += 1
        total, count = state
        return total / count

    def __repr__(self):
        return f'{self.__class__.__name__}()'

199 

200 

class Max:
    """
    Aggregator that tracks the largest value seen so far;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Record *target* in *tree* if it is a new maximum; return the maximum."""
        try:
            best = tree[self]
        except KeyError:
            # first value seen for this tree
            tree[self] = target
            return target
        if target > best:
            tree[self] = target
            return target
        return best

    def __repr__(self):
        return f'{self.__class__.__name__}()'

218 

219 

class Min:
    """
    Aggregator that tracks the smallest value seen so far;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Record *target* in *tree* if it is a new minimum; return the minimum."""
        try:
            best = tree[self]
        except KeyError:
            # first value seen for this tree
            tree[self] = target
            return target
        if target < best:
            tree[self] = target
            return target
        return best

    def __repr__(self):
        return f'{self.__class__.__name__}()'

237 

238 

class Sample:
    """Aggregator that keeps a fixed-size random sample of the values.

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    The advantage of this over :func:`random.sample` is that this can
    take an arbitrarily-sized, potentially-very-long streaming input
    and returns a fixed-size output. Note that this does not stream
    results out, so your streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        """Offer *target* to the reservoir kept in *tree*; return the reservoir."""
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        try:
            state = tree[self]
        except KeyError:
            # per-aggregator state, format is [num_seen, sample]
            state = tree[self] = [0, []]
        seen, reservoir = state
        if len(reservoir) >= self.size:
            # reservoir is full: overwrite a slot only if the draw lands in it
            slot = random.randint(0, seen)
            if slot < self.size:
                reservoir[slot] = target
        else:
            reservoir.append(target)
        state[0] += 1
        return reservoir

    def __repr__(self):
        return f'{self.__class__.__name__}({self.size!r})'

274 

275 

276 

class Limit:
    """
    Caps how many values are passed on to the sub-accumulator.

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    To override the default untransformed list output, set the subspec kwarg:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    You can even nest Limits in other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        self.n = n
        # default sub-spec collects the raw targets into a list
        self.subspec = [T] if subspec is _MISSING else subspec

    def glomit(self, target, scope):
        """Evaluate the sub-spec for the first ``n`` targets, then return STOP.

        Raises:
            BadSpec: if the current scope is not in Group mode.
        """
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        try:
            state = tree[self]
        except KeyError:
            # per-Limit state: [number of calls so far, subtree for the sub-spec]
            state = tree[self] = [0, {}]
        # the sub-spec accumulates into this Limit's own subtree
        scope[ACC_TREE] = state[1]
        state[0] += 1
        if state[0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        return f'{self.__class__.__name__}({self.n!r}, {self.subspec!r})'
316 return f'{self.__class__.__name__}({self.n!r}, {self.subspec!r})'