Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/grouping.py: 28%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

157 statements  

1""" 

2Group mode 

3""" 

4from __future__ import division 

5 

6import random 

7 

8from boltons.typeutils import make_sentinel 

9 

10from .core import glom, MODE, SKIP, STOP, TargetRegistry, Path, T, BadSpec, _MISSING 

11 

12 

# Sentinel used as a scope key: Group.glomit() stores a fresh dict under
# scope[ACC_TREE], and GROUP / Limit re-point it at nested dicts as they
# descend into sub-specs.
ACC_TREE = make_sentinel('ACC_TREE')
ACC_TREE.__doc__ = """
tree of accumulators for aggregation;
structure roughly corresponds to the result,
but is not 1:1; instead the main purpose is to ensure
data is kept until the Group() finishes executing
"""

20 

# Sentinel used as a scope key: Group.glomit() resets scope[CUR_AGG] to None
# before evaluating its spec. Nothing else in this module reads it.
CUR_AGG = make_sentinel('CUR_AGG')
CUR_AGG.__doc__ = """
the spec which is currently performing aggregation --
useful for specs that want to work in either "aggregate"
mode, or "spec" mode depending on if they are in Group mode
or not; this sentinel in the Scope allows a spec to decide
if it is "closest" to the Group and so should behave
like an aggregate, or if it is further away and so should
have normal spec behavior.
"""

31 

32 

def target_iter(target, scope):
    """Return an iterator over *target* via the registered 'iterate' handler.

    The handler is looked up on ``scope[TargetRegistry]`` for this target,
    passing the current ``scope[Path]`` as ``path``.  Any exception raised
    while calling the handler is converted into a ``TypeError`` that names
    the target's type, the current path and the original error.
    """
    registry = scope[TargetRegistry]
    handler = registry.get_handler('iterate', target, path=scope[Path])

    try:
        return handler(target)
    except Exception as e:
        type_name = target.__class__.__name__
        cur_path = Path(*scope[Path])
        raise TypeError('failed to iterate on instance of type %r at %r (got %r)'
                        % (type_name, cur_path, e))

42 

43 

class Group(object):
    """supports nesting grouping operations --
    think of a glom-style recursive boltons.iterutils.bucketize

    the "branches" of a Group spec are dicts;
    the leaves are lists, or an Aggregation object.
    An Aggregation object is any object that defines the
    method agg(target, accumulator), where target is the current
    target and accumulator is a dict maintained by Group mode.

    For example, here each key keeps the most recent value seen for
    it, giving the last even and the last odd number::

    >>> glom(range(10), Group({T % 2: T}))
    {0: 8, 1: 9}

    And here we create a `"bucketized"
    <https://boltons.readthedocs.io/en/latest/iterutils.html#boltons.iterutils.bucketize>`_
    map of even and odd numbers::

    >>> glom(range(10), Group({T % 2: [T]}))
    {0: [0, 2, 4, 6, 8], 1: [1, 3, 5, 7, 9]}

    unlike Iter(), Group() converts an iterable target
    into a single result; Iter() converts an iterable
    target into an iterable result

    """
    def __init__(self, spec):
        self.spec = spec

    def glomit(self, target, scope):
        """Run ``self.spec`` in Group mode over every item of *target*.

        Returns the result of the last evaluation, or the one before it
        when the spec signals STOP.  If nothing is evaluated the result is
        an empty dict/list for dict/list specs and None otherwise.
        """
        scope[MODE] = GROUP
        scope[CUR_AGG] = None  # reset aggregation tripwire for sub-specs
        scope[ACC_TREE] = {}

        # handle the basecase where the spec stops immediately
        # TODO: something smarter
        spec_type = type(self.spec)
        result = spec_type() if spec_type in (dict, list) else None

        for item in target_iter(target, scope):
            previous = result
            result = scope[glom](item, self.spec, scope)
            if result is STOP:
                # STOP is only a signal; hand back the last real result
                return previous
        return result

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.spec)

97 

98 

def GROUP(target, spec, scope):
    """
    Group mode dispatcher; also sentinel for current mode = group

    Dispatch order: objects with a callable ``agg`` attribute are called as
    ``spec.agg(target, tree)``; other callables are called with the target;
    dict and list specs are accumulated into a per-spec accumulator stored
    in the accumulator tree under ``id(spec)``.  Anything else raises
    BadSpec.  Returns STOP when a list sub-spec stops, or when no entry
    of a dict spec wants more values.
    """
    def evaluate(subspec):
        return scope[glom](target, subspec, scope)

    tree = scope[ACC_TREE]  # current accumulator support structure
    if callable(getattr(spec, "agg", None)):
        return spec.agg(target, tree)
    if callable(spec):
        return spec(target)

    spec_type = type(spec)
    if spec_type not in (dict, list):
        raise BadSpec("Group mode expected dict, list, callable, or"
                      " aggregator, not: %r" % (spec,))

    # one accumulator per spec object, created on first use
    spec_key = id(spec)
    if spec_key in tree:
        acc = tree[spec_key]
    else:
        acc = tree[spec_key] = spec_type()

    if spec_type is list:
        for subspec in spec:
            if type(subspec) is dict:
                # doesn't make sense due to arity mismatch. did you mean [Auto({...})] ?
                raise BadSpec('dicts within lists are not'
                              ' allowed while in Group mode: %r' % spec)
            value = evaluate(subspec)
            if value is STOP:
                return STOP
            if value is not SKIP:
                acc.append(value)
        return acc

    if spec_type is dict:
        exhausted = True
        for keyspec, valspec in spec.items():
            # entries that already signalled STOP are not evaluated again
            if tree.get(keyspec) is STOP:
                continue
            key = evaluate(keyspec)
            if key is SKIP:
                exhausted = False  # SKIP means we still want more vals
                continue
            if key is STOP:
                tree[keyspec] = STOP
                continue
            if key not in acc:
                # TODO: guard against key == id(spec)
                tree[key] = {}
            # the value spec accumulates into the sub-tree of its key
            scope[ACC_TREE] = tree[key]
            value = evaluate(valspec)
            if value is STOP:
                tree[keyspec] = STOP
                continue
            exhausted = False  # SKIP or returning a value means we still want more vals
            if value is not SKIP:
                acc[key] = value
        return STOP if exhausted else acc

    raise ValueError("{} not a valid spec type for Group mode".format(spec_type))  # pragma: no cover

157 

158 

class First(object):
    """
    holds onto the first value

    >>> glom([1, 2, 3], Group(First()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Return *target* on the first call for a given tree, STOP afterwards."""
        if self in tree:
            return STOP
        # mark this aggregator as already satisfied in the tree
        tree[self] = STOP
        return target

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)

176 

177 

class Avg(object):
    """
    takes the numerical average of all values;
    a value that cannot be added to the running sum raises

    >>> glom([1, 2, 3], Group(Avg()))
    2.0
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Add *target* to the running totals and return the current mean."""
        if self in tree:
            totals = tree[self]
        else:
            # format is [sum, count]
            totals = tree[self] = [0.0, 0]
        totals[0] += target
        totals[1] += 1
        running_sum, count = totals
        return running_sum / count

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)

200 

201 

class Max(object):
    """
    takes the maximum of all values;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Max()))
    3
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Record *target* if it is the largest seen so far; return the maximum."""
        try:
            best = tree[self]
        except KeyError:
            # first value for this tree
            tree[self] = target
            return target
        if target > best:
            tree[self] = target
            return target
        return best

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)

219 

220 

class Min(object):
    """
    takes the minimum of all values;
    raises exception on values that are not comparable

    >>> glom([1, 2, 3], Group(Min()))
    1
    """
    __slots__ = ()

    def agg(self, target, tree):
        """Record *target* if it is the smallest seen so far; return the minimum."""
        try:
            lowest = tree[self]
        except KeyError:
            # first value for this tree
            tree[self] = target
            return target
        if target < lowest:
            tree[self] = target
            return target
        return lowest

    def __repr__(self):
        return '%s()' % (self.__class__.__name__,)

238 

239 

class Sample(object):
    """takes a random sample of the values

    >>> glom([1, 2, 3], Group(Sample(2)))  # doctest: +SKIP
    [1, 3]
    >>> glom(range(5000), Group(Sample(2)))  # doctest: +SKIP
    [272, 2901]

    The advantage of this over :func:`random.sample` is that this can
    take an arbitrarily-sized, potentially-very-long streaming input
    and returns a fixed-size output. Note that this does not stream
    results out, so your streaming input must have finite length.
    """
    __slots__ = ('size',)

    def __init__(self, size):
        self.size = size

    def agg(self, target, tree):
        """Feed *target* into the reservoir and return the current sample list."""
        # simple reservoir sampling scheme
        # https://en.wikipedia.org/wiki/Reservoir_sampling#Simple_algorithm
        try:
            state = tree[self]
        except KeyError:
            # format is [number of values seen, sample list]
            state = tree[self] = [0, []]
        seen, reservoir = state
        if len(reservoir) >= self.size:
            # reservoir is full: maybe overwrite a random slot
            slot = random.randint(0, seen)
            if slot < self.size:
                reservoir[slot] = target
        else:
            reservoir.append(target)
        state[0] = seen + 1
        return reservoir

    def __repr__(self):
        return '%s(%r)' % (self.__class__.__name__, self.size)

275 

276 

277 

class Limit(object):
    """
    Limits the number of values passed to sub-accumulator

    >>> glom([1, 2, 3], Group(Limit(2)))
    [1, 2]

    To override the default untransformed list output, set the subspec kwarg:

    >>> glom(range(10), Group(Limit(3, subspec={(lambda x: x % 2): [T]})))
    {0: [0, 2], 1: [1]}

    You can even nest Limits in other ``Group`` specs:

    >>> glom(range(10), Group(Limit(5, {(lambda x: x % 2): Limit(2)})))
    {0: [0, 2], 1: [1, 3]}

    """
    __slots__ = ('n', 'subspec')

    def __init__(self, n, subspec=_MISSING):
        self.n = n
        # default subspec collects the targets unchanged into a list
        self.subspec = [T] if subspec is _MISSING else subspec

    def glomit(self, target, scope):
        """Evaluate the subspec for the first ``n`` targets, then return STOP.

        Raises BadSpec when not running in Group mode.
        """
        if scope[MODE] is not GROUP:
            raise BadSpec("Limit() only valid in Group mode")
        tree = scope[ACC_TREE]  # current accumulator support structure
        try:
            state = tree[self]
        except KeyError:
            # format is [number of calls so far, accumulator tree for the subspec]
            state = tree[self] = [0, {}]
        scope[ACC_TREE] = state[1]
        state[0] += 1
        if state[0] > self.n:
            return STOP
        return scope[glom](target, self.subspec, scope)

    def __repr__(self):
        return '%s(%r, %r)' % (self.__class__.__name__, self.n, self.subspec)