Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/glom/reduction.py: 66%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

128 statements  

1import operator 

2import itertools 

3from pprint import pprint 

4 

5from boltons.typeutils import make_sentinel 

6 

7from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE 

8from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG 

9 

# Module-level sentinel built with boltons' ``make_sentinel``. It is not
# referenced by any other code shown in this module.
_MISSING = make_sentinel('_MISSING')

11 

12 

# Compatibility alias kept so existing ``isinstance(x, basestring)`` checks
# in this module keep working. The module already relies on Python 3-only
# syntax (zero-argument ``super()``), and Python 3 has no ``basestring``
# builtin, so the alias is always ``str``.
basestring = str

17 

18 

class FoldError(GlomError):
    """Raised when a :class:`Fold` (or one of its subtypes) is applied
    to a target that cannot be iterated. Further uses may be added in
    the future.
    """

23 

24 

class Fold:
    """`Fold` is the specifier type glom provides for reducing
    iterables found in data. It implements the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, and is comparable to Python's
    built-in :func:`reduce`.

    Args:
       subspec: A spec which yields the target to fold. The result
         must be iterable, or otherwise registered to 'iterate' (with
         :func:`~glom.register`).
       init (callable): A function or type, called with no arguments,
         which produces the starting accumulator value.
       op (callable): A function called with the accumulator and each
         value in turn; whatever it returns becomes the new
         accumulator. Defaults to :func:`operator.iadd`.

    Example usage:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    ``spec`` and ``init`` are required. ``op`` is optional in
    general, but is needed in the example above because neither
    :class:`set` nor :class:`frozenset` supports addition.

    :class:`~glom.Fold` is the general-purpose tool;
    :class:`~glom.Flatten` and :class:`~glom.Sum` are subtypes with
    friendlier defaults for everyday work.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        self.subspec = subspec
        self.init = init
        self.op = op
        # Attributes are stored first and validated afterwards; ``op`` is
        # checked before ``init``.
        type_name = self.__class__.__name__
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (type_name, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (type_name, init))

    def glomit(self, target, scope):
        """Evaluate this spec against *target* within *scope*.

        When the scope is in ``GROUP`` mode and no current aggregator
        is set, this instance claims the ``CUR_AGG`` slot and the
        (sub-spec'd) target is folded into ``scope[ACC_TREE]`` as a
        single value. Otherwise the target is iterated and reduced.

        Raises:
           FoldError: if the target is not registered for iteration.
        """
        aggregating = scope[MODE] is GROUP and scope.get(CUR_AGG) is None
        if aggregating:
            scope[CUR_AGG] = self

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if aggregating:
            return self._agg(target, scope[ACC_TREE])

        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        """Reduce *iterator* to a single value, starting from
        ``self.init()`` and rebinding the accumulator to the result of
        ``self.op`` at every step."""
        acc = self.init()
        combine = self.op

        for item in iterator:
            acc = combine(acc, item)

        return acc

    def _agg(self, target, tree):
        """Fold the single value *target* into the accumulator kept
        under this instance's key in *tree*, creating it with
        ``self.init()`` on first use. Returns the updated accumulator."""
        if self not in tree:
            tree[self] = self.init()
        updated = self.op(tree[self], target)
        tree[self] = updated
        return updated

    def __repr__(self):
        # ``op`` is only displayed when it differs from the default.
        shown = {'init': self.init}
        if self.op is not operator.iadd:
            shown['op'] = self.op
        return format_invocation(self.__class__.__name__, (self.subspec,),
                                 shown, repr=bbrepr)

105 

106 

class Sum(Fold):
    """`Sum` aggregates integers and other numeric values by adding
    them together, in the manner of the :func:`sum()` builtin.

       >>> glom(range(5), Sum())
       10

    Like its relatives, this specifier takes a *callable* for *init*.
    To start from a different value, wrap that value in a callable::

       >>> glom(range(5), Sum(init=lambda: 5.0))
       15.0

    For "summing" lists and other iterables, see the :class:`Flatten`
    spec. For everything else, see the :class:`Fold` specifier type.

    """
    def __init__(self, subspec=T, init=int):
        # The operation is fixed to in-place addition.
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        # Arguments left at their defaults (T / int) are omitted.
        positional = (self.subspec,) if self.subspec is not T else ()
        keyword = {} if self.init is int else {'init': self.init}
        return format_invocation(self.__class__.__name__, positional,
                                 keyword, repr=bbrepr)

133 

134 

class Count(Fold):
    """
    Counts how many values occurred.

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # Start from int() and add one per value; the value itself is ignored.
        super().__init__(subspec=T,
                         init=int,
                         op=lambda total, _value: total + 1)

    def __repr__(self):
        cn = self.__class__.__name__
        return '%s()' % cn

150 

151 

class Flatten(Fold):
    """`Flatten` combines iterables. With its defaults, an iterable of
    iterables becomes one list holding the items of every inner
    iterable.

    >>> target = [[1], [2, 3]]
    >>> glom(target, Flatten())
    [1, 2, 3]

    Passing ``"lazy"`` as *init* yields a generator rather than a
    list, which avoids building extra lists and other collections
    during intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        # "lazy" is a marker, not a callable: remember the flag and hand
        # ``list`` to the base class in its place.
        self.lazy = True if init == 'lazy' else False
        if self.lazy:
            init = list
        super().__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        """Chain the inner iterables lazily when ``self.lazy`` is set;
        otherwise defer to the base class reduction."""
        if not self.lazy:
            return super()._fold(iterator)
        return itertools.chain.from_iterable(iterator)

    def __repr__(self):
        # Defaults (T / list) are omitted; lazy mode is shown as init='lazy'.
        positional = (self.subspec,) if self.subspec is not T else ()
        keyword = {}
        if self.lazy:
            keyword['init'] = 'lazy'
        elif self.init is not list:
            keyword['init'] = self.init
        return format_invocation(self.__class__.__name__, positional,
                                 keyword, repr=bbrepr)

187 

188 

def flatten(target, **kwargs):
    """In its simplest form, ``flatten()`` converts an iterable of
    iterables into one list. A few keyword arguments extend that:

    Args:

       init (callable): A function or type producing the initial
         value of the result. That value must support addition.
         Typical choices are :class:`list` (the default),
         :class:`tuple`, or even :class:`int`. ``init="lazy"`` is
         also accepted, and gives back a generator.
       levels (int): How many levels of nesting to flatten. Must not
         be negative; ``0`` returns the target unchanged. Defaults
         to 1.
       spec: The glomspec to fetch before flattening. Defaults to the
         root level of the object.

    Basic usage:

    >>> target = [[1, 2], [3], [4]]
    >>> flatten(target)
    [1, 2, 3, 4]

    Since integers support addition too, two levels of flattening
    are possible here, producing a single integer sum:

    >>> flatten(target, init=int, levels=2)
    10

    Flattening something non-iterable, such as an integer, raises an
    exception:

    >>> target = 10
    >>> flatten(target)
    Traceback (most recent call last):
    ...
    FoldError: can only Flatten on iterable targets, not int type (...)

    By default ``flatten()`` adds a mix of iterable types together,
    which makes it a more robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick familiar to most
    experienced Python programmers:

    >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
    >>> sum(list_of_iterables, [])
    Traceback (most recent call last):
    ...
    TypeError: can only concatenate list (not "range") to list

    Whereas flatten() handles this just fine:

    >>> flatten(list_of_iterables)
    [0, 1, 2, 3, 4, 5]

    ``flatten()`` is a convenience wrapper around the
    :class:`Flatten` specifier type. To embed flattening in larger
    specs, or for more involved cases, see :class:`Flatten` and its
    base, :class:`Fold`.

    Raises:
       TypeError: if unexpected keyword arguments are passed.
       ValueError: if *levels* is negative.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))

    if levels == 0:
        return target
    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)

    # All levels except the last are chained lazily; only the final
    # level applies the caller's ``init``.
    lazy_steps = (Flatten(init="lazy"),) * (levels - 1)
    final_step = (Flatten(init=init),)
    spec = (subspec,) + lazy_steps + final_step

    return glom(target, spec)

264 

265 

class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
         instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
         performs a merge of the second into the first. Can also be
         the string name of a method, which is looked up on the type
         of the instance created from *init*. Defaults to
         ``"update"``.

    Raises:
       ValueError: if *op* is neither callable nor the name of a
         callable attribute on the type of ``init()``.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.

    """
    def __init__(self, subspec=T, init=dict, op=None):
        # Keep the caller's request so the error below can report what
        # was actually asked for, not the failed lookup result.
        requested_op = 'update' if op is None else op
        method_name = 'update'
        if isinstance(requested_op, basestring):
            method_name = requested_op
            # Resolve the method by name on the type of a throwaway
            # instance produced by ``init``.
            op = getattr(type(init()), requested_op, None)
        else:
            op = requested_op
        if not callable(op):
            raise ValueError('expected callable "op" arg or an "init" with an .%s()'
                             ' method not %r and %r'
                             % (method_name, requested_op, init))
        super().__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        """Merge every value from *iterator* into a fresh ``self.init()``
        instance and return that instance."""
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op

        for v in iterator:
            op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Merge the single value *target* into the accumulator stored
        under this instance's key in *tree*, creating it with
        ``self.init()`` on first use. Returns the accumulator."""
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        # The return value of op is ignored; acc is mutated in place.
        self.op(acc, target)
        return acc

320 

321 

def merge(target, **kwargs):
    """With its defaults, ``merge()`` combines an iterable of mappings
    into one merged :class:`dict`, relying on the behavior of the
    :meth:`~dict.update` method. The result is a new mapping; none of
    the mappings passed in are modified.

    >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
    >>> res = merge(target)
    >>> pprint(res)
    {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    The *init* keyword argument customizes the start state and the
    *op* keyword argument customizes the update operation. A *spec*
    keyword argument is also accepted and passed on as the subspec.
    See the :class:`Merge` spec for details on these customizations.

    Raises:
       TypeError: if unexpected keyword arguments are passed.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', dict)
    op = kwargs.pop('op', None)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs.keys()))
    return glom(target, Merge(subspec, init, op))