Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/glom/reduction.py: 66%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

128 statements  

1 

2import operator 

3import itertools 

4from pprint import pprint 

5 

6from boltons.typeutils import make_sentinel 

7 

8from .core import T, glom, GlomError, format_invocation, bbrepr, UnregisteredTarget, MODE 

9from .grouping import GROUP, target_iter, ACC_TREE, CUR_AGG 

10 

# Module-private sentinel object built with boltons' make_sentinel().
# NOTE(review): nothing in the visible part of this module references
# it -- confirm whether it is still needed.
_MISSING = make_sentinel('_MISSING')


# Python 2/3 compatibility shim: ``basestring`` only exists as a builtin
# on Python 2. Where the bare name lookup raises NameError, alias it to
# ``str`` so the isinstance() check in Merge.__init__ works either way.
try:
    basestring
except NameError:
    basestring = str

18 

19 

class FoldError(GlomError):
    """Raised when a :class:`Fold` (or one of its subtypes) is asked to
    reduce a target which cannot be iterated. Other fold-related
    failures may come to use this type in the future.
    """

24 

25 

class Fold(object):
    """Reduce an iterable target down to a single accumulated value.

    ``Fold`` is glom's general-purpose reduction specifier, an
    implementation of the classic `fold
    <https://en.wikipedia.org/wiki/Fold_(higher-order_function)>`_
    from functional programming, comparable to Python's
    :func:`reduce`.

    Args:
       subspec: A spec locating the value to fold. That value must be
         iterable, or otherwise registered to 'iterate' (with
         :func:`~glom.register`).
       init (callable): Invoked with no arguments to create the
         starting accumulator value.
       op (callable): Invoked as ``op(accumulator, value)`` for every
         value; whatever it returns becomes the new accumulator.
         Defaults to :func:`operator.iadd`.

    Raises:
       TypeError: if *op* or *init* is not callable.

    For example:

       >>> target = [set([1, 2]), set([3]), set([2, 4])]
       >>> result = glom(target, Fold(T, init=frozenset, op=frozenset.union))
       >>> result == frozenset([1, 2, 3, 4])
       True

    Both ``subspec`` and ``init`` are required. ``op`` is optional in
    general, but is needed above because :class:`set` and
    :class:`frozenset` do not support addition.

    For everyday work, the :class:`~glom.Flatten` and
    :class:`~glom.Sum` subtypes offer friendlier defaults.
    """
    def __init__(self, subspec, init, op=operator.iadd):
        self.subspec, self.init, self.op = subspec, init, op

        # op is checked ahead of init, so when both are bad the op is
        # the one reported.
        if not callable(op):
            raise TypeError('expected callable for %s op param, not: %r' %
                            (self.__class__.__name__, op))
        if not callable(init):
            raise TypeError('expected callable for %s init param, not: %r' %
                            (self.__class__.__name__, init))

    def glomit(self, target, scope):
        """Evaluate this spec against *target* within glom *scope*.

        In GROUP mode, when no aggregator has been claimed in the
        scope yet, this object claims the slot and the (sub-spec'd)
        target is fed to :meth:`_agg` as a single value. In every other
        case the target is iterated and reduced with :meth:`_fold`.

        Raises:
           FoldError: if the target is not registered for iteration.
        """
        aggregating = scope[MODE] is GROUP and scope.get(CUR_AGG) is None
        if aggregating:
            scope[CUR_AGG] = self

        if self.subspec is not T:
            target = scope[glom](target, self.subspec, scope)

        if aggregating:
            return self._agg(target, scope[ACC_TREE])

        try:
            return self._fold(target_iter(target, scope))
        except UnregisteredTarget as ut:
            raise FoldError('can only %s on iterable targets, not %s type (%s)'
                            % (self.__class__.__name__, type(target).__name__, ut))

    def _fold(self, iterator):
        """Reduce *iterator*: start from ``init()`` and rebind the
        accumulator to ``op(accumulator, value)`` for each value."""
        acc = self.init()
        combine = self.op

        for item in iterator:
            acc = combine(acc, item)

        return acc

    def _agg(self, target, tree):
        """Fold the single value *target* into the accumulator stored
        in *tree* under this spec, creating it with ``init()`` on first
        use, and return the updated accumulator."""
        if self not in tree:
            tree[self] = self.init()
        tree[self] = self.op(tree[self], target)
        return tree[self]

    def __repr__(self):
        # op is only displayed when it differs from the default
        shown = {'init': self.init}
        if self.op is not operator.iadd:
            shown['op'] = self.op
        return format_invocation(self.__class__.__name__, (self.subspec,),
                                 shown, repr=bbrepr)

106 

107 

class Sum(Fold):
    """Add up the values of an iterable target, in the manner of the
    :func:`sum()` builtin. Intended for integers and other numeric
    values.

    >>> glom(range(5), Sum())
    10

    As with the other fold specifiers, *init* is a callable. To begin
    from a different start value, wrap that value in a callable::

    >>> glom(range(5), Sum(init=lambda: 5.0))
    15.0

    For "summing" lists and other iterables, use :class:`Flatten`.
    For anything else, use the more general :class:`Fold`.

    """
    def __init__(self, subspec=T, init=int):
        super(Sum, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def __repr__(self):
        # arguments left at their defaults are omitted from the repr
        args = (self.subspec,)
        if self.subspec is T:
            args = ()
        kwargs = {}
        if self.init is not int:
            kwargs['init'] = self.init
        return format_invocation(self.__class__.__name__, args, kwargs,
                                 repr=bbrepr)

134 

135 

class Count(Fold):
    """
    Tallies the number of values seen, ignoring the values themselves.

    >>> glom([1, 2, 3], Count())
    3
    """
    __slots__ = ()

    def __init__(self):
        # start at int() == 0 and add one per value, whatever it is
        super(Count, self).__init__(
            subspec=T,
            init=int,
            op=lambda tally, _value: tally + 1,
        )

    def __repr__(self):
        name = self.__class__.__name__
        return '%s()' % name

151 

152 

class Flatten(Fold):
    """Combine an iterable of iterables into one. Out of the box, the
    result is a single list holding the items of every inner
    iterable.

    >>> target = [[1], [2, 3]]
    >>> glom(target, Flatten())
    [1, 2, 3]

    Passing ``"lazy"`` as *init* yields a generator rather than a
    list, which avoids building extra lists and other collections
    during intermediate processing steps.
    """
    def __init__(self, subspec=T, init=list):
        # "lazy" is a flag, not a real initializer: record it and give
        # the base class a callable init so its validation passes.
        self.lazy = bool(init == 'lazy')
        if self.lazy:
            init = list
        super(Flatten, self).__init__(subspec=subspec, init=init, op=operator.iadd)

    def _fold(self, iterator):
        """Chain the inner iterables lazily when ``self.lazy`` is set;
        otherwise defer to the accumulating :meth:`Fold._fold`."""
        if not self.lazy:
            return super(Flatten, self)._fold(iterator)
        return itertools.chain.from_iterable(iterator)

    def __repr__(self):
        # arguments left at their defaults are omitted from the repr
        args = (self.subspec,)
        if self.subspec is T:
            args = ()

        kwargs = {}
        if self.lazy:
            kwargs['init'] = 'lazy'
        elif self.init is not list:
            kwargs['init'] = self.init

        return format_invocation(self.__class__.__name__, args, kwargs,
                                 repr=bbrepr)

188 

189 

def flatten(target, **kwargs):
    """In its simplest form, ``flatten()`` turns an iterable of
    iterables into a single list. A few keyword arguments extend
    what it can do:

    Args:

       init (callable): A function or type producing the initial
         value of the result. That value must support addition.
         Typical choices are :class:`list` (the default),
         :class:`tuple`, or even :class:`int`. ``init="lazy"`` is
         also accepted, and gives back a generator.
       levels (int): A positive integer, the number of nested levels
         to flatten. Defaults to 1.
       spec: The glomspec to fetch before flattening. Defaults to the
         root level of the object.

    Raises:
       TypeError: on unrecognized keyword arguments.
       ValueError: if *levels* is negative.

    Basic usage:

    >>> target = [[1, 2], [3], [4]]
    >>> flatten(target)
    [1, 2, 3, 4]

    Integers support addition too, so a second level of flattening is
    possible here, collapsing everything to a single integer sum:

    >>> flatten(target, init=int, levels=2)
    10

    Flattening something which is not iterable, such as an integer,
    raises an exception:

    >>> target = 10
    >>> flatten(target)
    Traceback (most recent call last):
    ...
    FoldError: can only Flatten on iterable targets, not int type (...)

    By default ``flatten()`` adds together a mix of iterable types,
    which makes it a more-robust alternative to the built-in
    ``sum(list_of_lists, list())`` trick familiar to most experienced
    Python programmers:

    >>> list_of_iterables = [range(2), [2, 3], (4, 5)]
    >>> sum(list_of_iterables, [])
    Traceback (most recent call last):
    ...
    TypeError: can only concatenate list (not "tuple") to list

    Whereas flatten() handles this just fine:

    >>> flatten(list_of_iterables)
    [0, 1, 2, 3, 4, 5]

    ``flatten()`` is a convenience wrapper around the
    :class:`Flatten` specifier type. To embed flattening in larger
    specs, or for more involved flattening, see :class:`Flatten` and
    its base, :class:`Fold`.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', list)
    levels = kwargs.pop('levels', 1)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs))

    if levels == 0:
        return target
    if levels < 0:
        raise ValueError('expected levels >= 0, not %r' % levels)

    # Every level except the last is flattened lazily (one shared
    # Flatten instance, repeated), and only the final level builds the
    # requested init type.
    lazy_step = Flatten(init="lazy")
    steps = [subspec] + [lazy_step] * (levels - 1) + [Flatten(init=init)]

    return glom(target, tuple(steps))

265 

266 

class Merge(Fold):
    """By default, Merge turns an iterable of mappings into a single,
    merged :class:`dict`, leveraging the behavior of the
    :meth:`~dict.update` method. The start state can be customized
    with *init*, as well as the update operation, with *op*.

    Args:
       subspec: The location of the iterable of mappings. Defaults to ``T``.
       init (callable): A type or callable which returns a base
         instance into which all other values will be merged.
       op (callable): A callable, which takes two arguments, and
         performs a merge of the second into the first. Can also be
         the string name of a method, which is looked up on the type
         of an instance created from *init*. Defaults to ``"update"``.

    Raises:
       ValueError: if *op* is neither callable nor the name of a
         callable attribute on the type returned by *init*.

    .. note::

       Besides the differing defaults, the primary difference between
       :class:`Merge` and other :class:`Fold` subtypes is that its
       *op* argument is assumed to be a two-argument function which
       has no return value and modifies the left parameter
       in-place. Because the initial state is a new object created with
       the *init* parameter, none of the target values are modified.

    """
    def __init__(self, subspec=T, init=dict, op=None):
        if op is None:
            op = 'update'
        # Remember what was asked for: `op` is about to be rebound to
        # the resolved method (or None), and the error message below
        # needs to report the original request.
        requested_op = op
        if isinstance(op, basestring):
            # resolve the method name against the type of a throwaway
            # instance, yielding an unbound two-argument callable
            op = getattr(type(init()), op, None)
        if not callable(op):
            if isinstance(requested_op, basestring):
                method_name = requested_op
            else:
                method_name = 'update'
            raise ValueError('expected callable "op" arg or an "init" with an .%s()'
                             ' method not %r and %r'
                             % (method_name, requested_op, init))
        super(Merge, self).__init__(subspec=subspec, init=init, op=op)

    def _fold(self, iterator):
        """Merge every value from *iterator* into a fresh ``init()``
        instance and return that instance."""
        # the difference here is that ret is mutated in-place, the
        # variable not being reassigned, as in base Fold.
        ret, op = self.init(), self.op

        for v in iterator:
            op(ret, v)

        return ret

    def _agg(self, target, tree):
        """Merge the single value *target* into the accumulator stored
        in *tree* under this spec (created with ``init()`` on first
        use) and return the accumulator."""
        if self not in tree:
            acc = tree[self] = self.init()
        else:
            acc = tree[self]
        # op mutates acc in place; its return value is ignored
        self.op(acc, target)
        return acc

321 

322 

def merge(target, **kwargs):
    """Combine an iterable of mappings into one. By default the
    result is a single, merged :class:`dict`, built with the behavior
    of the :meth:`~dict.update` method. A new mapping is created, and
    none of the mappings passed in are modified.

    >>> target = [{'a': 'alpha'}, {'b': 'B'}, {'a': 'A'}]
    >>> res = merge(target)
    >>> pprint(res)
    {'a': 'A', 'b': 'B'}

    Args:
       target: The list of dicts, or some other iterable of mappings.

    Raises:
       TypeError: on unrecognized keyword arguments.

    The *init* keyword argument customizes the start state, and the
    *op* keyword argument customizes the update operation. A *spec*
    keyword argument is also accepted and passed to :class:`Merge` as
    its subspec. See the :class:`Merge` spec for more on these
    customizations.

    """
    subspec = kwargs.pop('spec', T)
    init = kwargs.pop('init', dict)
    op = kwargs.pop('op', None)
    if kwargs:
        raise TypeError('unexpected keyword args: %r' % sorted(kwargs))

    return glom(target, Merge(subspec, init, op))