1# Copyright (c) 2013, Mahmoud Hashemi
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7# * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#
10# * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following
12# disclaimer in the documentation and/or other materials provided
13# with the distribution.
14#
15# * The names of the contributors may not be used to endorse or
16# promote products derived from this software without specific
17# prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31""":mod:`itertools` is full of great examples of Python generator
32usage. However, there are still some critical gaps. ``iterutils``
33fills many of those gaps with featureful, tested, and Pythonic
34solutions.
35
Many of the functions below have two versions: one that returns an
iterator (denoted by the ``*_iter`` naming pattern), and a shorter-named
convenience form that returns a list. Some of the following are based on
examples from the :mod:`itertools` docs.
40"""
41
42import os
43import math
44import time
45import codecs
46import random
47import itertools
48from itertools import zip_longest
49from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
50
51
52try:
53 from .typeutils import make_sentinel
54 _UNSET = make_sentinel('_UNSET')
55 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
56except ImportError:
57 _REMAP_EXIT = object()
58 _UNSET = object()
59
60
61def is_iterable(obj):
62 """Similar in nature to :func:`callable`, ``is_iterable`` returns
63 ``True`` if an object is `iterable`_, ``False`` if not.
64
65 >>> is_iterable([])
66 True
67 >>> is_iterable(object())
68 False
69
    .. _iterable: https://docs.python.org/3/glossary.html#term-iterable
71 """
72 try:
73 iter(obj)
74 except TypeError:
75 return False
76 return True
77
78
79def is_scalar(obj):
80 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
81 object is an iterable container type. Strings are considered
82 scalar as well, because strings are more often treated as whole
83 values as opposed to iterables of 1-character substrings.
84
85 >>> is_scalar(object())
86 True
87 >>> is_scalar(range(10))
88 False
89 >>> is_scalar('hello')
90 True
91 """
92 return not is_iterable(obj) or isinstance(obj, (str, bytes))
93
94
95def is_collection(obj):
96 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
97 is an iterable other than a string.
98
99 >>> is_collection(object())
100 False
101 >>> is_collection(range(10))
102 True
103 >>> is_collection('hello')
104 False
105 """
106 return is_iterable(obj) and not isinstance(obj, (str, bytes))
107
108
109def split(src, sep=None, maxsplit=None):
110 """Splits an iterable based on a separator. Like :meth:`str.split`,
111 but for all iterables. Returns a list of lists.
112
113 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
114 [['hi', 'hello'], ['sup'], ['soap']]
115
116 See :func:`split_iter` docs for more info.
117 """
118 return list(split_iter(src, sep, maxsplit))
119
120
121def split_iter(src, sep=None, maxsplit=None):
122 """Splits an iterable based on a separator, *sep*, a max of
123 *maxsplit* times (no max by default). *sep* can be:
124
125 * a single value
126 * an iterable of separators
127 * a single-argument callable that returns True when a separator is
128 encountered
129
130 ``split_iter()`` yields lists of non-separator values. A separator will
131 never appear in the output.
132
133 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
134 [['hi', 'hello'], ['sup'], ['soap']]
135
    Note that ``split_iter`` is based on :func:`str.split`, so if
    *sep* is ``None``, ``split_iter()`` **groups** contiguous
    separators, just as :meth:`str.split` does with whitespace. If empty
    lists are desired between two contiguous ``None`` values, simply use
    ``sep=[None]``:
140
141 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
142 [['hi', 'hello'], ['sup']]
143 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
144 [['hi', 'hello'], [], ['sup'], []]
145
146 Using a callable separator:
147
148 >>> falsy_sep = lambda x: not x
149 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
150 [['hi', 'hello'], [], ['sup'], []]
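
    A quick *maxsplit* example; once the limit is reached, the remaining
    elements, separators included, end up in the final group:

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None], maxsplit=1))
    [['hi', 'hello'], [None, 'sup', None]]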
151
152 See :func:`split` for a list-returning version.
153
154 """
155 if not is_iterable(src):
156 raise TypeError('expected an iterable')
157
    if maxsplit is not None:
        maxsplit = int(maxsplit)
        if maxsplit == 0:
            # mirror str.split(): with maxsplit=0, yield a single group
            # containing all of the elements
            yield list(src)
            return
163
164 if callable(sep):
165 sep_func = sep
166 elif not is_scalar(sep):
167 sep = frozenset(sep)
168 sep_func = lambda x: x in sep
169 else:
170 sep_func = lambda x: x == sep
171
172 cur_group = []
173 split_count = 0
174 for s in src:
175 if maxsplit is not None and split_count >= maxsplit:
176 sep_func = lambda x: False
177 if sep_func(s):
178 if sep is None and not cur_group:
179 # If sep is none, str.split() "groups" separators
180 # check the str.split() docs for more info
181 continue
182 split_count += 1
183 yield cur_group
184 cur_group = []
185 else:
186 cur_group.append(s)
187
188 if cur_group or sep is not None:
189 yield cur_group
190 return
191
192
193def lstrip(iterable, strip_value=None):
194 """Strips values from the beginning of an iterable. Stripped items will
195 match the value of the argument strip_value. Functionality is analogous
196 to that of the method str.lstrip. Returns a list.
197
198 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
199 ['Bar', 'Bam']
200
201 """
202 return list(lstrip_iter(iterable, strip_value))
203
204
205def lstrip_iter(iterable, strip_value=None):
206 """Strips values from the beginning of an iterable. Stripped items will
207 match the value of the argument strip_value. Functionality is analogous
208 to that of the method str.lstrip. Returns a generator.
209
210 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
211 ['Bar', 'Bam']
212
213 """
214 iterator = iter(iterable)
215 for i in iterator:
216 if i != strip_value:
217 yield i
218 break
219 for i in iterator:
220 yield i
221
222
223def rstrip(iterable, strip_value=None):
224 """Strips values from the end of an iterable. Stripped items will
225 match the value of the argument strip_value. Functionality is analogous
226 to that of the method str.rstrip. Returns a list.
227
228 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
229 ['Foo', 'Bar']
230
231 """
    return list(rstrip_iter(iterable, strip_value))
233
234
235def rstrip_iter(iterable, strip_value=None):
236 """Strips values from the end of an iterable. Stripped items will
237 match the value of the argument strip_value. Functionality is analogous
238 to that of the method str.rstrip. Returns a generator.
239
240 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
241 ['Foo', 'Bar']
242
243 """
244 iterator = iter(iterable)
245 for i in iterator:
246 if i == strip_value:
247 cache = list()
248 cache.append(i)
249 broken = False
250 for i in iterator:
251 if i == strip_value:
252 cache.append(i)
253 else:
254 broken = True
255 break
256 if not broken: # Return to caller here because the end of the
257 return # iterator has been reached
258 yield from cache
259 yield i
260
261
262def strip(iterable, strip_value=None):
263 """Strips values from the beginning and end of an iterable. Stripped items
264 will match the value of the argument strip_value. Functionality is
265 analogous to that of the method str.strip. Returns a list.
266
267 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
268 ['Foo', 'Bar', 'Bam']
269
270 """
    return list(strip_iter(iterable, strip_value))
272
273
def strip_iter(iterable, strip_value=None):
275 """Strips values from the beginning and end of an iterable. Stripped items
276 will match the value of the argument strip_value. Functionality is
277 analogous to that of the method str.strip. Returns a generator.
278
279 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
280 ['Foo', 'Bar', 'Bam']
281
282 """
    return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)
284
285
286def chunked(src, size, count=None, **kw):
287 """Returns a list of *count* chunks, each with *size* elements,
288 generated from iterable *src*. If *src* is not evenly divisible by
289 *size*, the final chunk will have fewer than *size* elements.
    Pass the *fill* keyword argument to supply a pad value and enable
    padding; otherwise, no padding will take place.
292
293 >>> chunked(range(10), 3)
294 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
295 >>> chunked(range(10), 3, fill=None)
296 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
297 >>> chunked(range(10), 3, count=2)
298 [[0, 1, 2], [3, 4, 5]]
299
300 See :func:`chunked_iter` for more info.
301 """
302 chunk_iter = chunked_iter(src, size, **kw)
303 if count is None:
304 return list(chunk_iter)
305 else:
306 return list(itertools.islice(chunk_iter, count))
307
308
309def _validate_positive_int(value, name, strictly_positive=True):
310 value = int(value)
311 if value < 0 or (strictly_positive and value == 0):
312 raise ValueError('expected a positive integer ' + name)
313 return value
314
315
316def chunked_iter(src, size, **kw):
317 """Generates *size*-sized chunks from *src* iterable. Unless the
318 optional *fill* keyword argument is provided, iterables not evenly
319 divisible by *size* will have a final chunk that is smaller than
320 *size*.
321
322 >>> list(chunked_iter(range(10), 3))
323 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
324 >>> list(chunked_iter(range(10), 3, fill=None))
325 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
326
327 Note that ``fill=None`` in fact uses ``None`` as the fill value.
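
    String and bytes inputs are rejoined, so chunks come back as the same
    type as *src*:

    >>> list(chunked_iter('foobarbaz', 4))
    ['foob', 'arba', 'z']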
328 """
329 # TODO: add count kwarg?
330 if not is_iterable(src):
331 raise TypeError('expected an iterable')
332 size = _validate_positive_int(size, 'chunk size')
333 do_fill = True
334 try:
335 fill_val = kw.pop('fill')
336 except KeyError:
337 do_fill = False
338 fill_val = None
339 if kw:
340 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
341 if not src:
342 return
343 postprocess = lambda chk: chk
344 if isinstance(src, (str, bytes)):
345 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
346 if isinstance(src, bytes):
347 postprocess = lambda chk: bytes(chk)
348 src_iter = iter(src)
349 while True:
350 cur_chunk = list(itertools.islice(src_iter, size))
351 if not cur_chunk:
352 break
353 lc = len(cur_chunk)
354 if lc < size and do_fill:
355 cur_chunk[lc:] = [fill_val] * (size - lc)
356 yield postprocess(cur_chunk)
357 return
358
359
360def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
    Optionally, the start of the input can be set via *input_offset*, and
    an overlap between the chunks may be specified via *overlap_size*.
    Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
    are always at the beginning of the chunk.
366
367 Returns an iterator of (start, end) tuples, one tuple per chunk.
368
369 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
370 [(10, 15), (15, 20)]
371 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
372 [(10, 15), (14, 19), (18, 20)]
373 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
374 [(10, 15), (13, 18), (16, 20)]
375
376 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
377 [(4, 9), (9, 14), (14, 19)]
378 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
379 [(4, 5), (5, 10), (10, 15), (15, 19)]
380
381 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
382 [(2, 7), (6, 11), (10, 15), (14, 17)]
383 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
384 [(2, 5), (4, 9), (8, 13), (12, 17)]
385 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
386 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
387 """
388 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
389 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
390 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
391 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
392
393 input_stop = input_offset + input_size
394
395 if align:
396 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
397 if initial_chunk_len != overlap_size:
398 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
399 if input_offset + initial_chunk_len >= input_stop:
400 return
401 input_offset = input_offset + initial_chunk_len - overlap_size
402
403 for i in range(input_offset, input_stop, chunk_size - overlap_size):
404 yield (i, min(i + chunk_size, input_stop))
405
406 if i + chunk_size >= input_stop:
407 return
408
409
410def pairwise(src, end=_UNSET):
411 """Convenience function for calling :func:`windowed` on *src*, with
412 *size* set to 2.
413
414 >>> pairwise(range(5))
415 [(0, 1), (1, 2), (2, 3), (3, 4)]
416 >>> pairwise([])
417 []
418
419 Unless *end* is set, the number of pairs is always one less than
420 the number of elements in the iterable passed in, except on an empty input,
421 which will return an empty list.
422
423 With *end* set, a number of pairs equal to the length of *src* is returned,
424 with the last item of the last pair being equal to *end*.
425
426 >>> list(pairwise(range(3), end=None))
427 [(0, 1), (1, 2), (2, None)]
428
429 This way, *end* values can be useful as sentinels to signal the end of the iterable.
430 """
431 return windowed(src, 2, fill=end)
432
433
434def pairwise_iter(src, end=_UNSET):
435 """Convenience function for calling :func:`windowed_iter` on *src*,
436 with *size* set to 2.
437
438 >>> list(pairwise_iter(range(5)))
439 [(0, 1), (1, 2), (2, 3), (3, 4)]
440 >>> list(pairwise_iter([]))
441 []
442
443 Unless *end* is set, the number of pairs is always one less
444 than the number of elements in the iterable passed in,
445 or zero, when *src* is empty.
446
447 With *end* set, a number of pairs equal to the length of *src* is returned,
448 with the last item of the last pair being equal to *end*.
449
450 >>> list(pairwise_iter(range(3), end=None))
451 [(0, 1), (1, 2), (2, None)]
452
453 This way, *end* values can be useful as sentinels to signal the end
454 of the iterable. For infinite iterators, setting *end* has no effect.
455 """
456 return windowed_iter(src, 2, fill=end)
457
458
459def windowed(src, size, fill=_UNSET):
460 """Returns tuples with exactly length *size*. If *fill* is unset
461 and the iterable is too short to make a window of length *size*,
462 no tuples are returned. See :func:`windowed_iter` for more.
463 """
464 return list(windowed_iter(src, size, fill=fill))
465
466
467def windowed_iter(src, size, fill=_UNSET):
468 """Returns tuples with length *size* which represent a sliding
469 window over iterable *src*.
470
471 >>> list(windowed_iter(range(7), 3))
472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
473
474 If *fill* is unset, and the iterable is too short to make a window
475 of length *size*, then no window tuples are returned.
476
477 >>> list(windowed_iter(range(3), 5))
478 []
479
480 With *fill* set, the iterator always yields a number of windows
481 equal to the length of the *src* iterable.
482
483 >>> windowed(range(4), 3, fill=None)
484 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
485
486 This way, *fill* values can be useful to signal the end of the iterable.
487 For infinite iterators, setting *fill* has no effect.
488 """
489 tees = itertools.tee(src, size)
490 if fill is _UNSET:
491 try:
492 for i, t in enumerate(tees):
493 for _ in range(i):
494 next(t)
495 except StopIteration:
496 return zip([])
497 return zip(*tees)
498
499 for i, t in enumerate(tees):
500 for _ in range(i):
501 try:
502 next(t)
503 except StopIteration:
504 continue
505 return zip_longest(*tees, fillvalue=fill)
506
507
508
509def xfrange(stop, start=None, step=1.0):
510 """Same as :func:`frange`, but generator-based instead of returning a
511 list.
512
513 >>> tuple(xfrange(1, 3, step=0.75))
514 (1.0, 1.75, 2.5)
515
516 See :func:`frange` for more details.
517 """
518 if not step:
519 raise ValueError('step must be non-zero')
520 if start is None:
521 start, stop = 0.0, stop * 1.0
522 else:
523 # swap when all args are used
524 stop, start = start * 1.0, stop * 1.0
525 cur = start
526 while cur < stop:
527 yield cur
528 cur += step
529
530
531def frange(stop, start=None, step=1.0):
532 """A :func:`range` clone for float-based ranges.
533
534 >>> frange(5)
535 [0.0, 1.0, 2.0, 3.0, 4.0]
536 >>> frange(6, step=1.25)
537 [0.0, 1.25, 2.5, 3.75, 5.0]
538 >>> frange(100.5, 101.5, 0.25)
539 [100.5, 100.75, 101.0, 101.25]
540 >>> frange(5, 0)
541 []
542 >>> frange(5, 0, step=-1.25)
543 [5.0, 3.75, 2.5, 1.25]
544 """
545 if not step:
546 raise ValueError('step must be non-zero')
547 if start is None:
548 start, stop = 0.0, stop * 1.0
549 else:
550 # swap when all args are used
551 stop, start = start * 1.0, stop * 1.0
552 count = int(math.ceil((stop - start) / step))
553 ret = [None] * count
554 if not ret:
555 return ret
556 ret[0] = start
557 for i in range(1, count):
558 ret[i] = ret[i - 1] + step
559 return ret
560
561
562def backoff(start, stop, count=None, factor=2.0, jitter=False):
563 """Returns a list of geometrically-increasing floating-point numbers,
564 suitable for usage with `exponential backoff`_. Exactly like
565 :func:`backoff_iter`, but without the ``'repeat'`` option for
566 *count*. See :func:`backoff_iter` for more details.
567
568 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
569
570 >>> backoff(1, 10)
571 [1.0, 2.0, 4.0, 8.0, 10.0]
572 """
573 if count == 'repeat':
574 raise ValueError("'repeat' supported in backoff_iter, not backoff")
575 return list(backoff_iter(start, stop, count=count,
576 factor=factor, jitter=jitter))
577
578
579def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
580 """Generates a sequence of geometrically-increasing floats, suitable
581 for usage with `exponential backoff`_. Starts with *start*,
582 increasing by *factor* until *stop* is reached, optionally
583 stopping iteration once *count* numbers are yielded. *factor*
584 defaults to 2. In general retrying with properly-configured
585 backoff creates a better-behaved component for a larger service
586 ecosystem.
587
588 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
589
590 >>> list(backoff_iter(1.0, 10.0, count=5))
591 [1.0, 2.0, 4.0, 8.0, 10.0]
592 >>> list(backoff_iter(1.0, 10.0, count=8))
593 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
594 >>> list(backoff_iter(0.25, 100.0, factor=10))
595 [0.25, 2.5, 25.0, 100.0]
596
597 A simplified usage example:
598
599 .. code-block:: python
600
601 for timeout in backoff_iter(0.25, 5.0):
602 try:
603 res = network_call()
604 break
605 except Exception as e:
606 log(e)
607 time.sleep(timeout)
608
609 An enhancement for large-scale systems would be to add variation,
610 or *jitter*, to timeout values. This is done to avoid a thundering
611 herd on the receiving end of the network call.
612
613 Finally, for *count*, the special value ``'repeat'`` can be passed to
614 continue yielding indefinitely.
615
616 Args:
617
618 start (float): Positive number for baseline.
619 stop (float): Positive number for maximum.
620 count (int): Number of steps before stopping
621 iteration. Defaults to the number of steps between *start* and
622 *stop*. Pass the string, `'repeat'`, to continue iteration
623 indefinitely.
624 factor (float): Rate of exponential increase. Defaults to `2.0`,
625 e.g., `[1, 2, 4, 8, 16]`.
626 jitter (float): A factor between `-1.0` and `1.0`, used to
627 uniformly randomize and thus spread out timeouts in a distributed
628 system, avoiding rhythm effects. Positive values use the base
629 backoff curve as a maximum, negative values use the curve as a
630 minimum. Set to 1.0 or `True` for a jitter approximating
631 Ethernet's time-tested backoff solution. Defaults to `False`.
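
    A sketch of adding jitter to the earlier example (the timeouts are
    randomized, so the exact values will vary):

    .. code-block:: python

        for timeout in backoff_iter(0.25, 5.0, jitter=True):
            try:
                res = network_call()
                break
            except Exception as e:
                log(e)
                time.sleep(timeout)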
632
633 """
634 start = float(start)
635 stop = float(stop)
636 factor = float(factor)
637 if start < 0.0:
638 raise ValueError('expected start >= 0, not %r' % start)
639 if factor < 1.0:
640 raise ValueError('expected factor >= 1.0, not %r' % factor)
641 if stop == 0.0:
        raise ValueError('expected stop > 0, not %r' % stop)
643 if stop < start:
644 raise ValueError('expected stop >= start, not %r' % stop)
645 if count is None:
646 denom = start if start else 1
647 count = 1 + math.ceil(math.log(stop/denom, factor))
648 count = count if start else count + 1
649 if count != 'repeat' and count < 0:
650 raise ValueError('count must be positive or "repeat", not %r' % count)
651 if jitter:
652 jitter = float(jitter)
653 if not (-1.0 <= jitter <= 1.0):
654 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
655
656 cur, i = start, 0
657 while count == 'repeat' or i < count:
        if not jitter:
            cur_ret = cur
        else:
            cur_ret = cur - (cur * jitter * random.random())
662 yield cur_ret
663 i += 1
664 if cur == 0:
665 cur = 1
666 elif cur < stop:
667 cur *= factor
668 if cur > stop:
669 cur = stop
670 return
671
672
673def bucketize(src, key=bool, value_transform=None, key_filter=None):
674 """Group values in the *src* iterable by the value returned by *key*.
675
676 >>> bucketize(range(5))
677 {False: [0], True: [1, 2, 3, 4]}
678 >>> is_odd = lambda x: x % 2 == 1
679 >>> bucketize(range(5), is_odd)
680 {False: [0, 2, 4], True: [1, 3]}
681
    *key* is :class:`bool` by default, but can also be a callable, a
    string, or a list. If *key* is a string, it is the name of the
    attribute on which to bucketize objects:

    >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
    {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}

    If *key* is a list, it contains the bucket for each corresponding
    element of *src*:

    >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
    {0: [1, 4], 1: [2], 2: [365, 98]}

694 Value lists are not deduplicated:
695
696 >>> bucketize([None, None, None, 'hello'])
697 {False: [None, None, None], True: ['hello']}
698
    Bucketize into more than two groups:
700
701 >>> bucketize(range(10), lambda x: x % 3)
702 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
703
704 ``bucketize`` has a couple of advanced options useful in certain
705 cases. *value_transform* can be used to modify values as they are
706 added to buckets, and *key_filter* will allow excluding certain
707 buckets from being collected.
708
709 >>> bucketize(range(5), value_transform=lambda x: x*x)
710 {False: [0], True: [1, 4, 9, 16]}
711
712 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
713 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
714
    Note that in the boolean examples above there are at most two keys,
    ``True`` and ``False``, and each key present has a list with at
    least one item. See :func:`partition` for a version specialized for
    binary use cases.
719
720 """
721 if not is_iterable(src):
722 raise TypeError('expected an iterable')
723 elif isinstance(key, list):
724 if len(key) != len(src):
725 raise ValueError("key and src have to be the same length")
726 src = zip(key, src)
727
728 if isinstance(key, str):
729 key_func = lambda x: getattr(x, key, x)
730 elif callable(key):
731 key_func = key
732 elif isinstance(key, list):
733 key_func = lambda x: x[0]
734 else:
735 raise TypeError('expected key to be callable or a string or a list')
736
737 if value_transform is None:
738 value_transform = lambda x: x
739 if not callable(value_transform):
740 raise TypeError('expected callable value transform function')
    if isinstance(key, list):
        f = value_transform
        value_transform = lambda x: f(x[1])
744
745 ret = {}
746 for val in src:
747 key_of_val = key_func(val)
748 if key_filter is None or key_filter(key_of_val):
749 ret.setdefault(key_of_val, []).append(value_transform(val))
750 return ret
751
752
753def partition(src, key=bool):
754 """No relation to :meth:`str.partition`, ``partition`` is like
755 :func:`bucketize`, but for added convenience returns a tuple of
756 ``(truthy_values, falsy_values)``.
757
758 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
759 >>> nonempty
760 ['hi', 'bye']
761
    *key* defaults to :class:`bool`, but can be carefully overridden to
    use either a function that returns ``True`` or ``False``, or the
    string name of an attribute on which to partition objects.
765
766 >>> import string
767 >>> is_digit = lambda x: x in string.digits
768 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
769 >>> ''.join(decimal_digits), ''.join(hexletters)
770 ('0123456789', 'abcdefABCDEF')
771 """
772 bucketized = bucketize(src, key)
773 return bucketized.get(True, []), bucketized.get(False, [])
774
775
776def unique(src, key=None):
777 """``unique()`` returns a list of unique values, as determined by
778 *key*, in the order they first appeared in the input iterable,
779 *src*.
780
781 >>> ones_n_zeros = '11010110001010010101010'
782 >>> ''.join(unique(ones_n_zeros))
783 '10'
784
785 See :func:`unique_iter` docs for more details.
786 """
787 return list(unique_iter(src, key))
788
789
790def unique_iter(src, key=None):
791 """Yield unique elements from the iterable, *src*, based on *key*,
792 in the order in which they first appeared in *src*.
793
794 >>> repetitious = [1, 2, 3] * 10
795 >>> list(unique_iter(repetitious))
796 [1, 2, 3]
797
798 By default, *key* is the object itself, but *key* can either be a
799 callable or, for convenience, a string name of the attribute on
800 which to uniqueify objects, falling back on identity when the
801 attribute is not present.
802
803 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
804 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
805 ['hi', 'hello', 'bye']
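
    A string *key* looks up an attribute of each element, here the
    ``real`` attribute of the complex numbers:

    >>> list(unique_iter([3+4j, 3-4j, 5j], key='real'))
    [(3+4j), 5j]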
806 """
807 if not is_iterable(src):
808 raise TypeError('expected an iterable, not %r' % type(src))
809 if key is None:
810 key_func = lambda x: x
811 elif callable(key):
812 key_func = key
813 elif isinstance(key, str):
814 key_func = lambda x: getattr(x, key, x)
815 else:
816 raise TypeError('"key" expected a string or callable, not %r' % key)
817 seen = set()
818 for i in src:
819 k = key_func(i)
820 if k not in seen:
821 seen.add(k)
822 yield i
823 return
824
825
826def redundant(src, key=None, groups=False):
827 """The complement of :func:`unique()`.
828
    By default, returns a list containing the first redundant (i.e.,
    duplicate) occurrence of each repeated value in *src*. Pass
    ``groups=True`` to get groups of all values with redundancies,
    ordered by position of the first redundant value. This is useful in
    conjunction with some normalizing *key* function.
834
835 >>> redundant([1, 2, 3, 4])
836 []
837 >>> redundant([1, 2, 3, 2, 3, 3, 4])
838 [2, 3]
839 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
840 [[2, 2], [3, 3, 3]]
841
842 An example using a *key* function to do case-insensitive
843 redundancy detection.
844
845 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
846 ['Hi']
847 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
848 [['hi', 'Hi', 'HI']]
849
850 *key* should also be used when the values in *src* are not hashable.
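
    For example, using ``tuple`` as the *key* to find duplicate lists,
    which are not hashable themselves:

    >>> redundant([[1], [2], [1]], key=tuple)
    [[1]]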
851
852 .. note::
853
        The output of this function is designed for reporting
        duplicates in contexts where a unique input is expected. Due to
        the grouped return type, there is no streaming equivalent of
        this function for the time being.
858
859 """
860 if key is None:
861 pass
862 elif callable(key):
863 key_func = key
864 elif isinstance(key, (str, bytes)):
865 key_func = lambda x: getattr(x, key, x)
866 else:
867 raise TypeError('"key" expected a string or callable, not %r' % key)
868 seen = {} # key to first seen item
869 redundant_order = []
870 redundant_groups = {}
871 for i in src:
872 k = key_func(i) if key else i
873 if k not in seen:
874 seen[k] = i
875 else:
876 if k in redundant_groups:
877 if groups:
878 redundant_groups[k].append(i)
879 else:
880 redundant_order.append(k)
881 redundant_groups[k] = [seen[k], i]
882 if not groups:
883 ret = [redundant_groups[k][1] for k in redundant_order]
884 else:
885 ret = [redundant_groups[k] for k in redundant_order]
886 return ret
887
888
889def one(src, default=None, key=None):
890 """Along the same lines as builtins, :func:`all` and :func:`any`, and
891 similar to :func:`first`, ``one()`` returns the single object in
892 the given iterable *src* that evaluates to ``True``, as determined
893 by callable *key*. If unset, *key* defaults to :class:`bool`. If
894 no such objects are found, *default* is returned. If *default* is
895 not passed, ``None`` is returned.
896
    If *src* has more than one object that evaluates to ``True``,
    *default* is likewise returned, making ``one()`` behave like an
    `XOR`_ over an iterable.
900
901 >>> one((True, False, False))
902 True
903 >>> one((True, False, True))
904 >>> one((0, 0, 'a'))
905 'a'
906 >>> one((0, False, None))
907 >>> one((True, True), default=False)
908 False
909 >>> bool(one(('', 1)))
910 True
911 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
912 42
913
914 See `Martín Gaitán's original repo`_ for further use cases.
915
916 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
917 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
918
919 """
920 ones = list(itertools.islice(filter(key, src), 2))
921 return ones[0] if len(ones) == 1 else default
922
923
924def first(iterable, default=None, key=None):
925 """Return first element of *iterable* that evaluates to ``True``, else
926 return ``None`` or optional *default*. Similar to :func:`one`.
927
928 >>> first([0, False, None, [], (), 42])
929 42
930 >>> first([0, False, None, [], ()]) is None
931 True
932 >>> first([0, False, None, [], ()], default='ohai')
933 'ohai'
934 >>> import re
935 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
936 >>> m.group(1)
937 'bc'
938
939 The optional *key* argument specifies a one-argument predicate function
940 like that used for *filter()*. The *key* argument, if supplied, should be
941 in keyword form. For example, finding the first even number in an iterable:
942
943 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
944 4
945
946 Contributed by Hynek Schlawack, author of `the original standalone module`_.
947
948 .. _the original standalone module: https://github.com/hynek/first
949 """
950 return next(filter(key, iterable), default)
951
952
953def flatten_iter(iterable):
954 """``flatten_iter()`` yields all the elements from *iterable* while
955 collapsing any nested iterables.
956
957 >>> nested = [[1, 2], [[3], [4, 5]]]
958 >>> list(flatten_iter(nested))
959 [1, 2, 3, 4, 5]
960 """
961 for item in iterable:
962 if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
963 yield from flatten_iter(item)
964 else:
965 yield item
966
967def flatten(iterable):
968 """``flatten()`` returns a collapsed list of all the elements from
969 *iterable* while collapsing any nested iterables.
970
971 >>> nested = [[1, 2], [[3], [4, 5]]]
972 >>> flatten(nested)
973 [1, 2, 3, 4, 5]
974 """
975 return list(flatten_iter(iterable))
976
977
978def same(iterable, ref=_UNSET):
979 """``same()`` returns ``True`` when all values in *iterable* are
980 equal to one another, or optionally a reference value,
981 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
982 an iterable and returns a :class:`bool`. ``same()`` returns
983 ``True`` for empty iterables.
984
985 >>> same([])
986 True
987 >>> same([1])
988 True
989 >>> same(['a', 'a', 'a'])
990 True
991 >>> same(range(20))
992 False
993 >>> same([[], []])
994 True
995 >>> same([[], []], ref='test')
996 False
997
998 """
999 iterator = iter(iterable)
1000 if ref is _UNSET:
1001 ref = next(iterator, ref)
1002 return all(val == ref for val in iterator)
1003
1004
1005def default_visit(path, key, value):
1006 # print('visit(%r, %r, %r)' % (path, key, value))
1007 return key, value
1008
1009# enable the extreme: monkeypatching iterutils with a different default_visit
1010_orig_default_visit = default_visit
1011
1012
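# Default *enter* behavior for remap(): returns a (blank new parent,
# iterator of child items) pair, or (value, False) for values that should
# not be traversed.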
1013def default_enter(path, key, value):
1014 # print('enter(%r, %r)' % (key, value))
1015 if isinstance(value, (str, bytes)):
1016 return value, False
1017 elif isinstance(value, Mapping):
1018 return value.__class__(), ItemsView(value)
1019 elif isinstance(value, Sequence):
1020 return value.__class__(), enumerate(value)
1021 elif isinstance(value, Set):
1022 return value.__class__(), enumerate(value)
1023 else:
1024 # files, strings, other iterables, and scalars are not
1025 # traversed
1026 return value, False
1027
1028
1029def default_exit(path, key, old_parent, new_parent, new_items):
1030 # print('exit(%r, %r, %r, %r, %r)'
1031 # % (path, key, old_parent, new_parent, new_items))
1032 ret = new_parent
1033 if isinstance(new_parent, Mapping):
1034 new_parent.update(new_items)
1035 elif isinstance(new_parent, Sequence):
1036 vals = [v for i, v in new_items]
1037 try:
1038 new_parent.extend(vals)
1039 except AttributeError:
1040 ret = new_parent.__class__(vals) # tuples
1041 elif isinstance(new_parent, Set):
1042 vals = [v for i, v in new_items]
1043 try:
1044 new_parent.update(vals)
1045 except AttributeError:
1046 ret = new_parent.__class__(vals) # frozensets
1047 else:
1048 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1049 return ret
1050
1051
1052def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1053 **kwargs):
1054 """The remap ("recursive map") function is used to traverse and
1055 transform nested structures. Lists, tuples, sets, and dictionaries
1056 are just a few of the data structures nested into heterogeneous
1057 tree-like structures that are so common in programming.
1058 Unfortunately, Python's built-in ways to manipulate collections
1059 are almost all flat. List comprehensions may be fast and succinct,
1060 but they do not recurse, making it tedious to apply quick changes
1061 or complex transforms to real-world data.
1062
1063 remap goes where list comprehensions cannot.
1064
1065 Here's an example of removing all Nones from some data:
1066
1067 >>> from pprint import pprint
1068 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1069 ... 'Babylon 5': 6, 'Dr. Who': None}
1070 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1071 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1072
1073 Notice how both Nones have been removed despite the nesting in the
1074 dictionary. Not bad for a one-liner, and that's just the beginning.
1075 See `this remap cookbook`_ for more delicious recipes.
1076
1077 .. _this remap cookbook: http://sedimental.org/remap.html
1078
1079 remap takes four main arguments: the object to traverse and three
1080 optional callables which determine how the remapped object will be
1081 created.
1082
1083 Args:
1084
1085 root: The target object to traverse. By default, remap
1086 supports iterables like :class:`list`, :class:`tuple`,
1087 :class:`dict`, and :class:`set`, but any object traversable by
1088 *enter* will work.
1089 visit (callable): This function is called on every item in
1090 *root*. It must accept three positional arguments, *path*,
1091 *key*, and *value*. *path* is simply a tuple of parents'
1092 keys. *visit* should return the new key-value pair. It may
1093 also return ``True`` as shorthand to keep the old item
1094 unmodified, or ``False`` to drop the item from the new
1095 structure. *visit* is called after *enter*, on the new parent.
1096
1097 The *visit* function is called for every item in root,
1098 including duplicate items. For traversable values, it is
1099 called on the new parent object, after all its children
1100 have been visited. The default visit behavior simply
1101 returns the key-value pair unmodified.
1102 enter (callable): This function controls which items in *root*
1103 are traversed. It accepts the same arguments as *visit*: the
1104 path, the key, and the value of the current item. It returns a
1105 pair of the blank new parent, and an iterator over the items
1106 which should be visited. If ``False`` is returned instead of
1107 an iterator, the value will not be traversed.
1108
            The *enter* function is only called once per unique value. The
            default enter behavior supports mappings, sequences, and
            sets. Strings and all other iterables will not be traversed.
1112 exit (callable): This function determines how to handle items
1113 once they have been visited. It gets the same three
1114 arguments as the other functions -- *path*, *key*, *value*
1115 -- plus two more: the blank new parent object returned
1116 from *enter*, and a list of the new items, as remapped by
1117 *visit*.
1118
1119 Like *enter*, the *exit* function is only called once per
1120 unique value. The default exit behavior is to simply add
1121 all new items to the new parent, e.g., using
1122 :meth:`list.extend` and :meth:`dict.update` to add to the
1123 new parent. Immutable objects, such as a :class:`tuple` or
1124 :class:`namedtuple`, must be recreated from scratch, but
1125 use the same type as the new parent passed back from the
1126 *enter* function.
1127 reraise_visit (bool): A pragmatic convenience for the *visit*
1128 callable. When set to ``False``, remap ignores any errors
1129 raised by the *visit* callback. Items causing exceptions
1130 are kept. See examples for more details.
1131
1132 remap is designed to cover the majority of cases with just the
1133 *visit* callable. While passing in multiple callables is very
1134 empowering, remap is designed so very few cases should require
1135 passing more than one function.
1136
1137 When passing *enter* and *exit*, it's common and easiest to build
1138 on the default behavior. Simply add ``from boltons.iterutils import
1139 default_enter`` (or ``default_exit``), and have your enter/exit
1140 function call the default behavior before or after your custom
1141 logic. See `this example`_.
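
    A minimal sketch of that pattern, wrapping the default *exit* to
    sort all lists in the result (``sorting_exit`` is just an
    illustrative name):

    >>> def sorting_exit(path, key, old, new_parent, new_items):
    ...     ret = default_exit(path, key, old, new_parent, new_items)
    ...     if isinstance(ret, list):
    ...         ret.sort()
    ...     return ret
    >>> remap({'a': [3, 1, 2]}, exit=sorting_exit)
    {'a': [1, 2, 3]}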
1142
1143 Duplicate and self-referential objects (aka reference loops) are
1144 automatically handled internally, `as shown here`_.
1145
1146 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1147 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1148
1149 """
1150 # TODO: improve argument formatting in sphinx doc
1151 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1152 if not callable(visit):
1153 raise TypeError('visit expected callable, not: %r' % visit)
1154 if not callable(enter):
1155 raise TypeError('enter expected callable, not: %r' % enter)
1156 if not callable(exit):
1157 raise TypeError('exit expected callable, not: %r' % exit)
1158 reraise_visit = kwargs.pop('reraise_visit', True)
1159 if kwargs:
1160 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1161
1162 path, registry, stack = (), {}, [(None, root)]
1163 new_items_stack = []
1164 while stack:
1165 key, value = stack.pop()
1166 id_value = id(value)
1167 if key is _REMAP_EXIT:
1168 key, new_parent, old_parent = value
1169 id_value = id(old_parent)
1170 path, new_items = new_items_stack.pop()
1171 value = exit(path, key, old_parent, new_parent, new_items)
1172 registry[id_value] = value
1173 if not new_items_stack:
1174 continue
1175 elif id_value in registry:
1176 value = registry[id_value]
1177 else:
1178 res = enter(path, key, value)
1179 try:
1180 new_parent, new_items = res
1181 except TypeError:
1182 # TODO: handle False?
1183 raise TypeError('enter should return a tuple of (new_parent,'
1184 ' items_iterator), not: %r' % res)
1185 if new_items is not False:
1186 # traverse unless False is explicitly passed
1187 registry[id_value] = new_parent
1188 new_items_stack.append((path, []))
1189 if value is not root:
1190 path += (key,)
1191 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1192 if new_items:
1193 stack.extend(reversed(list(new_items)))
1194 continue
1195 if visit is _orig_default_visit:
1196 # avoid function call overhead by inlining identity operation
1197 visited_item = (key, value)
1198 else:
1199 try:
1200 visited_item = visit(path, key, value)
1201 except Exception:
1202 if reraise_visit:
1203 raise
1204 visited_item = True
1205 if visited_item is False:
1206 continue # drop
1207 elif visited_item is True:
1208 visited_item = (key, value)
1209 # TODO: typecheck?
1210 # raise TypeError('expected (key, value) from visit(),'
1211 # ' not: %r' % visited_item)
1212 try:
1213 new_items_stack[-1][1].append(visited_item)
1214 except IndexError:
1215 raise TypeError('expected remappable root, not: %r' % root)
1216 return value
1217
1218
1219class PathAccessError(KeyError, IndexError, TypeError):
1220 """An amalgamation of KeyError, IndexError, and TypeError,
1221 representing what can occur when looking up a path in a nested
1222 object.
1223 """
1224 def __init__(self, exc, seg, path):
1225 self.exc = exc
1226 self.seg = seg
1227 self.path = path
1228
1229 def __repr__(self):
1230 cn = self.__class__.__name__
1231 return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})'
1232
1233 def __str__(self):
1234 return ('could not access %r from path %r, got error: %r'
1235 % (self.seg, self.path, self.exc))
1236
1237
1238def get_path(root, path, default=_UNSET):
1239 """Retrieve a value from a nested object via a tuple representing the
1240 lookup path.
1241
1242 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1243 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1244 3
1245
1246 The path tuple format is intentionally consistent with that of
1247 :func:`remap`, but a single dotted string can also be passed.
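
    The same lookup, using the dotted string form:

    >>> get_path(root, 'a.b.c.2.0')
    3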
1248
1249 One of get_path's chief aims is improved error messaging. EAFP is
1250 great, but the error messages are not.
1251
    For instance, ``root['a']['b']['c'][2][1]`` gives back
    ``IndexError: list index out of range``.

    What went out of range where? get_path currently raises
    ``PathAccessError: could not access 1 from path ('a', 'b', 'c', 2,
    1), got error: IndexError('list index out of range')``, a
    subclass of IndexError and KeyError.
1259
1260 You can also pass a default that covers the entire operation,
1261 should the lookup fail at any level.
1262
1263 Args:
1264 root: The target nesting of dictionaries, lists, or other
1265 objects supporting ``__getitem__``.
1266 path (tuple): A sequence of strings and integers to be successively
1267 looked up within *root*. A dot-separated (``a.b``) string may
1268 also be passed.
1269 default: The value to be returned should any
1270 ``PathAccessError`` exceptions be raised.
1271 """
1272 if isinstance(path, str):
1273 path = path.split('.')
1274 cur = root
1275 try:
1276 for seg in path:
1277 try:
1278 cur = cur[seg]
1279 except (KeyError, IndexError) as exc:
1280 raise PathAccessError(exc, seg, path)
1281 except TypeError as exc:
1282 # either string index in a list, or a parent that
1283 # doesn't support indexing
1284 try:
1285 seg = int(seg)
1286 cur = cur[seg]
1287 except (ValueError, KeyError, IndexError, TypeError):
1288 if not is_iterable(cur):
1289 exc = TypeError('%r object is not indexable'
1290 % type(cur).__name__)
1291 raise PathAccessError(exc, seg, path)
1292 except PathAccessError:
1293 if default is _UNSET:
1294 raise
1295 return default
1296 return cur
1297
1298
1299def research(root, query=lambda p, k, v: True, reraise=False):
1300 """The :func:`research` function uses :func:`remap` to recurse over
1301 any data nested in *root*, and find values which match a given
1302 criterion, specified by the *query* callable.
1303
1304 Results are returned as a list of ``(path, value)`` pairs. The
1305 paths are tuples in the same format accepted by
1306 :func:`get_path`. This can be useful for comparing values nested
1307 in two or more different structures.
1308
1309 Here's a simple example that finds all integers:
1310
1311 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1312 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1313 >>> print(sorted(res))
1314 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1315
1316 Note how *query* follows the same, familiar ``path, key, value``
1317 signature as the ``visit`` and ``enter`` functions on
1318 :func:`remap`, and returns a :class:`bool`.
1319
1320 Args:
1321 root: The target object to search. Supports the same types of
1322 objects as :func:`remap`, including :class:`list`,
1323 :class:`tuple`, :class:`dict`, and :class:`set`.
1324 query (callable): The function called on every object to
1325 determine whether to include it in the search results. The
1326 callable must accept three arguments, *path*, *key*, and
1327 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1328 *enter* and *visit* from :func:`remap`.
1329 reraise (bool): Whether to reraise exceptions raised by *query*
1330 or to simply drop the result that caused the error.
1331
1332
1333 With :func:`research` it's easy to inspect the details of a data
1334 structure, like finding values that are at a certain depth (using
1335 ``len(p)``) and much more. If more advanced functionality is
1336 needed, check out the code and make your own :func:`remap`
1337 wrapper, and consider `submitting a patch`_!
1338
1339 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1340 """
1341 ret = []
1342
1343 if not callable(query):
1344 raise TypeError('query expected callable, not: %r' % query)
1345
1346 def enter(path, key, value):
1347 try:
1348 if query(path, key, value):
1349 ret.append((path + (key,), value))
1350 except Exception:
1351 if reraise:
1352 raise
1353 return default_enter(path, key, value)
1354
1355 remap(root, enter=enter)
1356 return ret
1357
1358
1359# TODO: recollect()
1360# TODO: refilter()
1361# TODO: reiter()
1362
1363
1364# GUID iterators: 10x faster and somewhat more compact than uuid.
1365
1366class GUIDerator:
1367 """The GUIDerator is an iterator that yields a globally-unique
1368 identifier (GUID) on every iteration. The GUIDs produced are
1369 hexadecimal strings.
1370
1371 Testing shows it to be around 12x faster than the uuid module. By
1372 default it is also more compact, partly due to its default 96-bit
1373 (24-hexdigit) length. 96 bits of randomness means that there is a
1374 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1375 or less uniqueness is desired, the *size* argument can be adjusted
1376 accordingly.
1377
1378 Args:
1379 size (int): character length of the GUID, defaults to 24. Lengths
1380 between 20 and 36 are considered valid.
1381
1382 The GUIDerator has built-in fork protection that causes it to
1383 detect a fork on next iteration and reseed accordingly.
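
    A usage sketch (the values are random, so yours will differ):

    .. code-block:: python

        guids = GUIDerator()
        next(guids)  # -> a 24-character hex string, e.g. '3d4f...'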
1384
1385 """
1386 def __init__(self, size=24):
1387 self.size = size
1388 if size < 20 or size > 36:
            raise ValueError('expected 20 <= size <= 36')
1390 import hashlib
1391 self._sha1 = hashlib.sha1
1392 self.count = itertools.count()
1393 self.reseed()
1394
1395 def reseed(self):
1396 import socket
1397 self.pid = os.getpid()
1398 self.salt = '-'.join([str(self.pid),
1399 socket.gethostname() or '<nohostname>',
1400 str(time.time()),
1401 os.urandom(6).hex()])
1402 return
1403
1404 def __iter__(self):
1405 return self
1406
1407 def __next__(self):
1408 if os.getpid() != self.pid:
1409 self.reseed()
1410 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1411 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1412 return hash_text
1413
1414 next = __next__
1415
1416
1417class SequentialGUIDerator(GUIDerator):
1418 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1419 iterator that yields a globally-unique identifier (GUID) on every
1420 iteration. The GUIDs produced are hexadecimal strings.
1421
1422 The SequentialGUIDerator differs in that it picks a starting GUID
1423 value and increments every iteration. This yields GUIDs which are
1424 of course unique, but also ordered and lexicographically sortable.
1425
1426 The SequentialGUIDerator is around 50% faster than the normal
1427 GUIDerator, making it almost 20x as fast as the built-in uuid
1428 module. By default it is also more compact, partly due to its
1429 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1430 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1431 iterations. If more or less uniqueness is desired, the *size*
1432 argument can be adjusted accordingly.
1433
1434 Args:
1435 size (int): character length of the GUID, defaults to 24.
1436
1437 Note that with SequentialGUIDerator there is a chance of GUIDs
1438 growing larger than the size configured. The SequentialGUIDerator
1439 has built-in fork protection that causes it to detect a fork on
1440 next iteration and reseed accordingly.
1441
1442 """
1443
1444 def reseed(self):
1445 super().reseed()
1446 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1447 self.start = int(start_str[:self.size], 16)
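        # set a high bit so that '%x' formatting always yields at least
        # *size* hex digits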
1448 self.start |= (1 << ((self.size * 4) - 2))
1449
1450 def __next__(self):
1451 if os.getpid() != self.pid:
1452 self.reseed()
1453 return '%x' % (next(self.count) + self.start)
1454
1455 next = __next__
1456
1457
1458guid_iter = GUIDerator()
1459seq_guid_iter = SequentialGUIDerator()
1460
1461
1462def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1463 """For when you care about the order of some elements, but not about
1464 others.
1465
1466 Use this to float to the top and/or sink to the bottom a specific
1467 ordering, while sorting the rest of the elements according to
1468 normal :func:`sorted` rules.
1469
1470 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1471 ['one', 'two', 'a', 'b']
1472 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1473 [6, 5, 3, 1, 0, 2, 4]
1474 >>> import string
1475 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1476 'aA1023456789cCdDeEfFbB'
1477
1478 Args:
1479 iterable (list): A list or other iterable to sort.
1480 first (list): A sequence to enforce for elements which should
1481 appear at the beginning of the returned list.
1482 last (list): A sequence to enforce for elements which should
1483 appear at the end of the returned list.
1484 key (callable): Callable used to generate a comparable key for
1485 each item to be sorted, same as the key in
1486 :func:`sorted`. Note that entries in *first* and *last*
1487 should be the keys for the items. Defaults to
1488 passthrough/the identity function.
        reverse (bool): Whether elements not explicitly ordered by
            *first* and *last* should be sorted in reverse order.
1491
1492 Returns a new list in sorted order.
1493 """
1494 first = first or []
1495 last = last or []
1496 key = key or (lambda x: x)
1497 seq = list(iterable)
1498 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1499 other.sort(key=key, reverse=reverse)
1500
1501 if first:
1502 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1503 if last:
1504 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1505 return first + other + last
1506
1507
1508def untyped_sorted(iterable, key=None, reverse=False):
1509 """A version of :func:`sorted` which will happily sort an iterable of
1510 heterogeneous types and return a new list, similar to legacy Python's
1511 behavior.
1512
1513 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1514 [1, 2.0, 2, 'abc', 'def']
1515
1516 Note how mutually orderable types are sorted as expected, as in
1517 the case of the integers and floats above.
1518
1519 .. note::
1520
1521 Results may vary across Python versions and builds, but the
1522 function will produce a sorted list, except in the case of
1523 explicitly unorderable objects.
1524
1525 """
1526 class _Wrapper:
        __slots__ = ('obj',)
1528
1529 def __init__(self, obj):
1530 self.obj = obj
1531
1532 def __lt__(self, other):
1533 obj = key(self.obj) if key is not None else self.obj
1534 other = key(other.obj) if key is not None else other.obj
1535 try:
1536 ret = obj < other
1537 except TypeError:
1538 ret = ((type(obj).__name__, id(type(obj)), obj)
1539 < (type(other).__name__, id(type(other)), other))
1540 return ret
1541
1542 if key is not None and not callable(key):
1543 raise TypeError('expected function or callable object for key, not: %r'
1544 % key)
1545
1546 return sorted(iterable, key=_Wrapper, reverse=reverse)
1547
1548"""
1549May actually be faster to do an isinstance check for a str path
1550
1551$ python -m timeit -s "x = [1]" "x[0]"
155210000000 loops, best of 3: 0.0207 usec per loop
1553$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
155410000000 loops, best of 3: 0.029 usec per loop
1555$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15561000000 loops, best of 3: 0.315 usec per loop
1557# setting up try/except is fast, only around 0.01us
1558# actually triggering the exception takes almost 10x as long
1559
1560$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
156110000000 loops, best of 3: 0.141 usec per loop
1562$ python -m timeit -s "x = [1]" "isinstance(x, str)"
156310000000 loops, best of 3: 0.131 usec per loop
1564$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
15651000000 loops, best of 3: 0.443 usec per loop
1566$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
15671000000 loops, best of 3: 0.544 usec per loop
1568"""