Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 17%
1# Copyright (c) 2013, Mahmoud Hashemi
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7# * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#
10# * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following
12# disclaimer in the documentation and/or other materials provided
13# with the distribution.
14#
15# * The names of the contributors may not be used to endorse or
16# promote products derived from this software without specific
17# prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31""":mod:`itertools` is full of great examples of Python generator
32usage. However, there are still some critical gaps. ``iterutils``
33fills many of those gaps with featureful, tested, and Pythonic
34solutions.
36Many of the functions below have two versions, one which
37returns an iterator (denoted by the ``*_iter`` naming pattern), and a
38shorter-named convenience form that returns a list. Some of the
39following are based on examples from the itertools docs.
40"""
42import os
43import math
44import time
45import codecs
46import random
47import itertools
48from itertools import zip_longest
49from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
52try:
53 from .typeutils import make_sentinel
54 _UNSET = make_sentinel('_UNSET')
55 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
56except ImportError:
57 _REMAP_EXIT = object()
58 _UNSET = object()
61def is_iterable(obj):
62 """Similar in nature to :func:`callable`, ``is_iterable`` returns
63 ``True`` if an object is `iterable`_, ``False`` if not.
65 >>> is_iterable([])
66 True
67 >>> is_iterable(object())
68 False
70 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
71 """
72 try:
73 iter(obj)
74 except TypeError:
75 return False
76 return True
79def is_scalar(obj):
80 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
81 object is an iterable container type. Strings are considered
82 scalar as well, because strings are more often treated as whole
83 values as opposed to iterables of 1-character substrings.
85 >>> is_scalar(object())
86 True
87 >>> is_scalar(range(10))
88 False
89 >>> is_scalar('hello')
90 True
91 """
92 return not is_iterable(obj) or isinstance(obj, (str, bytes))
95def is_collection(obj):
96 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
97 is an iterable other than a string.
99 >>> is_collection(object())
100 False
101 >>> is_collection(range(10))
102 True
103 >>> is_collection('hello')
104 False
105 """
106 return is_iterable(obj) and not isinstance(obj, (str, bytes))
109def split(src, sep=None, maxsplit=None):
110 """Splits an iterable based on a separator. Like :meth:`str.split`,
111 but for all iterables. Returns a list of lists.
113 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
114 [['hi', 'hello'], ['sup'], ['soap']]
116 See :func:`split_iter` docs for more info.
117 """
118 return list(split_iter(src, sep, maxsplit))
121def split_iter(src, sep=None, maxsplit=None):
122 """Splits an iterable based on a separator, *sep*, a max of
123 *maxsplit* times (no max by default). *sep* can be:
125 * a single value
126 * an iterable of separators
127 * a single-argument callable that returns True when a separator is
128 encountered
130 ``split_iter()`` yields lists of non-separator values. A separator will
131 never appear in the output.
133 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
134 [['hi', 'hello'], ['sup'], ['soap']]
136 Note that ``split_iter`` is based on :func:`str.split`, so if
137 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
138 are desired between two contiguous ``None`` values, simply use
139 ``sep=[None]``:
141 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
142 [['hi', 'hello'], ['sup']]
143 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
144 [['hi', 'hello'], [], ['sup'], []]
146 Using a callable separator:
148 >>> falsy_sep = lambda x: not x
149 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
150 [['hi', 'hello'], [], ['sup'], []]
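With *maxsplit* set, at most that many splits occur, after which the
remaining items are kept together (an added illustration, not part of the
original docstring):
>>> list(split_iter(['hi', None, 'sup', None, 'soap'], maxsplit=1))
[['hi'], ['sup', None, 'soap']]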
152 See :func:`split` for a list-returning version.
154 """
155 if not is_iterable(src):
156 raise TypeError('expected an iterable')
158 if maxsplit is not None:
159 maxsplit = int(maxsplit)
160 if maxsplit == 0:
161 yield [src]
162 return
164 if callable(sep):
165 sep_func = sep
166 elif not is_scalar(sep):
167 sep = frozenset(sep)
168 sep_func = lambda x: x in sep
169 else:
170 sep_func = lambda x: x == sep
172 cur_group = []
173 split_count = 0
174 for s in src:
175 if maxsplit is not None and split_count >= maxsplit:
176 sep_func = lambda x: False
177 if sep_func(s):
178 if sep is None and not cur_group:
179 # If sep is None, str.split() "groups" separators
180 # check the str.split() docs for more info
181 continue
182 split_count += 1
183 yield cur_group
184 cur_group = []
185 else:
186 cur_group.append(s)
188 if cur_group or sep is not None:
189 yield cur_group
190 return
193def lstrip(iterable, strip_value=None):
194 """Strips values from the beginning of an iterable. Stripped items will
195 match the value of the argument strip_value. Functionality is analogous
196 to that of the method str.lstrip. Returns a list.
198 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
199 ['Bar', 'Bam']
201 """
202 return list(lstrip_iter(iterable, strip_value))
205def lstrip_iter(iterable, strip_value=None):
206 """Strips values from the beginning of an iterable. Stripped items will
207 match the value of the argument strip_value. Functionality is analogous
208 to that of the method str.lstrip. Returns a generator.
210 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
211 ['Bar', 'Bam']
213 """
214 iterator = iter(iterable)
215 for i in iterator:
216 if i != strip_value:
217 yield i
218 break
219 for i in iterator:
220 yield i
223def rstrip(iterable, strip_value=None):
224 """Strips values from the end of an iterable. Stripped items will
225 match the value of the argument strip_value. Functionality is analogous
226 to that of the method str.rstrip. Returns a list.
228 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
229 ['Foo', 'Bar']
231 """
232 return list(rstrip_iter(iterable, strip_value))
235def rstrip_iter(iterable, strip_value=None):
236 """Strips values from the end of an iterable. Stripped items will
237 match the value of the argument strip_value. Functionality is analogous
238 to that of the method str.rstrip. Returns a generator.
240 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
241 ['Foo', 'Bar']
243 """
244 iterator = iter(iterable)
245 for i in iterator:
246 if i == strip_value:
247 cache = list()
248 cache.append(i)
249 broken = False
250 for i in iterator:
251 if i == strip_value:
252 cache.append(i)
253 else:
254 broken = True
255 break
256 if not broken: # Return to caller here because the end of the
257 return # iterator has been reached
258 yield from cache
259 yield i
262def strip(iterable, strip_value=None):
263 """Strips values from the beginning and end of an iterable. Stripped items
264 will match the value of the argument strip_value. Functionality is
265 analogous to that of the method str.strip. Returns a list.
267 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
268 ['Foo', 'Bar', 'Bam']
270 """
271 return list(strip_iter(iterable, strip_value))
274def strip_iter(iterable, strip_value=None):
275 """Strips values from the beginning and end of an iterable. Stripped items
276 will match the value of the argument strip_value. Functionality is
277 analogous to that of the method str.strip. Returns a generator.
279 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
280 ['Foo', 'Bar', 'Bam']
282 """
283 return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)
286def chunked(src, size, count=None, **kw):
287 """Returns a list of *count* chunks, each with *size* elements,
288 generated from iterable *src*. If *src* is not evenly divisible by
289 *size*, the final chunk will have fewer than *size* elements.
290 Pass the *fill* keyword argument to provide a pad value and
291 enable padding, otherwise no padding will take place.
293 >>> chunked(range(10), 3)
294 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
295 >>> chunked(range(10), 3, fill=None)
296 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
297 >>> chunked(range(10), 3, count=2)
298 [[0, 1, 2], [3, 4, 5]]
300 See :func:`chunked_iter` for more info.
301 """
302 chunk_iter = chunked_iter(src, size, **kw)
303 if count is None:
304 return list(chunk_iter)
305 else:
306 return list(itertools.islice(chunk_iter, count))
309def _validate_positive_int(value, name, strictly_positive=True):
310 value = int(value)
311 if value < 0 or (strictly_positive and value == 0):
312 raise ValueError('expected a positive integer ' + name)
313 return value
316def chunked_iter(src, size, **kw):
317 """Generates *size*-sized chunks from *src* iterable. Unless the
318 optional *fill* keyword argument is provided, iterables not evenly
319 divisible by *size* will have a final chunk that is smaller than
320 *size*.
322 >>> list(chunked_iter(range(10), 3))
323 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
324 >>> list(chunked_iter(range(10), 3, fill=None))
325 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
327 Note that ``fill=None`` in fact uses ``None`` as the fill value.
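Strings and bytes are rejoined into chunks of their own type (an added
illustration of the postprocessing behavior):
>>> list(chunked_iter('abcde', 2))
['ab', 'cd', 'e']
>>> list(chunked_iter(b'abcde', 2))
[b'ab', b'cd', b'e']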
328 """
329 # TODO: add count kwarg?
330 if not is_iterable(src):
331 raise TypeError('expected an iterable')
332 size = _validate_positive_int(size, 'chunk size')
333 do_fill = True
334 try:
335 fill_val = kw.pop('fill')
336 except KeyError:
337 do_fill = False
338 fill_val = None
339 if kw:
340 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
341 if not src:
342 return
343 postprocess = lambda chk: chk
344 if isinstance(src, (str, bytes)):
345 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
346 if isinstance(src, bytes):
347 postprocess = lambda chk: bytes(chk)
348 src_iter = iter(src)
349 while True:
350 cur_chunk = list(itertools.islice(src_iter, size))
351 if not cur_chunk:
352 break
353 lc = len(cur_chunk)
354 if lc < size and do_fill:
355 cur_chunk[lc:] = [fill_val] * (size - lc)
356 yield postprocess(cur_chunk)
357 return
360def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
361 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
362 Optionally, a start of the input can be set via *input_offset*, and
363 an overlap between the chunks may be specified via *overlap_size*.
364 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
365 are always at the beginning of the chunk.
367 Returns an iterator of (start, end) tuples, one tuple per chunk.
369 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
370 [(10, 15), (15, 20)]
371 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
372 [(10, 15), (14, 19), (18, 20)]
373 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
374 [(10, 15), (13, 18), (16, 20)]
376 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
377 [(4, 9), (9, 14), (14, 19)]
378 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
379 [(4, 5), (5, 10), (10, 15), (15, 19)]
381 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
382 [(2, 7), (6, 11), (10, 15), (14, 17)]
383 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
384 [(2, 5), (4, 9), (8, 13), (12, 17)]
385 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
386 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
387 """
388 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
389 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
390 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
391 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
393 input_stop = input_offset + input_size
395 if align:
396 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
397 if initial_chunk_len != overlap_size:
398 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
399 if input_offset + initial_chunk_len >= input_stop:
400 return
401 input_offset = input_offset + initial_chunk_len - overlap_size
403 for i in range(input_offset, input_stop, chunk_size - overlap_size):
404 yield (i, min(i + chunk_size, input_stop))
406 if i + chunk_size >= input_stop:
407 return
410def pairwise(src, end=_UNSET):
411 """Convenience function for calling :func:`windowed` on *src*, with
412 *size* set to 2.
414 >>> pairwise(range(5))
415 [(0, 1), (1, 2), (2, 3), (3, 4)]
416 >>> pairwise([])
417 []
419 Unless *end* is set, the number of pairs is always one less than
420 the number of elements in the iterable passed in, except on an empty input,
421 which will return an empty list.
423 With *end* set, a number of pairs equal to the length of *src* is returned,
424 with the last item of the last pair being equal to *end*.
426 >>> list(pairwise(range(3), end=None))
427 [(0, 1), (1, 2), (2, None)]
429 This way, *end* values can be useful as sentinels to signal the end of the iterable.
430 """
431 return windowed(src, 2, fill=end)
434def pairwise_iter(src, end=_UNSET):
435 """Convenience function for calling :func:`windowed_iter` on *src*,
436 with *size* set to 2.
438 >>> list(pairwise_iter(range(5)))
439 [(0, 1), (1, 2), (2, 3), (3, 4)]
440 >>> list(pairwise_iter([]))
441 []
443 Unless *end* is set, the number of pairs is always one less
444 than the number of elements in the iterable passed in,
445 or zero, when *src* is empty.
447 With *end* set, a number of pairs equal to the length of *src* is returned,
448 with the last item of the last pair being equal to *end*.
450 >>> list(pairwise_iter(range(3), end=None))
451 [(0, 1), (1, 2), (2, None)]
453 This way, *end* values can be useful as sentinels to signal the end
454 of the iterable. For infinite iterators, setting *end* has no effect.
455 """
456 return windowed_iter(src, 2, fill=end)
459def windowed(src, size, fill=_UNSET):
460 """Returns tuples with exactly length *size*. If *fill* is unset
461 and the iterable is too short to make a window of length *size*,
462 no tuples are returned. See :func:`windowed_iter` for more.
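For example (an added illustration; see :func:`windowed_iter` below for
the full behavior, including the *fill* case):
>>> windowed(range(5), 3)
[(0, 1, 2), (1, 2, 3), (2, 3, 4)]
>>> windowed(range(2), 3)
[]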
463 """
464 return list(windowed_iter(src, size, fill=fill))
467def windowed_iter(src, size, fill=_UNSET):
468 """Returns tuples with length *size* which represent a sliding
469 window over iterable *src*.
471 >>> list(windowed_iter(range(7), 3))
472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
474 If *fill* is unset, and the iterable is too short to make a window
475 of length *size*, then no window tuples are returned.
477 >>> list(windowed_iter(range(3), 5))
478 []
480 With *fill* set, the iterator always yields a number of windows
481 equal to the length of the *src* iterable.
483 >>> windowed(range(4), 3, fill=None)
484 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
486 This way, *fill* values can be useful to signal the end of the iterable.
487 For infinite iterators, setting *fill* has no effect.
488 """
489 tees = itertools.tee(src, size)
490 if fill is _UNSET:
491 try:
492 for i, t in enumerate(tees):
493 for _ in range(i):
494 next(t)
495 except StopIteration:
496 return zip([])
497 return zip(*tees)
499 for i, t in enumerate(tees):
500 for _ in range(i):
501 try:
502 next(t)
503 except StopIteration:
504 continue
505 return zip_longest(*tees, fillvalue=fill)
509def xfrange(stop, start=None, step=1.0):
510 """Same as :func:`frange`, but generator-based instead of returning a
511 list.
513 >>> tuple(xfrange(1, 3, step=0.75))
514 (1.0, 1.75, 2.5)
516 See :func:`frange` for more details.
517 """
518 if not step:
519 raise ValueError('step must be non-zero')
520 if start is None:
521 start, stop = 0.0, stop * 1.0
522 else:
523 # swap when all args are used
524 stop, start = start * 1.0, stop * 1.0
525 cur = start
526 while cur < stop:
527 yield cur
528 cur += step
531def frange(stop, start=None, step=1.0):
532 """A :func:`range` clone for float-based ranges.
534 >>> frange(5)
535 [0.0, 1.0, 2.0, 3.0, 4.0]
536 >>> frange(6, step=1.25)
537 [0.0, 1.25, 2.5, 3.75, 5.0]
538 >>> frange(100.5, 101.5, 0.25)
539 [100.5, 100.75, 101.0, 101.25]
540 >>> frange(5, 0)
541 []
542 >>> frange(5, 0, step=-1.25)
543 [5.0, 3.75, 2.5, 1.25]
544 """
545 if not step:
546 raise ValueError('step must be non-zero')
547 if start is None:
548 start, stop = 0.0, stop * 1.0
549 else:
550 # swap when all args are used
551 stop, start = start * 1.0, stop * 1.0
552 count = int(math.ceil((stop - start) / step))
553 ret = [None] * count
554 if not ret:
555 return ret
556 ret[0] = start
557 for i in range(1, count):
558 ret[i] = ret[i - 1] + step
559 return ret
562def backoff(start, stop, count=None, factor=2.0, jitter=False):
563 """Returns a list of geometrically-increasing floating-point numbers,
564 suitable for usage with `exponential backoff`_. Exactly like
565 :func:`backoff_iter`, but without the ``'repeat'`` option for
566 *count*. See :func:`backoff_iter` for more details.
568 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
570 >>> backoff(1, 10)
571 [1.0, 2.0, 4.0, 8.0, 10.0]
572 """
573 if count == 'repeat':
574 raise ValueError("'repeat' supported in backoff_iter, not backoff")
575 return list(backoff_iter(start, stop, count=count,
576 factor=factor, jitter=jitter))
579def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
580 """Generates a sequence of geometrically-increasing floats, suitable
581 for usage with `exponential backoff`_. Starts with *start*,
582 increasing by *factor* until *stop* is reached, optionally
583 stopping iteration once *count* numbers are yielded. *factor*
584 defaults to 2. In general retrying with properly-configured
585 backoff creates a better-behaved component for a larger service
586 ecosystem.
588 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
590 >>> list(backoff_iter(1.0, 10.0, count=5))
591 [1.0, 2.0, 4.0, 8.0, 10.0]
592 >>> list(backoff_iter(1.0, 10.0, count=8))
593 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
594 >>> list(backoff_iter(0.25, 100.0, factor=10))
595 [0.25, 2.5, 25.0, 100.0]
597 A simplified usage example:
599 .. code-block:: python
601 for timeout in backoff_iter(0.25, 5.0):
602 try:
603 res = network_call()
604 break
605 except Exception as e:
606 log(e)
607 time.sleep(timeout)
609 An enhancement for large-scale systems would be to add variation,
610 or *jitter*, to timeout values. This is done to avoid a thundering
611 herd on the receiving end of the network call.
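For example, with full jitter each timeout is randomized downward from
the base curve (an added, illustrative check, since exact values vary):
>>> timeouts = list(backoff_iter(1.0, 10.0, count=5, jitter=True))
>>> len(timeouts)
5
>>> all(0 < t <= b for t, b in zip(timeouts, [1.0, 2.0, 4.0, 8.0, 10.0]))
True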
613 Finally, for *count*, the special value ``'repeat'`` can be passed to
614 continue yielding indefinitely.
616 Args:
618 start (float): Positive number for baseline.
619 stop (float): Positive number for maximum.
620 count (int): Number of steps before stopping
621 iteration. Defaults to the number of steps between *start* and
622 *stop*. Pass the string, `'repeat'`, to continue iteration
623 indefinitely.
624 factor (float): Rate of exponential increase. Defaults to `2.0`,
625 e.g., `[1, 2, 4, 8, 16]`.
626 jitter (float): A factor between `-1.0` and `1.0`, used to
627 uniformly randomize and thus spread out timeouts in a distributed
628 system, avoiding rhythm effects. Positive values use the base
629 backoff curve as a maximum, negative values use the curve as a
630 minimum. Set to 1.0 or `True` for a jitter approximating
631 Ethernet's time-tested backoff solution. Defaults to `False`.
633 """
634 start = float(start)
635 stop = float(stop)
636 factor = float(factor)
637 if start < 0.0:
638 raise ValueError('expected start >= 0, not %r' % start)
639 if factor < 1.0:
640 raise ValueError('expected factor >= 1.0, not %r' % factor)
641 if stop == 0.0:
642 raise ValueError('expected stop >= 0')
643 if stop < start:
644 raise ValueError('expected stop >= start, not %r' % stop)
645 if count is None:
646 denom = start if start else 1
647 count = 1 + math.ceil(math.log(stop/denom, factor))
648 count = count if start else count + 1
649 if count != 'repeat' and count < 0:
650 raise ValueError('count must be positive or "repeat", not %r' % count)
651 if jitter:
652 jitter = float(jitter)
653 if not (-1.0 <= jitter <= 1.0):
654 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
656 cur, i = start, 0
657 while count == 'repeat' or i < count:
658 if not jitter:
659 cur_ret = cur
660 elif jitter:
661 cur_ret = cur - (cur * jitter * random.random())
662 yield cur_ret
663 i += 1
664 if cur == 0:
665 cur = 1
666 elif cur < stop:
667 cur *= factor
668 if cur > stop:
669 cur = stop
670 return
673def bucketize(src, key=bool, value_transform=None, key_filter=None):
674 """Group values in the *src* iterable by the value returned by *key*.
676 >>> bucketize(range(5))
677 {False: [0], True: [1, 2, 3, 4]}
678 >>> is_odd = lambda x: x % 2 == 1
679 >>> bucketize(range(5), is_odd)
680 {False: [0, 2, 4], True: [1, 3]}
682 *key* is :class:`bool` by default, but can also be a callable, a string, or a list.
683 If it is a string, it is the name of the attribute on which to bucketize objects.
685 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
686 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
688 If *key* is a list, it must be the same length as *src*, giving the bucket key for each corresponding item:
690 >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
691 {0: [1, 4], 1: [2], 2: [365, 98]}
694 Value lists are not deduplicated:
696 >>> bucketize([None, None, None, 'hello'])
697 {False: [None, None, None], True: ['hello']}
699 Bucketize into more than two groups:
701 >>> bucketize(range(10), lambda x: x % 3)
702 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
704 ``bucketize`` has a couple of advanced options useful in certain
705 cases. *value_transform* can be used to modify values as they are
706 added to buckets, and *key_filter* will allow excluding certain
707 buckets from being collected.
709 >>> bucketize(range(5), value_transform=lambda x: x*x)
710 {False: [0], True: [1, 4, 9, 16]}
712 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
713 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
715 Note that in some of these examples there were at most two keys, ``True`` and
716 ``False``, and each key present had a list with at least one
717 item. See :func:`partition` for a version specialized for binary
718 use cases.
720 """
721 if not is_iterable(src):
722 raise TypeError('expected an iterable')
723 elif isinstance(key, list):
724 if len(key) != len(src):
725 raise ValueError("key and src have to be the same length")
726 src = zip(key, src)
728 if isinstance(key, str):
729 key_func = lambda x: getattr(x, key, x)
730 elif callable(key):
731 key_func = key
732 elif isinstance(key, list):
733 key_func = lambda x: x[0]
734 else:
735 raise TypeError('expected key to be callable or a string or a list')
737 if value_transform is None:
738 value_transform = lambda x: x
739 if not callable(value_transform):
740 raise TypeError('expected callable value transform function')
741 if isinstance(key, list):
742 f = value_transform
743 value_transform = lambda x: f(x[1])
745 ret = {}
746 for val in src:
747 key_of_val = key_func(val)
748 if key_filter is None or key_filter(key_of_val):
749 ret.setdefault(key_of_val, []).append(value_transform(val))
750 return ret
753def partition(src, key=bool):
754 """No relation to :meth:`str.partition`, ``partition`` is like
755 :func:`bucketize`, but for added convenience returns a tuple of
756 ``(truthy_values, falsy_values)``.
758 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
759 >>> nonempty
760 ['hi', 'bye']
762 *key* defaults to :class:`bool`, but can be carefully overridden to
763 use either a function that returns either ``True`` or ``False`` or
764 a string name of the attribute on which to partition objects.
766 >>> import string
767 >>> is_digit = lambda x: x in string.digits
768 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
769 >>> ''.join(decimal_digits), ''.join(hexletters)
770 ('0123456789', 'abcdefABCDEF')
771 """
772 bucketized = bucketize(src, key)
773 return bucketized.get(True, []), bucketized.get(False, [])
776def unique(src, key=None):
777 """``unique()`` returns a list of unique values, as determined by
778 *key*, in the order they first appeared in the input iterable,
779 *src*.
781 >>> ones_n_zeros = '11010110001010010101010'
782 >>> ''.join(unique(ones_n_zeros))
783 '10'
785 See :func:`unique_iter` docs for more details.
786 """
787 return list(unique_iter(src, key))
790def unique_iter(src, key=None):
791 """Yield unique elements from the iterable, *src*, based on *key*,
792 in the order in which they first appeared in *src*.
794 >>> repetitious = [1, 2, 3] * 10
795 >>> list(unique_iter(repetitious))
796 [1, 2, 3]
798 By default, *key* is the object itself, but *key* can either be a
799 callable or, for convenience, a string name of the attribute on
800 which to uniqueify objects, falling back on identity when the
801 attribute is not present.
803 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
804 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
805 ['hi', 'hello', 'bye']
806 """
807 if not is_iterable(src):
808 raise TypeError('expected an iterable, not %r' % type(src))
809 if key is None:
810 key_func = lambda x: x
811 elif callable(key):
812 key_func = key
813 elif isinstance(key, str):
814 key_func = lambda x: getattr(x, key, x)
815 else:
816 raise TypeError('"key" expected a string or callable, not %r' % key)
817 seen = set()
818 for i in src:
819 k = key_func(i)
820 if k not in seen:
821 seen.add(k)
822 yield i
823 return
826def redundant(src, key=None, groups=False):
827 """The complement of :func:`unique()`.
829 By default returns non-unique/duplicate values as a list of the
830 *first* redundant value in *src*. Pass ``groups=True`` to get
831 groups of all values with redundancies, ordered by position of the
832 first redundant value. This is useful in conjunction with some
833 normalizing *key* function.
835 >>> redundant([1, 2, 3, 4])
836 []
837 >>> redundant([1, 2, 3, 2, 3, 3, 4])
838 [2, 3]
839 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
840 [[2, 2], [3, 3, 3]]
842 An example using a *key* function to do case-insensitive
843 redundancy detection.
845 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
846 ['Hi']
847 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
848 [['hi', 'Hi', 'HI']]
850 *key* should also be used when the values in *src* are not hashable.
852 .. note::
854 The output of this function is designed for reporting
855 duplicates in contexts where a unique input is desired. Due to
856 the grouped return type, there is no streaming equivalent of
857 this function for the time being.
859 """
860 if key is None:
861 pass
862 elif callable(key):
863 key_func = key
864 elif isinstance(key, (str, bytes)):
865 key_func = lambda x: getattr(x, key, x)
866 else:
867 raise TypeError('"key" expected a string or callable, not %r' % key)
868 seen = {} # key to first seen item
869 redundant_order = []
870 redundant_groups = {}
871 for i in src:
872 k = key_func(i) if key else i
873 if k not in seen:
874 seen[k] = i
875 else:
876 if k in redundant_groups:
877 if groups:
878 redundant_groups[k].append(i)
879 else:
880 redundant_order.append(k)
881 redundant_groups[k] = [seen[k], i]
882 if not groups:
883 ret = [redundant_groups[k][1] for k in redundant_order]
884 else:
885 ret = [redundant_groups[k] for k in redundant_order]
886 return ret
889def one(src, default=None, key=None):
890 """Along the same lines as builtins, :func:`all` and :func:`any`, and
891 similar to :func:`first`, ``one()`` returns the single object in
892 the given iterable *src* that evaluates to ``True``, as determined
893 by callable *key*. If unset, *key* defaults to :class:`bool`. If
894 no such objects are found, *default* is returned. If *default* is
895 not passed, ``None`` is returned.
897 If *src* has more than one object that evaluates to ``True``, or
898 no such object at all, *default* is returned. It's like an
899 `XOR`_ over an iterable.
901 >>> one((True, False, False))
902 True
903 >>> one((True, False, True))
904 >>> one((0, 0, 'a'))
905 'a'
906 >>> one((0, False, None))
907 >>> one((True, True), default=False)
908 False
909 >>> bool(one(('', 1)))
910 True
911 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
912 42
914 See `Martín Gaitán's original repo`_ for further use cases.
916 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
917 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
919 """
920 ones = list(itertools.islice(filter(key, src), 2))
921 return ones[0] if len(ones) == 1 else default
924def first(iterable, default=None, key=None):
925 """Return first element of *iterable* that evaluates to ``True``, else
926 return ``None`` or optional *default*. Similar to :func:`one`.
928 >>> first([0, False, None, [], (), 42])
929 42
930 >>> first([0, False, None, [], ()]) is None
931 True
932 >>> first([0, False, None, [], ()], default='ohai')
933 'ohai'
934 >>> import re
935 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
936 >>> m.group(1)
937 'bc'
939 The optional *key* argument specifies a one-argument predicate function
940 like that used for *filter()*. The *key* argument, if supplied, should be
941 in keyword form. For example, finding the first even number in an iterable:
943 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
944 4
946 Contributed by Hynek Schlawack, author of `the original standalone module`_.
948 .. _the original standalone module: https://github.com/hynek/first
949 """
950 return next(filter(key, iterable), default)
953def flatten_iter(iterable):
954 """``flatten_iter()`` yields all the elements from *iterable* while
955 collapsing any nested iterables.
957 >>> nested = [[1, 2], [[3], [4, 5]]]
958 >>> list(flatten_iter(nested))
959 [1, 2, 3, 4, 5]
960 """
961 for item in iterable:
962 if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
963 yield from flatten_iter(item)
964 else:
965 yield item
967def flatten(iterable):
968 """``flatten()`` returns a collapsed list of all the elements from
969 *iterable* while collapsing any nested iterables.
971 >>> nested = [[1, 2], [[3], [4, 5]]]
972 >>> flatten(nested)
973 [1, 2, 3, 4, 5]
974 """
975 return list(flatten_iter(iterable))
978def same(iterable, ref=_UNSET):
979 """``same()`` returns ``True`` when all values in *iterable* are
980 equal to one another, or optionally a reference value,
981 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
982 an iterable and returns a :class:`bool`. ``same()`` returns
983 ``True`` for empty iterables.
985 >>> same([])
986 True
987 >>> same([1])
988 True
989 >>> same(['a', 'a', 'a'])
990 True
991 >>> same(range(20))
992 False
993 >>> same([[], []])
994 True
995 >>> same([[], []], ref='test')
996 False
998 """
999 iterator = iter(iterable)
1000 if ref is _UNSET:
1001 ref = next(iterator, ref)
1002 return all(val == ref for val in iterator)
1005def default_visit(path, key, value):
1006 # print('visit(%r, %r, %r)' % (path, key, value))
1007 return key, value
1009# enable the extreme: monkeypatching iterutils with a different default_visit
1010_orig_default_visit = default_visit
1013def default_enter(path, key, value):
1014 # print('enter(%r, %r)' % (key, value))
1015 if isinstance(value, (str, bytes)):
1016 return value, False
1017 elif isinstance(value, Mapping):
1018 return value.__class__(), ItemsView(value)
1019 elif isinstance(value, Sequence):
1020 return value.__class__(), enumerate(value)
1021 elif isinstance(value, Set):
1022 return value.__class__(), enumerate(value)
1023 else:
1024 # files, strings, other iterables, and scalars are not
1025 # traversed
1026 return value, False
1029def default_exit(path, key, old_parent, new_parent, new_items):
1030 # print('exit(%r, %r, %r, %r, %r)'
1031 # % (path, key, old_parent, new_parent, new_items))
1032 ret = new_parent
1033 if isinstance(new_parent, Mapping):
1034 new_parent.update(new_items)
1035 elif isinstance(new_parent, Sequence):
1036 vals = [v for i, v in new_items]
1037 try:
1038 new_parent.extend(vals)
1039 except AttributeError:
1040 ret = new_parent.__class__(vals) # tuples
1041 elif isinstance(new_parent, Set):
1042 vals = [v for i, v in new_items]
1043 try:
1044 new_parent.update(vals)
1045 except AttributeError:
1046 ret = new_parent.__class__(vals) # frozensets
1047 else:
1048 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1049 return ret
1052def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1053 **kwargs):
1054 """The remap ("recursive map") function is used to traverse and
1055 transform nested structures. Lists, tuples, sets, and dictionaries
1056 are just a few of the data structures nested into heterogeneous
1057 tree-like structures that are so common in programming.
1058 Unfortunately, Python's built-in ways to manipulate collections
1059 are almost all flat. List comprehensions may be fast and succinct,
1060 but they do not recurse, making it tedious to apply quick changes
1061 or complex transforms to real-world data.
1063 remap goes where list comprehensions cannot.
1065 Here's an example of removing all Nones from some data:
1067 >>> from pprint import pprint
1068 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1069 ... 'Babylon 5': 6, 'Dr. Who': None}
1070 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1071 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1073 Notice how both Nones have been removed despite the nesting in the
1074 dictionary. Not bad for a one-liner, and that's just the beginning.
1075 See `this remap cookbook`_ for more delicious recipes.
1077 .. _this remap cookbook: http://sedimental.org/remap.html
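Another small example in the same spirit (an added illustration with ad
hoc data), this time returning a new key-value pair to scale every
integer by 10:
>>> target = {'n': 1, 'nested': {'n': 2}}
>>> pprint(remap(target, lambda p, k, v: (k, v * 10) if isinstance(v, int) else (k, v)))
{'n': 10, 'nested': {'n': 20}}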
1079 remap takes four main arguments: the object to traverse and three
1080 optional callables which determine how the remapped object will be
1081 created.
1083 Args:
1085 root: The target object to traverse. By default, remap
1086 supports iterables like :class:`list`, :class:`tuple`,
1087 :class:`dict`, and :class:`set`, but any object traversable by
1088 *enter* will work.
1089 visit (callable): This function is called on every item in
1090 *root*. It must accept three positional arguments, *path*,
1091 *key*, and *value*. *path* is simply a tuple of parents'
1092 keys. *visit* should return the new key-value pair. It may
1093 also return ``True`` as shorthand to keep the old item
1094 unmodified, or ``False`` to drop the item from the new
1095 structure. *visit* is called after *enter*, on the new parent.
1097 The *visit* function is called for every item in root,
1098 including duplicate items. For traversable values, it is
1099 called on the new parent object, after all its children
1100 have been visited. The default visit behavior simply
1101 returns the key-value pair unmodified.
1102 enter (callable): This function controls which items in *root*
1103 are traversed. It accepts the same arguments as *visit*: the
1104 path, the key, and the value of the current item. It returns a
1105 pair of the blank new parent, and an iterator over the items
1106 which should be visited. If ``False`` is returned instead of
1107 an iterator, the value will not be traversed.
1109 The *enter* function is only called once per unique value. The
1110 default enter behavior supports mappings, sequences, and
1111 sets. Strings and all other iterables will not be traversed.
1112 exit (callable): This function determines how to handle items
1113 once they have been visited. It gets the same three
1114 arguments as the other functions -- *path*, *key*, *value*
1115 -- plus two more: the blank new parent object returned
1116 from *enter*, and a list of the new items, as remapped by
1117 *visit*.
1119 Like *enter*, the *exit* function is only called once per
1120 unique value. The default exit behavior is to simply add
1121 all new items to the new parent, e.g., via
1122 :meth:`list.extend` and :meth:`dict.update`. Immutable
1123 objects, such as a :class:`tuple` or
1124 :class:`namedtuple`, must be recreated from scratch, but
1125 use the same type as the new parent passed back from the
1126 *enter* function.
1127 reraise_visit (bool): A pragmatic convenience for the *visit*
1128 callable. When set to ``False``, remap ignores any errors
1129 raised by the *visit* callback. Items causing exceptions
1130 are kept. See examples for more details.
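For instance (an added, illustrative example; the lambda intentionally
fails on the string value, which is therefore kept as-is):
>>> remap({'a': 1, 'b': 'two'}, lambda p, k, v: (k, v + 1), reraise_visit=False)
{'a': 2, 'b': 'two'}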
1132 remap is designed to cover the majority of cases with just the
1133 *visit* callable. While passing in multiple callables is very
1134 empowering, remap is designed so very few cases should require
1135 passing more than one function.
1137 When passing *enter* and *exit*, it's common and easiest to build
1138 on the default behavior. Simply add ``from boltons.iterutils import
1139 default_enter`` (or ``default_exit``), and have your enter/exit
1140 function call the default behavior before or after your custom
1141 logic. See `this example`_.
1143 Duplicate and self-referential objects (aka reference loops) are
1144 automatically handled internally, `as shown here`_.
1146 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1147 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1149 """
1150 # TODO: improve argument formatting in sphinx doc
1151 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1152 if not callable(visit):
1153 raise TypeError('visit expected callable, not: %r' % visit)
1154 if not callable(enter):
1155 raise TypeError('enter expected callable, not: %r' % enter)
1156 if not callable(exit):
1157 raise TypeError('exit expected callable, not: %r' % exit)
1158 reraise_visit = kwargs.pop('reraise_visit', True)
1159 if kwargs:
1160 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1162 path, registry, stack = (), {}, [(None, root)]
1163 new_items_stack = []
1164 while stack:
1165 key, value = stack.pop()
1166 id_value = id(value)
1167 if key is _REMAP_EXIT:
1168 key, new_parent, old_parent = value
1169 id_value = id(old_parent)
1170 path, new_items = new_items_stack.pop()
1171 value = exit(path, key, old_parent, new_parent, new_items)
1172 registry[id_value] = value
1173 if not new_items_stack:
1174 continue
1175 elif id_value in registry:
1176 value = registry[id_value]
1177 else:
1178 res = enter(path, key, value)
1179 try:
1180 new_parent, new_items = res
1181 except TypeError:
1182 # TODO: handle False?
1183 raise TypeError('enter should return a tuple of (new_parent,'
1184 ' items_iterator), not: %r' % res)
1185 if new_items is not False:
1186 # traverse unless False is explicitly passed
1187 registry[id_value] = new_parent
1188 new_items_stack.append((path, []))
1189 if value is not root:
1190 path += (key,)
1191 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1192 if new_items:
1193 stack.extend(reversed(list(new_items)))
1194 continue
1195 if visit is _orig_default_visit:
1196 # avoid function call overhead by inlining identity operation
1197 visited_item = (key, value)
1198 else:
1199 try:
1200 visited_item = visit(path, key, value)
1201 except Exception:
1202 if reraise_visit:
1203 raise
1204 visited_item = True
1205 if visited_item is False:
1206 continue # drop
1207 elif visited_item is True:
1208 visited_item = (key, value)
1209 # TODO: typecheck?
1210 # raise TypeError('expected (key, value) from visit(),'
1211 # ' not: %r' % visited_item)
1212 try:
1213 new_items_stack[-1][1].append(visited_item)
1214 except IndexError:
1215 raise TypeError('expected remappable root, not: %r' % root)
1216 return value
1219class PathAccessError(KeyError, IndexError, TypeError):
1220 """An amalgamation of KeyError, IndexError, and TypeError,
1221 representing what can occur when looking up a path in a nested
1222 object.
1223 """
1224 def __init__(self, exc, seg, path):
1225 self.exc = exc
1226 self.seg = seg
1227 self.path = path
1229 def __repr__(self):
1230 cn = self.__class__.__name__
1231 return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})'
1233 def __str__(self):
1234 return ('could not access %r from path %r, got error: %r'
1235 % (self.seg, self.path, self.exc))
1238def get_path(root, path, default=_UNSET):
1239 """Retrieve a value from a nested object via a tuple representing the
1240 lookup path.
1242 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1243 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1244 3
1246 The path tuple format is intentionally consistent with that of
1247 :func:`remap`, but a single dotted string can also be passed.
1249 One of get_path's chief aims is improved error messaging. EAFP is
1250 great, but the error messages are not.
1252 For instance, ``root['a']['b']['c'][2][1]`` gives back
1253 ``IndexError: list index out of range``
1255 What went out of range where? get_path currently raises
1256 ``PathAccessError: could not access 1 from path ('a', 'b', 'c', 2,
1257 1), got error: IndexError('list index out of range')``, a
1258 subclass of IndexError and KeyError.
1260 You can also pass a default that covers the entire operation,
1261 should the lookup fail at any level.
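For instance (an added illustration, reusing *root* from above):
>>> get_path(root, ('a', 'b', 'z'), default='missing')
'missing'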
1263 Args:
1264 root: The target nesting of dictionaries, lists, or other
1265 objects supporting ``__getitem__``.
1266 path (tuple): A sequence of strings and integers to be successively
1267 looked up within *root*. A dot-separated (``a.b``) string may
1268 also be passed.
1269 default: The value to be returned should any
1270 ``PathAccessError`` exceptions be raised.
1271 """
1272 if isinstance(path, str):
1273 path = path.split('.')
1274 cur = root
1275 try:
1276 for seg in path:
1277 try:
1278 cur = cur[seg]
1279 except (KeyError, IndexError) as exc:
1280 raise PathAccessError(exc, seg, path)
1281 except TypeError as exc:
1282 # either string index in a list, or a parent that
1283 # doesn't support indexing
1284 try:
1285 seg = int(seg)
1286 cur = cur[seg]
1287 except (ValueError, KeyError, IndexError, TypeError):
1288 if not is_iterable(cur):
1289 exc = TypeError('%r object is not indexable'
1290 % type(cur).__name__)
1291 raise PathAccessError(exc, seg, path)
1292 except PathAccessError:
1293 if default is _UNSET:
1294 raise
1295 return default
1296 return cur
1299def research(root, query=lambda p, k, v: True, reraise=False):
1300 """The :func:`research` function uses :func:`remap` to recurse over
1301 any data nested in *root*, and find values which match a given
1302 criterion, specified by the *query* callable.
1304 Results are returned as a list of ``(path, value)`` pairs. The
1305 paths are tuples in the same format accepted by
1306 :func:`get_path`. This can be useful for comparing values nested
1307 in two or more different structures.
1309 Here's a simple example that finds all integers:
1311 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1312 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1313 >>> print(sorted(res))
1314 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1316 Note how *query* follows the same, familiar ``path, key, value``
1317 signature as the ``visit`` and ``enter`` functions on
1318 :func:`remap`, and returns a :class:`bool`.
1320 Args:
1321 root: The target object to search. Supports the same types of
1322 objects as :func:`remap`, including :class:`list`,
1323 :class:`tuple`, :class:`dict`, and :class:`set`.
1324 query (callable): The function called on every object to
1325 determine whether to include it in the search results. The
1326 callable must accept three arguments, *path*, *key*, and
1327 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1328 *enter* and *visit* from :func:`remap`.
1329 reraise (bool): Whether to reraise exceptions raised by *query*
1330 or to simply drop the result that caused the error.
1333 With :func:`research` it's easy to inspect the details of a data
1334 structure, like finding values that are at a certain depth (using
1335 ``len(p)``) and much more. If more advanced functionality is
1336 needed, check out the code and make your own :func:`remap`
1337 wrapper, and consider `submitting a patch`_!
1339 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1340 """
1341 ret = []
1343 if not callable(query):
1344 raise TypeError('query expected callable, not: %r' % query)
1346 def enter(path, key, value):
1347 try:
1348 if query(path, key, value):
1349 ret.append((path + (key,), value))
1350 except Exception:
1351 if reraise:
1352 raise
1353 return default_enter(path, key, value)
1355 remap(root, enter=enter)
1356 return ret
1359# TODO: recollect()
1360# TODO: refilter()
1361# TODO: reiter()
1364# GUID iterators: 10x faster and somewhat more compact than uuid.
1366class GUIDerator:
1367 """The GUIDerator is an iterator that yields a globally-unique
1368 identifier (GUID) on every iteration. The GUIDs produced are
1369 hexadecimal strings.
1371 Testing shows it to be around 12x faster than the uuid module. By
1372 default it is also more compact, partly due to its default 96-bit
1373 (24-hexdigit) length. 96 bits of randomness means that there is a
1374 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1375 or less uniqueness is desired, the *size* argument can be adjusted
1376 accordingly.
1378 Args:
1379 size (int): character length of the GUID, defaults to 24. Lengths
1380 between 20 and 36 are considered valid.
1382 The GUIDerator has built-in fork protection that causes it to
1383 detect a fork on next iteration and reseed accordingly.
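A small usage sketch (added here; GUID values are random, so only their
shape is checked):
>>> guids = GUIDerator()
>>> g = next(guids)
>>> len(g)
24
>>> g != next(guids)
True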
1385 """
1386 def __init__(self, size=24):
1387 self.size = size
1388 if size < 20 or size > 36:
1389 raise ValueError('expected 20 <= size <= 36')
1390 import hashlib
1391 self._sha1 = hashlib.sha1
1392 self.count = itertools.count()
1393 self.reseed()
1395 def reseed(self):
1396 import socket
1397 self.pid = os.getpid()
1398 self.salt = '-'.join([str(self.pid),
1399 socket.gethostname() or '<nohostname>',
1400 str(time.time()),
1401 os.urandom(6).hex()])
1402 return
1404 def __iter__(self):
1405 return self
1407 def __next__(self):
1408 if os.getpid() != self.pid:
1409 self.reseed()
1410 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1411 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1412 return hash_text
1414 next = __next__
1417class SequentialGUIDerator(GUIDerator):
1418 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1419 iterator that yields a globally-unique identifier (GUID) on every
1420 iteration. The GUIDs produced are hexadecimal strings.
1422 The SequentialGUIDerator differs in that it picks a starting GUID
1423 value and increments every iteration. This yields GUIDs which are
1424 of course unique, but also ordered and lexicographically sortable.
1426 The SequentialGUIDerator is around 50% faster than the normal
1427 GUIDerator, making it almost 20x as fast as the built-in uuid
1428 module. By default it is also more compact, partly due to its
1429 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1430 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1431 iterations. If more or less uniqueness is desired, the *size*
1432 argument can be adjusted accordingly.
1434 Args:
1435 size (int): character length of the GUID, defaults to 24.
1437 Note that with SequentialGUIDerator there is a chance of GUIDs
1438 growing larger than the size configured. The SequentialGUIDerator
1439 has built-in fork protection that causes it to detect a fork on
1440 next iteration and reseed accordingly.
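For example (added here; exact values vary per run, but consecutive GUIDs
differ by exactly one):
>>> sguids = SequentialGUIDerator()
>>> a, b = next(sguids), next(sguids)
>>> int(b, 16) - int(a, 16)
1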
1442 """
1444 def reseed(self):
1445 super().reseed()
1446 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1447 self.start = int(start_str[:self.size], 16)
1448 self.start |= (1 << ((self.size * 4) - 2))
1450 def __next__(self):
1451 if os.getpid() != self.pid:
1452 self.reseed()
1453 return '%x' % (next(self.count) + self.start)
1455 next = __next__
1458guid_iter = GUIDerator()
1459seq_guid_iter = SequentialGUIDerator()
1462def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1463 """For when you care about the order of some elements, but not about
1464 others.
1466 Use this to float to the top and/or sink to the bottom a specific
1467 ordering, while sorting the rest of the elements according to
1468 normal :func:`sorted` rules.
1470 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1471 ['one', 'two', 'a', 'b']
1472 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1473 [6, 5, 3, 1, 0, 2, 4]
1474 >>> import string
1475 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1476 'aA1023456789cCdDeEfFbB'
1478 Args:
1479 iterable (list): A list or other iterable to sort.
1480 first (list): A sequence to enforce for elements which should
1481 appear at the beginning of the returned list.
1482 last (list): A sequence to enforce for elements which should
1483 appear at the end of the returned list.
1484 key (callable): Callable used to generate a comparable key for
1485 each item to be sorted, same as the key in
1486 :func:`sorted`. Note that entries in *first* and *last*
1487 should be the keys for the items. Defaults to
1488 passthrough/the identity function.
1489 reverse (bool): Whether or not elements not explicitly ordered
1490 by *first* and *last* should be in reverse order or not.
1492 Returns a new list in sorted order.
1493 """
1494 first = first or []
1495 last = last or []
1496 key = key or (lambda x: x)
1497 seq = list(iterable)
1498 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1499 other.sort(key=key, reverse=reverse)
1501 if first:
1502 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1503 if last:
1504 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1505 return first + other + last
1508def untyped_sorted(iterable, key=None, reverse=False):
1509 """A version of :func:`sorted` which will happily sort an iterable of
1510 heterogeneous types and return a new list, similar to legacy Python's
1511 behavior.
1513 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1514 [1, 2.0, 2, 'abc', 'def']
1516 Note how mutually orderable types are sorted as expected, as in
1517 the case of the integers and floats above.
1519 .. note::
1521 Results may vary across Python versions and builds, but the
1522 function will produce a sorted list, except in the case of
1523 explicitly unorderable objects.
1525 """
1526 class _Wrapper:
1527 __slots__ = ('obj',)
1529 def __init__(self, obj):
1530 self.obj = obj
1532 def __lt__(self, other):
1533 obj = key(self.obj) if key is not None else self.obj
1534 other = key(other.obj) if key is not None else other.obj
1535 try:
1536 ret = obj < other
1537 except TypeError:
1538 ret = ((type(obj).__name__, id(type(obj)), obj)
1539 < (type(other).__name__, id(type(other)), other))
1540 return ret
1542 if key is not None and not callable(key):
1543 raise TypeError('expected function or callable object for key, not: %r'
1544 % key)
1546 return sorted(iterable, key=_Wrapper, reverse=reverse)
1548"""
1549May actually be faster to do an isinstance check for a str path
1551$ python -m timeit -s "x = [1]" "x[0]"
155210000000 loops, best of 3: 0.0207 usec per loop
1553$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
155410000000 loops, best of 3: 0.029 usec per loop
1555$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15561000000 loops, best of 3: 0.315 usec per loop
1557# setting up try/except is fast, only around 0.01us
1558# actually triggering the exception takes almost 10x as long
1560$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
156110000000 loops, best of 3: 0.141 usec per loop
1562$ python -m timeit -s "x = [1]" "isinstance(x, str)"
156310000000 loops, best of 3: 0.131 usec per loop
1564$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
15651000000 loops, best of 3: 0.443 usec per loop
1566$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
15671000000 loops, best of 3: 0.544 usec per loop
1568"""