Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 24%
546 statements
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-11 06:58 +0000
« prev ^ index » next coverage.py v7.4.0, created at 2024-01-11 06:58 +0000
1# -*- coding: utf-8 -*-
3# Copyright (c) 2013, Mahmoud Hashemi
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#
12# * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following
14# disclaimer in the documentation and/or other materials provided
15# with the distribution.
16#
17# * The names of the contributors may not be used to endorse or
18# promote products derived from this software without specific
19# prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33""":mod:`itertools` is full of great examples of Python generator
34usage. However, there are still some critical gaps. ``iterutils``
35fills many of those gaps with featureful, tested, and Pythonic
36solutions.
38Many of the functions below have two versions, one which
39returns an iterator (denoted by the ``*_iter`` naming pattern), and a
40shorter-named convenience form that returns a list. Some of the
41following are based on examples in itertools docs.
42"""
44import os
45import math
46import time
47import codecs
48import random
49import itertools
51try:
52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
53except ImportError:
54 from collections import Mapping, Sequence, Set, ItemsView, Iterable
57try:
58 from .typeutils import make_sentinel
59 _UNSET = make_sentinel('_UNSET')
60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
61except ImportError:
62 _REMAP_EXIT = object()
63 _UNSET = object()
65try:
66 from future_builtins import filter
67 from itertools import izip, izip_longest as zip_longest
68 _IS_PY3 = False
69except ImportError:
70 # Python 3 compat
71 _IS_PY3 = True
72 basestring = (str, bytes)
73 unicode = str
74 izip, xrange = zip, range
75 from itertools import zip_longest
78def is_iterable(obj):
79 """Similar in nature to :func:`callable`, ``is_iterable`` returns
80 ``True`` if an object is `iterable`_, ``False`` if not.
82 >>> is_iterable([])
83 True
84 >>> is_iterable(object())
85 False
87 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
88 """
89 try:
90 iter(obj)
91 except TypeError:
92 return False
93 return True
96def is_scalar(obj):
97 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
98 object is an iterable container type. Strings are considered
99 scalar as well, because strings are more often treated as whole
100 values as opposed to iterables of 1-character substrings.
102 >>> is_scalar(object())
103 True
104 >>> is_scalar(range(10))
105 False
106 >>> is_scalar('hello')
107 True
108 """
109 return not is_iterable(obj) or isinstance(obj, basestring)
112def is_collection(obj):
113 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
114 is an iterable other than a string.
116 >>> is_collection(object())
117 False
118 >>> is_collection(range(10))
119 True
120 >>> is_collection('hello')
121 False
122 """
123 return is_iterable(obj) and not isinstance(obj, basestring)
126def split(src, sep=None, maxsplit=None):
127 """Splits an iterable based on a separator. Like :meth:`str.split`,
128 but for all iterables. Returns a list of lists.
130 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
131 [['hi', 'hello'], ['sup'], ['soap']]
133 See :func:`split_iter` docs for more info.
134 """
135 return list(split_iter(src, sep, maxsplit))
138def split_iter(src, sep=None, maxsplit=None):
139 """Splits an iterable based on a separator, *sep*, a max of
140 *maxsplit* times (no max by default). *sep* can be:
142 * a single value
143 * an iterable of separators
144 * a single-argument callable that returns True when a separator is
145 encountered
147 ``split_iter()`` yields lists of non-separator values. A separator will
148 never appear in the output.
150 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
151 [['hi', 'hello'], ['sup'], ['soap']]
153 Note that ``split_iter`` is based on :func:`str.split`, so if
154 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
155 are desired between two contiguous ``None`` values, simply use
156 ``sep=[None]``:
158 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
159 [['hi', 'hello'], ['sup']]
160 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
161 [['hi', 'hello'], [], ['sup'], []]
163 Using a callable separator:
165 >>> falsy_sep = lambda x: not x
166 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
167 [['hi', 'hello'], [], ['sup'], []]
169 See :func:`split` for a list-returning version.
171 """
172 if not is_iterable(src):
173 raise TypeError('expected an iterable')
175 if maxsplit is not None:
176 maxsplit = int(maxsplit)
177 if maxsplit == 0:
178 yield [src]
179 return
181 if callable(sep):
182 sep_func = sep
183 elif not is_scalar(sep):
184 sep = frozenset(sep)
185 sep_func = lambda x: x in sep
186 else:
187 sep_func = lambda x: x == sep
189 cur_group = []
190 split_count = 0
191 for s in src:
192 if maxsplit is not None and split_count >= maxsplit:
193 sep_func = lambda x: False
194 if sep_func(s):
195 if sep is None and not cur_group:
196 # If sep is none, str.split() "groups" separators
197 # check the str.split() docs for more info
198 continue
199 split_count += 1
200 yield cur_group
201 cur_group = []
202 else:
203 cur_group.append(s)
205 if cur_group or sep is not None:
206 yield cur_group
207 return
210def lstrip(iterable, strip_value=None):
211 """Strips values from the beginning of an iterable. Stripped items will
212 match the value of the argument strip_value. Functionality is analogous
213 to that of the method str.lstrip. Returns a list.
215 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
216 ['Bar', 'Bam']
218 """
219 return list(lstrip_iter(iterable, strip_value))
222def lstrip_iter(iterable, strip_value=None):
223 """Strips values from the beginning of an iterable. Stripped items will
224 match the value of the argument strip_value. Functionality is analogous
225 to that of the method str.lstrip. Returns a generator.
227 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
228 ['Bar', 'Bam']
230 """
231 iterator = iter(iterable)
232 for i in iterator:
233 if i != strip_value:
234 yield i
235 break
236 for i in iterator:
237 yield i
240def rstrip(iterable, strip_value=None):
241 """Strips values from the end of an iterable. Stripped items will
242 match the value of the argument strip_value. Functionality is analogous
243 to that of the method str.rstrip. Returns a list.
245 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
246 ['Foo', 'Bar']
248 """
249 return list(rstrip_iter(iterable,strip_value))
252def rstrip_iter(iterable, strip_value=None):
253 """Strips values from the end of an iterable. Stripped items will
254 match the value of the argument strip_value. Functionality is analogous
255 to that of the method str.rstrip. Returns a generator.
257 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
258 ['Foo', 'Bar']
260 """
261 iterator = iter(iterable)
262 for i in iterator:
263 if i == strip_value:
264 cache = list()
265 cache.append(i)
266 broken = False
267 for i in iterator:
268 if i == strip_value:
269 cache.append(i)
270 else:
271 broken = True
272 break
273 if not broken: # Return to caller here because the end of the
274 return # iterator has been reached
275 for t in cache:
276 yield t
277 yield i
280def strip(iterable, strip_value=None):
281 """Strips values from the beginning and end of an iterable. Stripped items
282 will match the value of the argument strip_value. Functionality is
283 analogous to that of the method str.strip. Returns a list.
285 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
286 ['Foo', 'Bar', 'Bam']
288 """
289 return list(strip_iter(iterable,strip_value))
292def strip_iter(iterable,strip_value=None):
293 """Strips values from the beginning and end of an iterable. Stripped items
294 will match the value of the argument strip_value. Functionality is
295 analogous to that of the method str.strip. Returns a generator.
297 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
298 ['Foo', 'Bar', 'Bam']
300 """
301 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value)
304def chunked(src, size, count=None, **kw):
305 """Returns a list of *count* chunks, each with *size* elements,
306 generated from iterable *src*. If *src* is not evenly divisible by
307 *size*, the final chunk will have fewer than *size* elements.
308 Provide the *fill* keyword argument to provide a pad value and
309 enable padding, otherwise no padding will take place.
311 >>> chunked(range(10), 3)
312 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
313 >>> chunked(range(10), 3, fill=None)
314 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
315 >>> chunked(range(10), 3, count=2)
316 [[0, 1, 2], [3, 4, 5]]
318 See :func:`chunked_iter` for more info.
319 """
320 chunk_iter = chunked_iter(src, size, **kw)
321 if count is None:
322 return list(chunk_iter)
323 else:
324 return list(itertools.islice(chunk_iter, count))
327def _validate_positive_int(value, name, strictly_positive=True):
328 value = int(value)
329 if value < 0 or (strictly_positive and value == 0):
330 raise ValueError('expected a positive integer ' + name)
331 return value
334def chunked_iter(src, size, **kw):
335 """Generates *size*-sized chunks from *src* iterable. Unless the
336 optional *fill* keyword argument is provided, iterables not evenly
337 divisible by *size* will have a final chunk that is smaller than
338 *size*.
340 >>> list(chunked_iter(range(10), 3))
341 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
342 >>> list(chunked_iter(range(10), 3, fill=None))
343 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
345 Note that ``fill=None`` in fact uses ``None`` as the fill value.
346 """
347 # TODO: add count kwarg?
348 if not is_iterable(src):
349 raise TypeError('expected an iterable')
350 size = _validate_positive_int(size, 'chunk size')
351 do_fill = True
352 try:
353 fill_val = kw.pop('fill')
354 except KeyError:
355 do_fill = False
356 fill_val = None
357 if kw:
358 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
359 if not src:
360 return
361 postprocess = lambda chk: chk
362 if isinstance(src, basestring):
363 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
364 if _IS_PY3 and isinstance(src, bytes):
365 postprocess = lambda chk: bytes(chk)
366 src_iter = iter(src)
367 while True:
368 cur_chunk = list(itertools.islice(src_iter, size))
369 if not cur_chunk:
370 break
371 lc = len(cur_chunk)
372 if lc < size and do_fill:
373 cur_chunk[lc:] = [fill_val] * (size - lc)
374 yield postprocess(cur_chunk)
375 return
378def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
379 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
380 Optionally, a start of the input can be set via *input_offset*, and
381 and overlap between the chunks may be specified via *overlap_size*.
382 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
383 are always at the beginning of the chunk.
385 Returns an iterator of (start, end) tuples, one tuple per chunk.
387 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
388 [(10, 15), (15, 20)]
389 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
390 [(10, 15), (14, 19), (18, 20)]
391 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
392 [(10, 15), (13, 18), (16, 20)]
394 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
395 [(4, 9), (9, 14), (14, 19)]
396 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
397 [(4, 5), (5, 10), (10, 15), (15, 19)]
399 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
400 [(2, 7), (6, 11), (10, 15), (14, 17)]
401 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
402 [(2, 5), (4, 9), (8, 13), (12, 17)]
403 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
404 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
405 """
406 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
407 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
408 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
409 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
411 input_stop = input_offset + input_size
413 if align:
414 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
415 if initial_chunk_len != overlap_size:
416 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
417 if input_offset + initial_chunk_len >= input_stop:
418 return
419 input_offset = input_offset + initial_chunk_len - overlap_size
421 for i in range(input_offset, input_stop, chunk_size - overlap_size):
422 yield (i, min(i + chunk_size, input_stop))
424 if i + chunk_size >= input_stop:
425 return
428def pairwise(src, end=_UNSET):
429 """Convenience function for calling :func:`windowed` on *src*, with
430 *size* set to 2.
432 >>> pairwise(range(5))
433 [(0, 1), (1, 2), (2, 3), (3, 4)]
434 >>> pairwise([])
435 []
437 Unless *end* is set, the number of pairs is always one less than
438 the number of elements in the iterable passed in, except on an empty input,
439 which will return an empty list.
441 With *end* set, a number of pairs equal to the length of *src* is returned,
442 with the last item of the last pair being equal to *end*.
444 >>> list(pairwise(range(3), end=None))
445 [(0, 1), (1, 2), (2, None)]
447 This way, *end* values can be useful as sentinels to signal the end of the iterable.
448 """
449 return windowed(src, 2, fill=end)
452def pairwise_iter(src, end=_UNSET):
453 """Convenience function for calling :func:`windowed_iter` on *src*,
454 with *size* set to 2.
456 >>> list(pairwise_iter(range(5)))
457 [(0, 1), (1, 2), (2, 3), (3, 4)]
458 >>> list(pairwise_iter([]))
459 []
461 Unless *end* is set, the number of pairs is always one less
462 than the number of elements in the iterable passed in,
463 or zero, when *src* is empty.
465 With *end* set, a number of pairs equal to the length of *src* is returned,
466 with the last item of the last pair being equal to *end*.
468 >>> list(pairwise_iter(range(3), end=None))
469 [(0, 1), (1, 2), (2, None)]
471 This way, *end* values can be useful as sentinels to signal the end
472 of the iterable. For infinite iterators, setting *end* has no effect.
473 """
474 return windowed_iter(src, 2, fill=end)
477def windowed(src, size, fill=_UNSET):
478 """Returns tuples with exactly length *size*. If *fill* is unset
479 and the iterable is too short to make a window of length *size*,
480 no tuples are returned. See :func:`windowed_iter` for more.
481 """
482 return list(windowed_iter(src, size, fill=fill))
485def windowed_iter(src, size, fill=_UNSET):
486 """Returns tuples with length *size* which represent a sliding
487 window over iterable *src*.
489 >>> list(windowed_iter(range(7), 3))
490 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
492 If *fill* is unset, and the iterable is too short to make a window
493 of length *size*, then no window tuples are returned.
495 >>> list(windowed_iter(range(3), 5))
496 []
498 With *fill* set, the iterator always yields a number of windows
499 equal to the length of the *src* iterable.
501 >>> windowed(range(4), 3, fill=None)
502 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
504 This way, *fill* values can be useful to signal the end of the iterable.
505 For infinite iterators, setting *fill* has no effect.
506 """
507 tees = itertools.tee(src, size)
508 if fill is _UNSET:
509 try:
510 for i, t in enumerate(tees):
511 for _ in range(i):
512 next(t)
513 except StopIteration:
514 return zip([])
515 return zip(*tees)
517 for i, t in enumerate(tees):
518 for _ in range(i):
519 try:
520 next(t)
521 except StopIteration:
522 continue
523 return zip_longest(*tees, fillvalue=fill)
527def xfrange(stop, start=None, step=1.0):
528 """Same as :func:`frange`, but generator-based instead of returning a
529 list.
531 >>> tuple(xfrange(1, 3, step=0.75))
532 (1.0, 1.75, 2.5)
534 See :func:`frange` for more details.
535 """
536 if not step:
537 raise ValueError('step must be non-zero')
538 if start is None:
539 start, stop = 0.0, stop * 1.0
540 else:
541 # swap when all args are used
542 stop, start = start * 1.0, stop * 1.0
543 cur = start
544 while cur < stop:
545 yield cur
546 cur += step
549def frange(stop, start=None, step=1.0):
550 """A :func:`range` clone for float-based ranges.
552 >>> frange(5)
553 [0.0, 1.0, 2.0, 3.0, 4.0]
554 >>> frange(6, step=1.25)
555 [0.0, 1.25, 2.5, 3.75, 5.0]
556 >>> frange(100.5, 101.5, 0.25)
557 [100.5, 100.75, 101.0, 101.25]
558 >>> frange(5, 0)
559 []
560 >>> frange(5, 0, step=-1.25)
561 [5.0, 3.75, 2.5, 1.25]
562 """
563 if not step:
564 raise ValueError('step must be non-zero')
565 if start is None:
566 start, stop = 0.0, stop * 1.0
567 else:
568 # swap when all args are used
569 stop, start = start * 1.0, stop * 1.0
570 count = int(math.ceil((stop - start) / step))
571 ret = [None] * count
572 if not ret:
573 return ret
574 ret[0] = start
575 for i in xrange(1, count):
576 ret[i] = ret[i - 1] + step
577 return ret
580def backoff(start, stop, count=None, factor=2.0, jitter=False):
581 """Returns a list of geometrically-increasing floating-point numbers,
582 suitable for usage with `exponential backoff`_. Exactly like
583 :func:`backoff_iter`, but without the ``'repeat'`` option for
584 *count*. See :func:`backoff_iter` for more details.
586 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
588 >>> backoff(1, 10)
589 [1.0, 2.0, 4.0, 8.0, 10.0]
590 """
591 if count == 'repeat':
592 raise ValueError("'repeat' supported in backoff_iter, not backoff")
593 return list(backoff_iter(start, stop, count=count,
594 factor=factor, jitter=jitter))
597def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
598 """Generates a sequence of geometrically-increasing floats, suitable
599 for usage with `exponential backoff`_. Starts with *start*,
600 increasing by *factor* until *stop* is reached, optionally
601 stopping iteration once *count* numbers are yielded. *factor*
602 defaults to 2. In general retrying with properly-configured
603 backoff creates a better-behaved component for a larger service
604 ecosystem.
606 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
608 >>> list(backoff_iter(1.0, 10.0, count=5))
609 [1.0, 2.0, 4.0, 8.0, 10.0]
610 >>> list(backoff_iter(1.0, 10.0, count=8))
611 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
612 >>> list(backoff_iter(0.25, 100.0, factor=10))
613 [0.25, 2.5, 25.0, 100.0]
615 A simplified usage example:
617 .. code-block:: python
619 for timeout in backoff_iter(0.25, 5.0):
620 try:
621 res = network_call()
622 break
623 except Exception as e:
624 log(e)
625 time.sleep(timeout)
627 An enhancement for large-scale systems would be to add variation,
628 or *jitter*, to timeout values. This is done to avoid a thundering
629 herd on the receiving end of the network call.
631 Finally, for *count*, the special value ``'repeat'`` can be passed to
632 continue yielding indefinitely.
634 Args:
636 start (float): Positive number for baseline.
637 stop (float): Positive number for maximum.
638 count (int): Number of steps before stopping
639 iteration. Defaults to the number of steps between *start* and
640 *stop*. Pass the string, `'repeat'`, to continue iteration
641 indefinitely.
642 factor (float): Rate of exponential increase. Defaults to `2.0`,
643 e.g., `[1, 2, 4, 8, 16]`.
644 jitter (float): A factor between `-1.0` and `1.0`, used to
645 uniformly randomize and thus spread out timeouts in a distributed
646 system, avoiding rhythm effects. Positive values use the base
647 backoff curve as a maximum, negative values use the curve as a
648 minimum. Set to 1.0 or `True` for a jitter approximating
649 Ethernet's time-tested backoff solution. Defaults to `False`.
651 """
652 start = float(start)
653 stop = float(stop)
654 factor = float(factor)
655 if start < 0.0:
656 raise ValueError('expected start >= 0, not %r' % start)
657 if factor < 1.0:
658 raise ValueError('expected factor >= 1.0, not %r' % factor)
659 if stop == 0.0:
660 raise ValueError('expected stop >= 0')
661 if stop < start:
662 raise ValueError('expected stop >= start, not %r' % stop)
663 if count is None:
664 denom = start if start else 1
665 count = 1 + math.ceil(math.log(stop/denom, factor))
666 count = count if start else count + 1
667 if count != 'repeat' and count < 0:
668 raise ValueError('count must be positive or "repeat", not %r' % count)
669 if jitter:
670 jitter = float(jitter)
671 if not (-1.0 <= jitter <= 1.0):
672 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
674 cur, i = start, 0
675 while count == 'repeat' or i < count:
676 if not jitter:
677 cur_ret = cur
678 elif jitter:
679 cur_ret = cur - (cur * jitter * random.random())
680 yield cur_ret
681 i += 1
682 if cur == 0:
683 cur = 1
684 elif cur < stop:
685 cur *= factor
686 if cur > stop:
687 cur = stop
688 return
691def bucketize(src, key=bool, value_transform=None, key_filter=None):
692 """Group values in the *src* iterable by the value returned by *key*.
694 >>> bucketize(range(5))
695 {False: [0], True: [1, 2, 3, 4]}
696 >>> is_odd = lambda x: x % 2 == 1
697 >>> bucketize(range(5), is_odd)
698 {False: [0, 2, 4], True: [1, 3]}
700 *key* is :class:`bool` by default, but can either be a callable or a string or a list
701 if it is a string, it is the name of the attribute on which to bucketize objects.
703 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
704 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
706 if *key* is a list, it contains the buckets where to put each object
708 >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2])
709 {0: [1, 4], 1: [2], 2: [365, 98]}
712 Value lists are not deduplicated:
714 >>> bucketize([None, None, None, 'hello'])
715 {False: [None, None, None], True: ['hello']}
717 Bucketize into more than 3 groups
719 >>> bucketize(range(10), lambda x: x % 3)
720 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
722 ``bucketize`` has a couple of advanced options useful in certain
723 cases. *value_transform* can be used to modify values as they are
724 added to buckets, and *key_filter* will allow excluding certain
725 buckets from being collected.
727 >>> bucketize(range(5), value_transform=lambda x: x*x)
728 {False: [0], True: [1, 4, 9, 16]}
730 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
731 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
733 Note in some of these examples there were at most two keys, ``True`` and
734 ``False``, and each key present has a list with at least one
735 item. See :func:`partition` for a version specialized for binary
736 use cases.
738 """
739 if not is_iterable(src):
740 raise TypeError('expected an iterable')
741 elif isinstance(key, list):
742 if len(key) != len(src):
743 raise ValueError("key and src have to be the same length")
744 src = zip(key, src)
746 if isinstance(key, basestring):
747 key_func = lambda x: getattr(x, key, x)
748 elif callable(key):
749 key_func = key
750 elif isinstance(key, list):
751 key_func = lambda x: x[0]
752 else:
753 raise TypeError('expected key to be callable or a string or a list')
755 if value_transform is None:
756 value_transform = lambda x: x
757 if not callable(value_transform):
758 raise TypeError('expected callable value transform function')
759 if isinstance(key, list):
760 f = value_transform
761 value_transform=lambda x: f(x[1])
763 ret = {}
764 for val in src:
765 key_of_val = key_func(val)
766 if key_filter is None or key_filter(key_of_val):
767 ret.setdefault(key_of_val, []).append(value_transform(val))
768 return ret
771def partition(src, key=bool):
772 """No relation to :meth:`str.partition`, ``partition`` is like
773 :func:`bucketize`, but for added convenience returns a tuple of
774 ``(truthy_values, falsy_values)``.
776 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
777 >>> nonempty
778 ['hi', 'bye']
780 *key* defaults to :class:`bool`, but can be carefully overridden to
781 use either a function that returns either ``True`` or ``False`` or
782 a string name of the attribute on which to partition objects.
784 >>> import string
785 >>> is_digit = lambda x: x in string.digits
786 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
787 >>> ''.join(decimal_digits), ''.join(hexletters)
788 ('0123456789', 'abcdefABCDEF')
789 """
790 bucketized = bucketize(src, key)
791 return bucketized.get(True, []), bucketized.get(False, [])
794def unique(src, key=None):
795 """``unique()`` returns a list of unique values, as determined by
796 *key*, in the order they first appeared in the input iterable,
797 *src*.
799 >>> ones_n_zeros = '11010110001010010101010'
800 >>> ''.join(unique(ones_n_zeros))
801 '10'
803 See :func:`unique_iter` docs for more details.
804 """
805 return list(unique_iter(src, key))
808def unique_iter(src, key=None):
809 """Yield unique elements from the iterable, *src*, based on *key*,
810 in the order in which they first appeared in *src*.
812 >>> repetitious = [1, 2, 3] * 10
813 >>> list(unique_iter(repetitious))
814 [1, 2, 3]
816 By default, *key* is the object itself, but *key* can either be a
817 callable or, for convenience, a string name of the attribute on
818 which to uniqueify objects, falling back on identity when the
819 attribute is not present.
821 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
822 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
823 ['hi', 'hello', 'bye']
824 """
825 if not is_iterable(src):
826 raise TypeError('expected an iterable, not %r' % type(src))
827 if key is None:
828 key_func = lambda x: x
829 elif callable(key):
830 key_func = key
831 elif isinstance(key, basestring):
832 key_func = lambda x: getattr(x, key, x)
833 else:
834 raise TypeError('"key" expected a string or callable, not %r' % key)
835 seen = set()
836 for i in src:
837 k = key_func(i)
838 if k not in seen:
839 seen.add(k)
840 yield i
841 return
844def redundant(src, key=None, groups=False):
845 """The complement of :func:`unique()`.
847 By default returns non-unique/duplicate values as a list of the
848 *first* redundant value in *src*. Pass ``groups=True`` to get
849 groups of all values with redundancies, ordered by position of the
850 first redundant value. This is useful in conjunction with some
851 normalizing *key* function.
853 >>> redundant([1, 2, 3, 4])
854 []
855 >>> redundant([1, 2, 3, 2, 3, 3, 4])
856 [2, 3]
857 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
858 [[2, 2], [3, 3, 3]]
860 An example using a *key* function to do case-insensitive
861 redundancy detection.
863 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
864 ['Hi']
865 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
866 [['hi', 'Hi', 'HI']]
868 *key* should also be used when the values in *src* are not hashable.
870 .. note::
872 This output of this function is designed for reporting
873 duplicates in contexts when a unique input is desired. Due to
874 the grouped return type, there is no streaming equivalent of
875 this function for the time being.
877 """
878 if key is None:
879 pass
880 elif callable(key):
881 key_func = key
882 elif isinstance(key, basestring):
883 key_func = lambda x: getattr(x, key, x)
884 else:
885 raise TypeError('"key" expected a string or callable, not %r' % key)
886 seen = {} # key to first seen item
887 redundant_order = []
888 redundant_groups = {}
889 for i in src:
890 k = key_func(i) if key else i
891 if k not in seen:
892 seen[k] = i
893 else:
894 if k in redundant_groups:
895 if groups:
896 redundant_groups[k].append(i)
897 else:
898 redundant_order.append(k)
899 redundant_groups[k] = [seen[k], i]
900 if not groups:
901 ret = [redundant_groups[k][1] for k in redundant_order]
902 else:
903 ret = [redundant_groups[k] for k in redundant_order]
904 return ret
907def one(src, default=None, key=None):
908 """Along the same lines as builtins, :func:`all` and :func:`any`, and
909 similar to :func:`first`, ``one()`` returns the single object in
910 the given iterable *src* that evaluates to ``True``, as determined
911 by callable *key*. If unset, *key* defaults to :class:`bool`. If
912 no such objects are found, *default* is returned. If *default* is
913 not passed, ``None`` is returned.
915 If *src* has more than one object that evaluates to ``True``, or
916 if there is no object that fulfills such condition, return
917 *default*. It's like an `XOR`_ over an iterable.
919 >>> one((True, False, False))
920 True
921 >>> one((True, False, True))
922 >>> one((0, 0, 'a'))
923 'a'
924 >>> one((0, False, None))
925 >>> one((True, True), default=False)
926 False
927 >>> bool(one(('', 1)))
928 True
929 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
930 42
932 See `Martín Gaitán's original repo`_ for further use cases.
934 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
935 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
937 """
938 ones = list(itertools.islice(filter(key, src), 2))
939 return ones[0] if len(ones) == 1 else default
942def first(iterable, default=None, key=None):
943 """Return first element of *iterable* that evaluates to ``True``, else
944 return ``None`` or optional *default*. Similar to :func:`one`.
946 >>> first([0, False, None, [], (), 42])
947 42
948 >>> first([0, False, None, [], ()]) is None
949 True
950 >>> first([0, False, None, [], ()], default='ohai')
951 'ohai'
952 >>> import re
953 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
954 >>> m.group(1)
955 'bc'
957 The optional *key* argument specifies a one-argument predicate function
958 like that used for *filter()*. The *key* argument, if supplied, should be
959 in keyword form. For example, finding the first even number in an iterable:
961 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
962 4
964 Contributed by Hynek Schlawack, author of `the original standalone module`_.
966 .. _the original standalone module: https://github.com/hynek/first
967 """
968 return next(filter(key, iterable), default)
971def flatten_iter(iterable):
972 """``flatten_iter()`` yields all the elements from *iterable* while
973 collapsing any nested iterables.
975 >>> nested = [[1, 2], [[3], [4, 5]]]
976 >>> list(flatten_iter(nested))
977 [1, 2, 3, 4, 5]
978 """
979 for item in iterable:
980 if isinstance(item, Iterable) and not isinstance(item, basestring):
981 for subitem in flatten_iter(item):
982 yield subitem
983 else:
984 yield item
986def flatten(iterable):
987 """``flatten()`` returns a collapsed list of all the elements from
988 *iterable* while collapsing any nested iterables.
990 >>> nested = [[1, 2], [[3], [4, 5]]]
991 >>> flatten(nested)
992 [1, 2, 3, 4, 5]
993 """
994 return list(flatten_iter(iterable))
997def same(iterable, ref=_UNSET):
998 """``same()`` returns ``True`` when all values in *iterable* are
999 equal to one another, or optionally a reference value,
1000 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
1001 an iterable and returns a :class:`bool`. ``same()`` returns
1002 ``True`` for empty iterables.
1004 >>> same([])
1005 True
1006 >>> same([1])
1007 True
1008 >>> same(['a', 'a', 'a'])
1009 True
1010 >>> same(range(20))
1011 False
1012 >>> same([[], []])
1013 True
1014 >>> same([[], []], ref='test')
1015 False
1017 """
1018 iterator = iter(iterable)
1019 if ref is _UNSET:
1020 ref = next(iterator, ref)
1021 return all(val == ref for val in iterator)
1024def default_visit(path, key, value):
1025 # print('visit(%r, %r, %r)' % (path, key, value))
1026 return key, value
1028# enable the extreme: monkeypatching iterutils with a different default_visit
1029_orig_default_visit = default_visit
1032def default_enter(path, key, value):
1033 # print('enter(%r, %r)' % (key, value))
1034 if isinstance(value, basestring):
1035 return value, False
1036 elif isinstance(value, Mapping):
1037 return value.__class__(), ItemsView(value)
1038 elif isinstance(value, Sequence):
1039 return value.__class__(), enumerate(value)
1040 elif isinstance(value, Set):
1041 return value.__class__(), enumerate(value)
1042 else:
1043 # files, strings, other iterables, and scalars are not
1044 # traversed
1045 return value, False
1048def default_exit(path, key, old_parent, new_parent, new_items):
1049 # print('exit(%r, %r, %r, %r, %r)'
1050 # % (path, key, old_parent, new_parent, new_items))
1051 ret = new_parent
1052 if isinstance(new_parent, Mapping):
1053 new_parent.update(new_items)
1054 elif isinstance(new_parent, Sequence):
1055 vals = [v for i, v in new_items]
1056 try:
1057 new_parent.extend(vals)
1058 except AttributeError:
1059 ret = new_parent.__class__(vals) # tuples
1060 elif isinstance(new_parent, Set):
1061 vals = [v for i, v in new_items]
1062 try:
1063 new_parent.update(vals)
1064 except AttributeError:
1065 ret = new_parent.__class__(vals) # frozensets
1066 else:
1067 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1068 return ret
1071def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1072 **kwargs):
1073 """The remap ("recursive map") function is used to traverse and
1074 transform nested structures. Lists, tuples, sets, and dictionaries
1075 are just a few of the data structures nested into heterogeneous
1076 tree-like structures that are so common in programming.
1077 Unfortunately, Python's built-in ways to manipulate collections
1078 are almost all flat. List comprehensions may be fast and succinct,
1079 but they do not recurse, making it tedious to apply quick changes
1080 or complex transforms to real-world data.
1082 remap goes where list comprehensions cannot.
1084 Here's an example of removing all Nones from some data:
1086 >>> from pprint import pprint
1087 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1088 ... 'Babylon 5': 6, 'Dr. Who': None}
1089 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1090 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1092 Notice how both Nones have been removed despite the nesting in the
1093 dictionary. Not bad for a one-liner, and that's just the beginning.
1094 See `this remap cookbook`_ for more delicious recipes.
1096 .. _this remap cookbook: http://sedimental.org/remap.html
1098 remap takes four main arguments: the object to traverse and three
1099 optional callables which determine how the remapped object will be
1100 created.
1102 Args:
1104 root: The target object to traverse. By default, remap
1105 supports iterables like :class:`list`, :class:`tuple`,
1106 :class:`dict`, and :class:`set`, but any object traversable by
1107 *enter* will work.
1108 visit (callable): This function is called on every item in
1109 *root*. It must accept three positional arguments, *path*,
1110 *key*, and *value*. *path* is simply a tuple of parents'
1111 keys. *visit* should return the new key-value pair. It may
1112 also return ``True`` as shorthand to keep the old item
1113 unmodified, or ``False`` to drop the item from the new
1114 structure. *visit* is called after *enter*, on the new parent.
1116 The *visit* function is called for every item in root,
1117 including duplicate items. For traversable values, it is
1118 called on the new parent object, after all its children
1119 have been visited. The default visit behavior simply
1120 returns the key-value pair unmodified.
1121 enter (callable): This function controls which items in *root*
1122 are traversed. It accepts the same arguments as *visit*: the
1123 path, the key, and the value of the current item. It returns a
1124 pair of the blank new parent, and an iterator over the items
1125 which should be visited. If ``False`` is returned instead of
1126 an iterator, the value will not be traversed.
1128 The *enter* function is only called once per unique value. The
1129 default enter behavior support mappings, sequences, and
1130 sets. Strings and all other iterables will not be traversed.
1131 exit (callable): This function determines how to handle items
1132 once they have been visited. It gets the same three
1133 arguments as the other functions -- *path*, *key*, *value*
1134 -- plus two more: the blank new parent object returned
1135 from *enter*, and a list of the new items, as remapped by
1136 *visit*.
1138 Like *enter*, the *exit* function is only called once per
1139 unique value. The default exit behavior is to simply add
1140 all new items to the new parent, e.g., using
1141 :meth:`list.extend` and :meth:`dict.update` to add to the
1142 new parent. Immutable objects, such as a :class:`tuple` or
1143 :class:`namedtuple`, must be recreated from scratch, but
1144 use the same type as the new parent passed back from the
1145 *enter* function.
1146 reraise_visit (bool): A pragmatic convenience for the *visit*
1147 callable. When set to ``False``, remap ignores any errors
1148 raised by the *visit* callback. Items causing exceptions
1149 are kept. See examples for more details.
1151 remap is designed to cover the majority of cases with just the
1152 *visit* callable. While passing in multiple callables is very
1153 empowering, remap is designed so very few cases should require
1154 passing more than one function.
1156 When passing *enter* and *exit*, it's common and easiest to build
1157 on the default behavior. Simply add ``from boltons.iterutils import
1158 default_enter`` (or ``default_exit``), and have your enter/exit
1159 function call the default behavior before or after your custom
1160 logic. See `this example`_.
1162 Duplicate and self-referential objects (aka reference loops) are
1163 automatically handled internally, `as shown here`_.
1165 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1166 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1168 """
1169 # TODO: improve argument formatting in sphinx doc
1170 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1171 if not callable(visit):
1172 raise TypeError('visit expected callable, not: %r' % visit)
1173 if not callable(enter):
1174 raise TypeError('enter expected callable, not: %r' % enter)
1175 if not callable(exit):
1176 raise TypeError('exit expected callable, not: %r' % exit)
1177 reraise_visit = kwargs.pop('reraise_visit', True)
1178 if kwargs:
1179 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1181 path, registry, stack = (), {}, [(None, root)]
1182 new_items_stack = []
1183 while stack:
1184 key, value = stack.pop()
1185 id_value = id(value)
1186 if key is _REMAP_EXIT:
1187 key, new_parent, old_parent = value
1188 id_value = id(old_parent)
1189 path, new_items = new_items_stack.pop()
1190 value = exit(path, key, old_parent, new_parent, new_items)
1191 registry[id_value] = value
1192 if not new_items_stack:
1193 continue
1194 elif id_value in registry:
1195 value = registry[id_value]
1196 else:
1197 res = enter(path, key, value)
1198 try:
1199 new_parent, new_items = res
1200 except TypeError:
1201 # TODO: handle False?
1202 raise TypeError('enter should return a tuple of (new_parent,'
1203 ' items_iterator), not: %r' % res)
1204 if new_items is not False:
1205 # traverse unless False is explicitly passed
1206 registry[id_value] = new_parent
1207 new_items_stack.append((path, []))
1208 if value is not root:
1209 path += (key,)
1210 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1211 if new_items:
1212 stack.extend(reversed(list(new_items)))
1213 continue
1214 if visit is _orig_default_visit:
1215 # avoid function call overhead by inlining identity operation
1216 visited_item = (key, value)
1217 else:
1218 try:
1219 visited_item = visit(path, key, value)
1220 except Exception:
1221 if reraise_visit:
1222 raise
1223 visited_item = True
1224 if visited_item is False:
1225 continue # drop
1226 elif visited_item is True:
1227 visited_item = (key, value)
1228 # TODO: typecheck?
1229 # raise TypeError('expected (key, value) from visit(),'
1230 # ' not: %r' % visited_item)
1231 try:
1232 new_items_stack[-1][1].append(visited_item)
1233 except IndexError:
1234 raise TypeError('expected remappable root, not: %r' % root)
1235 return value
1238class PathAccessError(KeyError, IndexError, TypeError):
1239 """An amalgamation of KeyError, IndexError, and TypeError,
1240 representing what can occur when looking up a path in a nested
1241 object.
1242 """
1243 def __init__(self, exc, seg, path):
1244 self.exc = exc
1245 self.seg = seg
1246 self.path = path
1248 def __repr__(self):
1249 cn = self.__class__.__name__
1250 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)
1252 def __str__(self):
1253 return ('could not access %r from path %r, got error: %r'
1254 % (self.seg, self.path, self.exc))
1257def get_path(root, path, default=_UNSET):
1258 """Retrieve a value from a nested object via a tuple representing the
1259 lookup path.
1261 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1262 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1263 3
1265 The path format is intentionally consistent with that of
1266 :func:`remap`.
1268 One of get_path's chief aims is improved error messaging. EAFP is
1269 great, but the error messages are not.
1271 For instance, ``root['a']['b']['c'][2][1]`` gives back
1272 ``IndexError: list index out of range``
1274 What went out of range where? get_path currently raises
1275 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,
1276 1), got error: IndexError('list index out of range',)``, a
1277 subclass of IndexError and KeyError.
1279 You can also pass a default that covers the entire operation,
1280 should the lookup fail at any level.
1282 Args:
1283 root: The target nesting of dictionaries, lists, or other
1284 objects supporting ``__getitem__``.
1285 path (tuple): A list of strings and integers to be successively
1286 looked up within *root*.
1287 default: The value to be returned should any
1288 ``PathAccessError`` exceptions be raised.
1289 """
1290 if isinstance(path, basestring):
1291 path = path.split('.')
1292 cur = root
1293 try:
1294 for seg in path:
1295 try:
1296 cur = cur[seg]
1297 except (KeyError, IndexError) as exc:
1298 raise PathAccessError(exc, seg, path)
1299 except TypeError as exc:
1300 # either string index in a list, or a parent that
1301 # doesn't support indexing
1302 try:
1303 seg = int(seg)
1304 cur = cur[seg]
1305 except (ValueError, KeyError, IndexError, TypeError):
1306 if not is_iterable(cur):
1307 exc = TypeError('%r object is not indexable'
1308 % type(cur).__name__)
1309 raise PathAccessError(exc, seg, path)
1310 except PathAccessError:
1311 if default is _UNSET:
1312 raise
1313 return default
1314 return cur
1317def research(root, query=lambda p, k, v: True, reraise=False):
1318 """The :func:`research` function uses :func:`remap` to recurse over
1319 any data nested in *root*, and find values which match a given
1320 criterion, specified by the *query* callable.
1322 Results are returned as a list of ``(path, value)`` pairs. The
1323 paths are tuples in the same format accepted by
1324 :func:`get_path`. This can be useful for comparing values nested
1325 in two or more different structures.
1327 Here's a simple example that finds all integers:
1329 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1330 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1331 >>> print(sorted(res))
1332 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1334 Note how *query* follows the same, familiar ``path, key, value``
1335 signature as the ``visit`` and ``enter`` functions on
1336 :func:`remap`, and returns a :class:`bool`.
1338 Args:
1339 root: The target object to search. Supports the same types of
1340 objects as :func:`remap`, including :class:`list`,
1341 :class:`tuple`, :class:`dict`, and :class:`set`.
1342 query (callable): The function called on every object to
1343 determine whether to include it in the search results. The
1344 callable must accept three arguments, *path*, *key*, and
1345 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1346 *enter* and *visit* from :func:`remap`.
1347 reraise (bool): Whether to reraise exceptions raised by *query*
1348 or to simply drop the result that caused the error.
1351 With :func:`research` it's easy to inspect the details of a data
1352 structure, like finding values that are at a certain depth (using
1353 ``len(p)``) and much more. If more advanced functionality is
1354 needed, check out the code and make your own :func:`remap`
1355 wrapper, and consider `submitting a patch`_!
1357 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1358 """
1359 ret = []
1361 if not callable(query):
1362 raise TypeError('query expected callable, not: %r' % query)
1364 def enter(path, key, value):
1365 try:
1366 if query(path, key, value):
1367 ret.append((path + (key,), value))
1368 except Exception:
1369 if reraise:
1370 raise
1371 return default_enter(path, key, value)
1373 remap(root, enter=enter)
1374 return ret
1377# TODO: recollect()
1378# TODO: refilter()
1379# TODO: reiter()
1382# GUID iterators: 10x faster and somewhat more compact than uuid.
1384class GUIDerator(object):
1385 """The GUIDerator is an iterator that yields a globally-unique
1386 identifier (GUID) on every iteration. The GUIDs produced are
1387 hexadecimal strings.
1389 Testing shows it to be around 12x faster than the uuid module. By
1390 default it is also more compact, partly due to its default 96-bit
1391 (24-hexdigit) length. 96 bits of randomness means that there is a
1392 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1393 or less uniqueness is desired, the *size* argument can be adjusted
1394 accordingly.
1396 Args:
1397 size (int): character length of the GUID, defaults to 24. Lengths
1398 between 20 and 36 are considered valid.
1400 The GUIDerator has built-in fork protection that causes it to
1401 detect a fork on next iteration and reseed accordingly.
1403 """
1404 def __init__(self, size=24):
1405 self.size = size
1406 if size < 20 or size > 36:
1407 raise ValueError('expected 20 < size <= 36')
1408 import hashlib
1409 self._sha1 = hashlib.sha1
1410 self.count = itertools.count()
1411 self.reseed()
1413 def reseed(self):
1414 import socket
1415 self.pid = os.getpid()
1416 self.salt = '-'.join([str(self.pid),
1417 socket.gethostname() or b'<nohostname>',
1418 str(time.time()),
1419 codecs.encode(os.urandom(6),
1420 'hex_codec').decode('ascii')])
1421 # that codecs trick is the best/only way to get a bytes to
1422 # hexbytes in py2/3
1423 return
1425 def __iter__(self):
1426 return self
1428 if _IS_PY3:
1429 def __next__(self):
1430 if os.getpid() != self.pid:
1431 self.reseed()
1432 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1433 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1434 return hash_text
1435 else:
1436 def __next__(self):
1437 if os.getpid() != self.pid:
1438 self.reseed()
1439 return self._sha1(self.salt +
1440 str(next(self.count))).hexdigest()[:self.size]
1442 next = __next__
1445class SequentialGUIDerator(GUIDerator):
1446 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1447 iterator that yields a globally-unique identifier (GUID) on every
1448 iteration. The GUIDs produced are hexadecimal strings.
1450 The SequentialGUIDerator differs in that it picks a starting GUID
1451 value and increments every iteration. This yields GUIDs which are
1452 of course unique, but also ordered and lexicographically sortable.
1454 The SequentialGUIDerator is around 50% faster than the normal
1455 GUIDerator, making it almost 20x as fast as the built-in uuid
1456 module. By default it is also more compact, partly due to its
1457 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1458 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1459 iterations. If more or less uniqueness is desired, the *size*
1460 argument can be adjusted accordingly.
1462 Args:
1463 size (int): character length of the GUID, defaults to 24.
1465 Note that with SequentialGUIDerator there is a chance of GUIDs
1466 growing larger than the size configured. The SequentialGUIDerator
1467 has built-in fork protection that causes it to detect a fork on
1468 next iteration and reseed accordingly.
1470 """
1472 if _IS_PY3:
1473 def reseed(self):
1474 super(SequentialGUIDerator, self).reseed()
1475 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1476 self.start = int(start_str[:self.size], 16)
1477 self.start |= (1 << ((self.size * 4) - 2))
1478 else:
1479 def reseed(self):
1480 super(SequentialGUIDerator, self).reseed()
1481 start_str = self._sha1(self.salt).hexdigest()
1482 self.start = int(start_str[:self.size], 16)
1483 self.start |= (1 << ((self.size * 4) - 2))
1485 def __next__(self):
1486 if os.getpid() != self.pid:
1487 self.reseed()
1488 return '%x' % (next(self.count) + self.start)
1490 next = __next__
1493guid_iter = GUIDerator()
1494seq_guid_iter = SequentialGUIDerator()
1497def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1498 """For when you care about the order of some elements, but not about
1499 others.
1501 Use this to float to the top and/or sink to the bottom a specific
1502 ordering, while sorting the rest of the elements according to
1503 normal :func:`sorted` rules.
1505 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1506 ['one', 'two', 'a', 'b']
1507 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1508 [6, 5, 3, 1, 0, 2, 4]
1509 >>> import string
1510 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1511 'aA1023456789cCdDeEfFbB'
1513 Args:
1514 iterable (list): A list or other iterable to sort.
1515 first (list): A sequence to enforce for elements which should
1516 appear at the beginning of the returned list.
1517 last (list): A sequence to enforce for elements which should
1518 appear at the end of the returned list.
1519 key (callable): Callable used to generate a comparable key for
1520 each item to be sorted, same as the key in
1521 :func:`sorted`. Note that entries in *first* and *last*
1522 should be the keys for the items. Defaults to
1523 passthrough/the identity function.
1524 reverse (bool): Whether or not elements not explicitly ordered
1525 by *first* and *last* should be in reverse order or not.
1527 Returns a new list in sorted order.
1528 """
1529 first = first or []
1530 last = last or []
1531 key = key or (lambda x: x)
1532 seq = list(iterable)
1533 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1534 other.sort(key=key, reverse=reverse)
1536 if first:
1537 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1538 if last:
1539 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1540 return first + other + last
1543def untyped_sorted(iterable, key=None, reverse=False):
1544 """A version of :func:`sorted` which will happily sort an iterable of
1545 heterogeneous types and return a new list, similar to legacy Python's
1546 behavior.
1548 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1549 [1, 2.0, 2, 'abc', 'def']
1551 Note how mutually orderable types are sorted as expected, as in
1552 the case of the integers and floats above.
1554 .. note::
1556 Results may vary across Python versions and builds, but the
1557 function will produce a sorted list, except in the case of
1558 explicitly unorderable objects.
1560 """
1561 class _Wrapper(object):
1562 slots = ('obj',)
1564 def __init__(self, obj):
1565 self.obj = obj
1567 def __lt__(self, other):
1568 obj = key(self.obj) if key is not None else self.obj
1569 other = key(other.obj) if key is not None else other.obj
1570 try:
1571 ret = obj < other
1572 except TypeError:
1573 ret = ((type(obj).__name__, id(type(obj)), obj)
1574 < (type(other).__name__, id(type(other)), other))
1575 return ret
1577 if key is not None and not callable(key):
1578 raise TypeError('expected function or callable object for key, not: %r'
1579 % key)
1581 return sorted(iterable, key=_Wrapper, reverse=reverse)
1583"""
1584May actually be faster to do an isinstance check for a str path
1586$ python -m timeit -s "x = [1]" "x[0]"
158710000000 loops, best of 3: 0.0207 usec per loop
1588$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
158910000000 loops, best of 3: 0.029 usec per loop
1590$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15911000000 loops, best of 3: 0.315 usec per loop
1592# setting up try/except is fast, only around 0.01us
1593# actually triggering the exception takes almost 10x as long
1595$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
159610000000 loops, best of 3: 0.141 usec per loop
1597$ python -m timeit -s "x = [1]" "isinstance(x, str)"
159810000000 loops, best of 3: 0.131 usec per loop
1599$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
16001000000 loops, best of 3: 0.443 usec per loop
1601$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
16021000000 loops, best of 3: 0.544 usec per loop
1603"""