Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 25%
537 statements
coverage.py v7.2.7, created at 2023-06-07 06:13 +0000
1# -*- coding: utf-8 -*-
3# Copyright (c) 2013, Mahmoud Hashemi
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#
12# * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following
14# disclaimer in the documentation and/or other materials provided
15# with the distribution.
16#
17# * The names of the contributors may not be used to endorse or
18# promote products derived from this software without specific
19# prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33""":mod:`itertools` is full of great examples of Python generator
34usage. However, there are still some critical gaps. ``iterutils``
35fills many of those gaps with featureful, tested, and Pythonic
36solutions.
38Many of the functions below have two versions, one which
39returns an iterator (denoted by the ``*_iter`` naming pattern), and a
40shorter-named convenience form that returns a list. Some of the
41following are based on examples from the itertools docs.
42"""
44import os
45import math
46import time
47import codecs
48import random
49import itertools
51try:
52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
53except ImportError:
54 from collections import Mapping, Sequence, Set, ItemsView, Iterable
57try:
58 from .typeutils import make_sentinel
59 _UNSET = make_sentinel('_UNSET')
60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
61except ImportError:
62 _REMAP_EXIT = object()
63 _UNSET = object()
65try:
66 from future_builtins import filter
67 from itertools import izip
68 _IS_PY3 = False
69except ImportError:
70 # Python 3 compat
71 _IS_PY3 = True
72 basestring = (str, bytes)
73 unicode = str
74 izip, xrange = zip, range
77def is_iterable(obj):
78 """Similar in nature to :func:`callable`, ``is_iterable`` returns
79 ``True`` if an object is `iterable`_, ``False`` if not.
81 >>> is_iterable([])
82 True
83 >>> is_iterable(object())
84 False
86 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
87 """
88 try:
89 iter(obj)
90 except TypeError:
91 return False
92 return True
95def is_scalar(obj):
96 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
97 object is an iterable container type. Strings are considered
98 scalar as well, because strings are more often treated as whole
99 values as opposed to iterables of 1-character substrings.
101 >>> is_scalar(object())
102 True
103 >>> is_scalar(range(10))
104 False
105 >>> is_scalar('hello')
106 True
107 """
108 return not is_iterable(obj) or isinstance(obj, basestring)
111def is_collection(obj):
112 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
113 is an iterable other than a string.
115 >>> is_collection(object())
116 False
117 >>> is_collection(range(10))
118 True
119 >>> is_collection('hello')
120 False
121 """
122 return is_iterable(obj) and not isinstance(obj, basestring)
125def split(src, sep=None, maxsplit=None):
126 """Splits an iterable based on a separator. Like :meth:`str.split`,
127 but for all iterables. Returns a list of lists.
129 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
130 [['hi', 'hello'], ['sup'], ['soap']]
132 See :func:`split_iter` docs for more info.
133 """
134 return list(split_iter(src, sep, maxsplit))
137def split_iter(src, sep=None, maxsplit=None):
138 """Splits an iterable based on a separator, *sep*, a max of
139 *maxsplit* times (no max by default). *sep* can be:
141 * a single value
142 * an iterable of separators
143 * a single-argument callable that returns True when a separator is
144 encountered
146 ``split_iter()`` yields lists of non-separator values. A separator will
147 never appear in the output.
149 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
150 [['hi', 'hello'], ['sup'], ['soap']]
152 Note that ``split_iter`` is based on :func:`str.split`, so if
153 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
154 are desired between two contiguous ``None`` values, simply use
155 ``sep=[None]``:
157 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
158 [['hi', 'hello'], ['sup']]
159 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
160 [['hi', 'hello'], [], ['sup'], []]
162 Using a callable separator:
164 >>> falsy_sep = lambda x: not x
165 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
166 [['hi', 'hello'], [], ['sup'], []]
168 See :func:`split` for a list-returning version.
170 """
171 if not is_iterable(src):
172 raise TypeError('expected an iterable')
174 if maxsplit is not None:
175 maxsplit = int(maxsplit)
176 if maxsplit == 0:
177 yield [src]
178 return
180 if callable(sep):
181 sep_func = sep
182 elif not is_scalar(sep):
183 sep = frozenset(sep)
184 sep_func = lambda x: x in sep
185 else:
186 sep_func = lambda x: x == sep
188 cur_group = []
189 split_count = 0
190 for s in src:
191 if maxsplit is not None and split_count >= maxsplit:
192 sep_func = lambda x: False
193 if sep_func(s):
194 if sep is None and not cur_group:
195 # If sep is none, str.split() "groups" separators
196 # check the str.split() docs for more info
197 continue
198 split_count += 1
199 yield cur_group
200 cur_group = []
201 else:
202 cur_group.append(s)
204 if cur_group or sep is not None:
205 yield cur_group
206 return
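# Illustrative usage sketch (editorial addition, not part of the module): using a
# callable separator to split a token stream on any falsy value. The "tokens"
# list below is invented for the example.
tokens = ['a', 'b', '', 'c', None, 'd']
assert split(tokens, sep=lambda x: not x) == [['a', 'b'], ['c'], ['d']]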
209def lstrip(iterable, strip_value=None):
210 """Strips values from the beginning of an iterable. Stripped items will
211 match the value of the argument strip_value. Functionality is analogous
212 to that of the method str.lstrip. Returns a list.
214 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
215 ['Bar', 'Bam']
217 """
218 return list(lstrip_iter(iterable, strip_value))
221def lstrip_iter(iterable, strip_value=None):
222 """Strips values from the beginning of an iterable. Stripped items will
223 match the value of the argument strip_value. Functionality is analogous
224 to that of the method str.lstrip. Returns a generator.
226 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
227 ['Bar', 'Bam']
229 """
230 iterator = iter(iterable)
231 for i in iterator:
232 if i != strip_value:
233 yield i
234 break
235 for i in iterator:
236 yield i
239def rstrip(iterable, strip_value=None):
240 """Strips values from the end of an iterable. Stripped items will
241 match the value of the argument strip_value. Functionality is analogous
242 to that of the method str.rstrip. Returns a list.
244 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
245 ['Foo', 'Bar']
247 """
248 return list(rstrip_iter(iterable, strip_value))
251def rstrip_iter(iterable, strip_value=None):
252 """Strips values from the end of an iterable. Stripped items will
253 match the value of the argument strip_value. Functionality is analogous
254 to that of the method str.rstrip. Returns a generator.
256 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
257 ['Foo', 'Bar']
259 """
260 iterator = iter(iterable)
261 for i in iterator:
262 if i == strip_value:
263 cache = list()
264 cache.append(i)
265 broken = False
266 for i in iterator:
267 if i == strip_value:
268 cache.append(i)
269 else:
270 broken = True
271 break
272 if not broken: # Return to caller here because the end of the
273 return # iterator has been reached
274 for t in cache:
275 yield t
276 yield i
279def strip(iterable, strip_value=None):
280 """Strips values from the beginning and end of an iterable. Stripped items
281 will match the value of the argument strip_value. Functionality is
282 analogous to that of the method str.strip. Returns a list.
284 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
285 ['Foo', 'Bar', 'Bam']
287 """
288 return list(strip_iter(iterable, strip_value))
291def strip_iter(iterable, strip_value=None):
292 """Strips values from the beginning and end of an iterable. Stripped items
293 will match the value of the argument strip_value. Functionality is
294 analogous to that of the method str.strip. Returns a generator.
296 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
297 ['Foo', 'Bar', 'Bam']
299 """
300 return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)
303def chunked(src, size, count=None, **kw):
304 """Returns a list of *count* chunks, each with *size* elements,
305 generated from iterable *src*. If *src* is not evenly divisible by
306 *size*, the final chunk will have fewer than *size* elements.
307 Provide the *fill* keyword argument to provide a pad value and
308 enable padding, otherwise no padding will take place.
310 >>> chunked(range(10), 3)
311 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
312 >>> chunked(range(10), 3, fill=None)
313 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
314 >>> chunked(range(10), 3, count=2)
315 [[0, 1, 2], [3, 4, 5]]
317 See :func:`chunked_iter` for more info.
318 """
319 chunk_iter = chunked_iter(src, size, **kw)
320 if count is None:
321 return list(chunk_iter)
322 else:
323 return list(itertools.islice(chunk_iter, count))
326def _validate_positive_int(value, name, strictly_positive=True):
327 value = int(value)
328 if value < 0 or (strictly_positive and value == 0):
329 raise ValueError('expected a positive integer ' + name)
330 return value
333def chunked_iter(src, size, **kw):
334 """Generates *size*-sized chunks from *src* iterable. Unless the
335 optional *fill* keyword argument is provided, iterables not evenly
336 divisible by *size* will have a final chunk that is smaller than
337 *size*.
339 >>> list(chunked_iter(range(10), 3))
340 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
341 >>> list(chunked_iter(range(10), 3, fill=None))
342 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
344 Note that ``fill=None`` in fact uses ``None`` as the fill value.
345 """
346 # TODO: add count kwarg?
347 if not is_iterable(src):
348 raise TypeError('expected an iterable')
349 size = _validate_positive_int(size, 'chunk size')
350 do_fill = True
351 try:
352 fill_val = kw.pop('fill')
353 except KeyError:
354 do_fill = False
355 fill_val = None
356 if kw:
357 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
358 if not src:
359 return
360 postprocess = lambda chk: chk
361 if isinstance(src, basestring):
362 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
363 if _IS_PY3 and isinstance(src, bytes):
364 postprocess = lambda chk: bytes(chk)
365 src_iter = iter(src)
366 while True:
367 cur_chunk = list(itertools.islice(src_iter, size))
368 if not cur_chunk:
369 break
370 lc = len(cur_chunk)
371 if lc < size and do_fill:
372 cur_chunk[lc:] = [fill_val] * (size - lc)
373 yield postprocess(cur_chunk)
374 return
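# Illustrative usage sketch (editorial addition): batching items for a hypothetical
# bulk API that accepts at most 100 items per call. submit_batch is a made-up
# placeholder for whatever performs the real work.
def _example_bulk_submit(ids, submit_batch):
    for batch in chunked_iter(ids, 100):
        submit_batch(batch)  # each batch is a list of at most 100 ids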
377def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
378 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
379 Optionally, a start of the input can be set via *input_offset*, and an
380 overlap between the chunks may be specified via *overlap_size*.
381 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
382 are always at the beginning of the chunk.
384 Returns an iterator of (start, end) tuples, one tuple per chunk.
386 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
387 [(10, 15), (15, 20)]
388 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
389 [(10, 15), (14, 19), (18, 20)]
390 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
391 [(10, 15), (13, 18), (16, 20)]
393 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
394 [(4, 9), (9, 14), (14, 19)]
395 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
396 [(4, 5), (5, 10), (10, 15), (15, 19)]
398 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
399 [(2, 7), (6, 11), (10, 15), (14, 17)]
400 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
401 [(2, 5), (4, 9), (8, 13), (12, 17)]
402 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
403 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
404 """
405 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
406 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
407 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
408 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
410 input_stop = input_offset + input_size
412 if align:
413 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
414 if initial_chunk_len != overlap_size:
415 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
416 if input_offset + initial_chunk_len >= input_stop:
417 return
418 input_offset = input_offset + initial_chunk_len - overlap_size
420 for i in range(input_offset, input_stop, chunk_size - overlap_size):
421 yield (i, min(i + chunk_size, input_stop))
423 if i + chunk_size >= input_stop:
424 return
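# Illustrative sketch (editorial addition): scanning a file in fixed-size windows
# with a small overlap so a byte pattern straddling a window boundary is still
# found. The window and overlap sizes and the search itself are assumptions for
# the example, not part of chunk_ranges.
def _example_scan_file(path, pattern, window=64 * 1024, overlap=16):
    with open(path, 'rb') as f:
        size = os.fstat(f.fileno()).st_size
        for start, end in chunk_ranges(size, window, overlap_size=overlap):
            f.seek(start)
            if pattern in f.read(end - start):
                return True
    return False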
427def pairwise(src):
428 """Convenience function for calling :func:`windowed` on *src*, with
429 *size* set to 2.
431 >>> pairwise(range(5))
432 [(0, 1), (1, 2), (2, 3), (3, 4)]
433 >>> pairwise([])
434 []
436 The number of pairs is always one less than the number of elements
437 in the iterable passed in, except on empty inputs, for which an
438 empty list is returned.
439 """
440 return windowed(src, 2)
443def pairwise_iter(src):
444 """Convenience function for calling :func:`windowed_iter` on *src*,
445 with *size* set to 2.
447 >>> list(pairwise_iter(range(5)))
448 [(0, 1), (1, 2), (2, 3), (3, 4)]
449 >>> list(pairwise_iter([]))
450 []
452 The number of pairs is always one less than the number of elements
453 in the iterable passed in, or zero, when *src* is empty.
455 """
456 return windowed_iter(src, 2)
459def windowed(src, size):
460 """Returns tuples with exactly length *size*. If the iterable is
461 too short to make a window of length *size*, no tuples are
462 returned. See :func:`windowed_iter` for more.
463 """
464 return list(windowed_iter(src, size))
467def windowed_iter(src, size):
468 """Returns tuples with length *size* which represent a sliding
469 window over iterable *src*.
471 >>> list(windowed_iter(range(7), 3))
472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
474 If the iterable is too short to make a window of length *size*,
475 then no window tuples are returned.
477 >>> list(windowed_iter(range(3), 5))
478 []
479 """
480 # TODO: lists? (for consistency)
481 tees = itertools.tee(src, size)
482 try:
483 for i, t in enumerate(tees):
484 for _ in xrange(i):
485 next(t)
486 except StopIteration:
487 return izip([])
488 return izip(*tees)
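# Illustrative sketch (editorial addition): a simple moving average built on
# windowed_iter. The window size of 3 is arbitrary.
def _example_moving_average(values, n=3):
    return [sum(w) / float(n) for w in windowed_iter(values, n)]
# e.g. _example_moving_average([1, 2, 3, 4, 5]) -> [2.0, 3.0, 4.0]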
491def xfrange(stop, start=None, step=1.0):
492 """Same as :func:`frange`, but generator-based instead of returning a
493 list.
495 >>> tuple(xfrange(1, 3, step=0.75))
496 (1.0, 1.75, 2.5)
498 See :func:`frange` for more details.
499 """
500 if not step:
501 raise ValueError('step must be non-zero')
502 if start is None:
503 start, stop = 0.0, stop * 1.0
504 else:
505 # swap when all args are used
506 stop, start = start * 1.0, stop * 1.0
507 cur = start
508 while cur < stop:
509 yield cur
510 cur += step
513def frange(stop, start=None, step=1.0):
514 """A :func:`range` clone for float-based ranges.
516 >>> frange(5)
517 [0.0, 1.0, 2.0, 3.0, 4.0]
518 >>> frange(6, step=1.25)
519 [0.0, 1.25, 2.5, 3.75, 5.0]
520 >>> frange(100.5, 101.5, 0.25)
521 [100.5, 100.75, 101.0, 101.25]
522 >>> frange(5, 0)
523 []
524 >>> frange(5, 0, step=-1.25)
525 [5.0, 3.75, 2.5, 1.25]
526 """
527 if not step:
528 raise ValueError('step must be non-zero')
529 if start is None:
530 start, stop = 0.0, stop * 1.0
531 else:
532 # swap when all args are used
533 stop, start = start * 1.0, stop * 1.0
534 count = int(math.ceil((stop - start) / step))
535 ret = [None] * count
536 if not ret:
537 return ret
538 ret[0] = start
539 for i in xrange(1, count):
540 ret[i] = ret[i - 1] + step
541 return ret
544def backoff(start, stop, count=None, factor=2.0, jitter=False):
545 """Returns a list of geometrically-increasing floating-point numbers,
546 suitable for usage with `exponential backoff`_. Exactly like
547 :func:`backoff_iter`, but without the ``'repeat'`` option for
548 *count*. See :func:`backoff_iter` for more details.
550 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
552 >>> backoff(1, 10)
553 [1.0, 2.0, 4.0, 8.0, 10.0]
554 """
555 if count == 'repeat':
556 raise ValueError("'repeat' supported in backoff_iter, not backoff")
557 return list(backoff_iter(start, stop, count=count,
558 factor=factor, jitter=jitter))
561def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
562 """Generates a sequence of geometrically-increasing floats, suitable
563 for usage with `exponential backoff`_. Starts with *start*,
564 increasing by *factor* until *stop* is reached, optionally
565 stopping iteration once *count* numbers are yielded. *factor*
566 defaults to 2. In general retrying with properly-configured
567 backoff creates a better-behaved component for a larger service
568 ecosystem.
570 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
572 >>> list(backoff_iter(1.0, 10.0, count=5))
573 [1.0, 2.0, 4.0, 8.0, 10.0]
574 >>> list(backoff_iter(1.0, 10.0, count=8))
575 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
576 >>> list(backoff_iter(0.25, 100.0, factor=10))
577 [0.25, 2.5, 25.0, 100.0]
579 A simplified usage example:
581 .. code-block:: python
583 for timeout in backoff_iter(0.25, 5.0):
584 try:
585 res = network_call()
586 break
587 except Exception as e:
588 log(e)
589 time.sleep(timeout)
591 An enhancement for large-scale systems would be to add variation,
592 or *jitter*, to timeout values. This is done to avoid a thundering
593 herd on the receiving end of the network call.
595 Finally, for *count*, the special value ``'repeat'`` can be passed to
596 continue yielding indefinitely.
598 Args:
600 start (float): Positive number for baseline.
601 stop (float): Positive number for maximum.
602 count (int): Number of steps before stopping
603 iteration. Defaults to the number of steps between *start* and
604 *stop*. Pass the string, `'repeat'`, to continue iteration
605 indefinitely.
606 factor (float): Rate of exponential increase. Defaults to `2.0`,
607 e.g., `[1, 2, 4, 8, 16]`.
608 jitter (float): A factor between `-1.0` and `1.0`, used to
609 uniformly randomize and thus spread out timeouts in a distributed
610 system, avoiding rhythm effects. Positive values use the base
611 backoff curve as a maximum, negative values use the curve as a
612 minimum. Set to 1.0 or `True` for a jitter approximating
613 Ethernet's time-tested backoff solution. Defaults to `False`.
615 """
616 start = float(start)
617 stop = float(stop)
618 factor = float(factor)
619 if start < 0.0:
620 raise ValueError('expected start >= 0, not %r' % start)
621 if factor < 1.0:
622 raise ValueError('expected factor >= 1.0, not %r' % factor)
623 if stop == 0.0:
624 raise ValueError('expected stop >= 0')
625 if stop < start:
626 raise ValueError('expected stop >= start, not %r' % stop)
627 if count is None:
628 denom = start if start else 1
629 count = 1 + math.ceil(math.log(stop/denom, factor))
630 count = count if start else count + 1
631 if count != 'repeat' and count < 0:
632 raise ValueError('count must be positive or "repeat", not %r' % count)
633 if jitter:
634 jitter = float(jitter)
635 if not (-1.0 <= jitter <= 1.0):
636 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
638 cur, i = start, 0
639 while count == 'repeat' or i < count:
640 if not jitter:
641 cur_ret = cur
642 elif jitter:
643 cur_ret = cur - (cur * jitter * random.random())
644 yield cur_ret
645 i += 1
646 if cur == 0:
647 cur = 1
648 elif cur < stop:
649 cur *= factor
650 if cur > stop:
651 cur = stop
652 return
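# Illustrative sketch (editorial addition): a retry loop built on backoff_iter
# with full jitter. fetch is a placeholder callable; real code would catch a
# narrower exception type than Exception.
def _example_retrying_fetch(fetch, max_tries=6):
    last_exc = None
    for timeout in backoff_iter(0.5, 8.0, count=max_tries, jitter=True):
        try:
            return fetch()
        except Exception as e:
            last_exc = e
            time.sleep(timeout)
    raise last_exc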
655def bucketize(src, key=bool, value_transform=None, key_filter=None):
656 """Group values in the *src* iterable by the value returned by *key*.
658 >>> bucketize(range(5))
659 {False: [0], True: [1, 2, 3, 4]}
660 >>> is_odd = lambda x: x % 2 == 1
661 >>> bucketize(range(5), is_odd)
662 {False: [0, 2, 4], True: [1, 3]}
664 *key* is :class:`bool` by default, but can also be a callable, a string, or a list.
665 If it is a string, it is the name of the attribute on which to bucketize objects.
667 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
668 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
670 If *key* is a list, it specifies the bucket for each corresponding item in *src*:
672 >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
673 {0: [1, 4], 1: [2], 2: [365, 98]}
676 Value lists are not deduplicated:
678 >>> bucketize([None, None, None, 'hello'])
679 {False: [None, None, None], True: ['hello']}
681 Bucketize into more than two groups:
683 >>> bucketize(range(10), lambda x: x % 3)
684 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
686 ``bucketize`` has a couple of advanced options useful in certain
687 cases. *value_transform* can be used to modify values as they are
688 added to buckets, and *key_filter* will allow excluding certain
689 buckets from being collected.
691 >>> bucketize(range(5), value_transform=lambda x: x*x)
692 {False: [0], True: [1, 4, 9, 16]}
694 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
695 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
697 Note that in the boolean examples above there are at most two keys,
698 ``True`` and ``False``, and every key that is present maps to a list
699 with at least one item. See :func:`partition` for a version
700 specialized for binary use cases.
702 """
703 if not is_iterable(src):
704 raise TypeError('expected an iterable')
705 elif isinstance(key, list):
706 if len(key) != len(src):
707 raise ValueError("key and src have to be the same length")
708 src = zip(key, src)
710 if isinstance(key, basestring):
711 key_func = lambda x: getattr(x, key, x)
712 elif callable(key):
713 key_func = key
714 elif isinstance(key, list):
715 key_func = lambda x: x[0]
716 else:
717 raise TypeError('expected key to be callable or a string or a list')
719 if value_transform is None:
720 value_transform = lambda x: x
721 if not callable(value_transform):
722 raise TypeError('expected callable value transform function')
723 if isinstance(key, list):
724 f = value_transform
725 value_transform = lambda x: f(x[1])
727 ret = {}
728 for val in src:
729 key_of_val = key_func(val)
730 if key_filter is None or key_filter(key_of_val):
731 ret.setdefault(key_of_val, []).append(value_transform(val))
732 return ret
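# Illustrative usage sketch (editorial addition): grouping file names by
# extension with a callable key. The names are invented for the example.
names = ['a.py', 'b.txt', 'c.py', 'd.rst']
by_ext = bucketize(names, key=lambda n: n.rsplit('.', 1)[-1])
assert by_ext == {'py': ['a.py', 'c.py'], 'txt': ['b.txt'], 'rst': ['d.rst']}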
735def partition(src, key=bool):
736 """No relation to :meth:`str.partition`, ``partition`` is like
737 :func:`bucketize`, but for added convenience returns a tuple of
738 ``(truthy_values, falsy_values)``.
740 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
741 >>> nonempty
742 ['hi', 'bye']
744 *key* defaults to :class:`bool`, but can be carefully overridden to
745 use either a function that returns ``True`` or ``False``, or the
746 string name of the attribute on which to partition objects.
748 >>> import string
749 >>> is_digit = lambda x: x in string.digits
750 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
751 >>> ''.join(decimal_digits), ''.join(hexletters)
752 ('0123456789', 'abcdefABCDEF')
753 """
754 bucketized = bucketize(src, key)
755 return bucketized.get(True, []), bucketized.get(False, [])
758def unique(src, key=None):
759 """``unique()`` returns a list of unique values, as determined by
760 *key*, in the order they first appeared in the input iterable,
761 *src*.
763 >>> ones_n_zeros = '11010110001010010101010'
764 >>> ''.join(unique(ones_n_zeros))
765 '10'
767 See :func:`unique_iter` docs for more details.
768 """
769 return list(unique_iter(src, key))
772def unique_iter(src, key=None):
773 """Yield unique elements from the iterable, *src*, based on *key*,
774 in the order in which they first appeared in *src*.
776 >>> repetitious = [1, 2, 3] * 10
777 >>> list(unique_iter(repetitious))
778 [1, 2, 3]
780 By default, *key* is the object itself, but *key* can either be a
781 callable or, for convenience, a string name of the attribute on
782 which to uniqueify objects, falling back on identity when the
783 attribute is not present.
785 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
786 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
787 ['hi', 'hello', 'bye']
788 """
789 if not is_iterable(src):
790 raise TypeError('expected an iterable, not %r' % type(src))
791 if key is None:
792 key_func = lambda x: x
793 elif callable(key):
794 key_func = key
795 elif isinstance(key, basestring):
796 key_func = lambda x: getattr(x, key, x)
797 else:
798 raise TypeError('"key" expected a string or callable, not %r' % key)
799 seen = set()
800 for i in src:
801 k = key_func(i)
802 if k not in seen:
803 seen.add(k)
804 yield i
805 return
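# Illustrative usage sketch (editorial addition): case-insensitive
# de-duplication that keeps the first spelling seen.
assert unique(['PY', 'py', 'Go', 'go', 'PY'], key=str.lower) == ['PY', 'Go']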
808def redundant(src, key=None, groups=False):
809 """The complement of :func:`unique()`.
811 By default returns non-unique/duplicate values as a list of the
812 *first* redundant value in *src*. Pass ``groups=True`` to get
813 groups of all values with redundancies, ordered by position of the
814 first redundant value. This is useful in conjunction with some
815 normalizing *key* function.
817 >>> redundant([1, 2, 3, 4])
818 []
819 >>> redundant([1, 2, 3, 2, 3, 3, 4])
820 [2, 3]
821 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
822 [[2, 2], [3, 3, 3]]
824 An example using a *key* function to do case-insensitive
825 redundancy detection.
827 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
828 ['Hi']
829 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
830 [['hi', 'Hi', 'HI']]
832 *key* should also be used when the values in *src* are not hashable.
834 .. note::
836 The output of this function is designed for reporting
837 duplicates in contexts where unique input is expected. Due to
838 the grouped return type, there is no streaming equivalent of
839 this function for the time being.
841 """
842 if key is None:
843 pass
844 elif callable(key):
845 key_func = key
846 elif isinstance(key, basestring):
847 key_func = lambda x: getattr(x, key, x)
848 else:
849 raise TypeError('"key" expected a string or callable, not %r' % key)
850 seen = {} # key to first seen item
851 redundant_order = []
852 redundant_groups = {}
853 for i in src:
854 k = key_func(i) if key else i
855 if k not in seen:
856 seen[k] = i
857 else:
858 if k in redundant_groups:
859 if groups:
860 redundant_groups[k].append(i)
861 else:
862 redundant_order.append(k)
863 redundant_groups[k] = [seen[k], i]
864 if not groups:
865 ret = [redundant_groups[k][1] for k in redundant_order]
866 else:
867 ret = [redundant_groups[k] for k in redundant_order]
868 return ret
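# Illustrative usage sketch (editorial addition): reporting usernames that
# collide after case normalization, a typical use of the *key* argument.
assert redundant(['Alice', 'bob', 'ALICE', 'alice'], key=str.lower) == ['ALICE']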
871def one(src, default=None, key=None):
872 """Along the same lines as builtins, :func:`all` and :func:`any`, and
873 similar to :func:`first`, ``one()`` returns the single object in
874 the given iterable *src* that evaluates to ``True``, as determined
875 by callable *key*. If unset, *key* defaults to :class:`bool`.
877 If exactly one object in *src* evaluates to ``True``, that object is
879 returned; otherwise (zero such objects, or more than one), *default*
880 is returned. If *default* is not passed, ``None`` is returned. It's
881 like an `XOR`_ over an iterable.
883 >>> one((True, False, False))
884 True
885 >>> one((True, False, True))
886 >>> one((0, 0, 'a'))
887 'a'
888 >>> one((0, False, None))
889 >>> one((True, True), default=False)
890 False
891 >>> bool(one(('', 1)))
892 True
893 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
894 42
896 See `Martín Gaitán's original repo`_ for further use cases.
898 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
899 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
901 """
902 ones = list(itertools.islice(filter(key, src), 2))
903 return ones[0] if len(ones) == 1 else default
906def first(iterable, default=None, key=None):
907 """Return first element of *iterable* that evaluates to ``True``, else
908 return ``None`` or optional *default*. Similar to :func:`one`.
910 >>> first([0, False, None, [], (), 42])
911 42
912 >>> first([0, False, None, [], ()]) is None
913 True
914 >>> first([0, False, None, [], ()], default='ohai')
915 'ohai'
916 >>> import re
917 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
918 >>> m.group(1)
919 'bc'
921 The optional *key* argument specifies a one-argument predicate function
922 like that used for *filter()*. The *key* argument, if supplied, should be
923 in keyword form. For example, finding the first even number in an iterable:
925 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
926 4
928 Contributed by Hynek Schlawack, author of `the original standalone module`_.
930 .. _the original standalone module: https://github.com/hynek/first
931 """
932 return next(filter(key, iterable), default)
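# Illustrative sketch (editorial addition): picking the first configured value
# from several optional sources. The parameter names are invented.
def _example_pick_timeout(cli_value=None, env_value=None, default=30):
    return first([cli_value, env_value], key=lambda v: v is not None,
                 default=default)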
935def flatten_iter(iterable):
936 """``flatten_iter()`` yields all the elements from *iterable* while
937 collapsing any nested iterables.
939 >>> nested = [[1, 2], [[3], [4, 5]]]
940 >>> list(flatten_iter(nested))
941 [1, 2, 3, 4, 5]
942 """
943 for item in iterable:
944 if isinstance(item, Iterable) and not isinstance(item, basestring):
945 for subitem in flatten_iter(item):
946 yield subitem
947 else:
948 yield item
950def flatten(iterable):
951 """``flatten()`` returns a collapsed list of all the elements from
952 *iterable* while collapsing any nested iterables.
954 >>> nested = [[1, 2], [[3], [4, 5]]]
955 >>> flatten(nested)
956 [1, 2, 3, 4, 5]
957 """
958 return list(flatten_iter(iterable))
961def same(iterable, ref=_UNSET):
962 """``same()`` returns ``True`` when all values in *iterable* are
963 equal to one another, or optionally a reference value,
964 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
965 an iterable and returns a :class:`bool`. ``same()`` returns
966 ``True`` for empty iterables.
968 >>> same([])
969 True
970 >>> same([1])
971 True
972 >>> same(['a', 'a', 'a'])
973 True
974 >>> same(range(20))
975 False
976 >>> same([[], []])
977 True
978 >>> same([[], []], ref='test')
979 False
981 """
982 iterator = iter(iterable)
983 if ref is _UNSET:
984 ref = next(iterator, ref)
985 return all(val == ref for val in iterator)
988def default_visit(path, key, value):
989 # print('visit(%r, %r, %r)' % (path, key, value))
990 return key, value
992# enable the extreme: monkeypatching iterutils with a different default_visit
993_orig_default_visit = default_visit
996def default_enter(path, key, value):
997 # print('enter(%r, %r)' % (key, value))
998 if isinstance(value, basestring):
999 return value, False
1000 elif isinstance(value, Mapping):
1001 return value.__class__(), ItemsView(value)
1002 elif isinstance(value, Sequence):
1003 return value.__class__(), enumerate(value)
1004 elif isinstance(value, Set):
1005 return value.__class__(), enumerate(value)
1006 else:
1007 # files, strings, other iterables, and scalars are not
1008 # traversed
1009 return value, False
1012def default_exit(path, key, old_parent, new_parent, new_items):
1013 # print('exit(%r, %r, %r, %r, %r)'
1014 # % (path, key, old_parent, new_parent, new_items))
1015 ret = new_parent
1016 if isinstance(new_parent, Mapping):
1017 new_parent.update(new_items)
1018 elif isinstance(new_parent, Sequence):
1019 vals = [v for i, v in new_items]
1020 try:
1021 new_parent.extend(vals)
1022 except AttributeError:
1023 ret = new_parent.__class__(vals) # tuples
1024 elif isinstance(new_parent, Set):
1025 vals = [v for i, v in new_items]
1026 try:
1027 new_parent.update(vals)
1028 except AttributeError:
1029 ret = new_parent.__class__(vals) # frozensets
1030 else:
1031 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1032 return ret
1035def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1036 **kwargs):
1037 """The remap ("recursive map") function is used to traverse and
1038 transform nested structures. Lists, tuples, sets, and dictionaries
1039 are just a few of the data structures nested into heterogeneous
1040 tree-like structures that are so common in programming.
1041 Unfortunately, Python's built-in ways to manipulate collections
1042 are almost all flat. List comprehensions may be fast and succinct,
1043 but they do not recurse, making it tedious to apply quick changes
1044 or complex transforms to real-world data.
1046 remap goes where list comprehensions cannot.
1048 Here's an example of removing all Nones from some data:
1050 >>> from pprint import pprint
1051 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1052 ... 'Babylon 5': 6, 'Dr. Who': None}
1053 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1054 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1056 Notice how both Nones have been removed despite the nesting in the
1057 dictionary. Not bad for a one-liner, and that's just the beginning.
1058 See `this remap cookbook`_ for more delicious recipes.
1060 .. _this remap cookbook: http://sedimental.org/remap.html
1062 remap takes four main arguments: the object to traverse and three
1063 optional callables which determine how the remapped object will be
1064 created.
1066 Args:
1068 root: The target object to traverse. By default, remap
1069 supports iterables like :class:`list`, :class:`tuple`,
1070 :class:`dict`, and :class:`set`, but any object traversable by
1071 *enter* will work.
1072 visit (callable): This function is called on every item in
1073 *root*. It must accept three positional arguments, *path*,
1074 *key*, and *value*. *path* is simply a tuple of parents'
1075 keys. *visit* should return the new key-value pair. It may
1076 also return ``True`` as shorthand to keep the old item
1077 unmodified, or ``False`` to drop the item from the new
1078 structure. *visit* is called after *enter*, on the new parent.
1080 The *visit* function is called for every item in root,
1081 including duplicate items. For traversable values, it is
1082 called on the new parent object, after all its children
1083 have been visited. The default visit behavior simply
1084 returns the key-value pair unmodified.
1085 enter (callable): This function controls which items in *root*
1086 are traversed. It accepts the same arguments as *visit*: the
1087 path, the key, and the value of the current item. It returns a
1088 pair of the blank new parent, and an iterator over the items
1089 which should be visited. If ``False`` is returned instead of
1090 an iterator, the value will not be traversed.
1092 The *enter* function is only called once per unique value. The
1093 default enter behavior supports mappings, sequences, and
1094 sets. Strings and all other iterables will not be traversed.
1095 exit (callable): This function determines how to handle items
1096 once they have been visited. It gets the same three
1097 arguments as the other functions -- *path*, *key*, *value*
1098 -- plus two more: the blank new parent object returned
1099 from *enter*, and a list of the new items, as remapped by
1100 *visit*.
1102 Like *enter*, the *exit* function is only called once per
1103 unique value. The default exit behavior is to simply add
1104 all new items to the new parent, e.g., using
1105 :meth:`list.extend` and :meth:`dict.update` to add to the
1106 new parent. Immutable objects, such as a :class:`tuple` or
1107 :class:`namedtuple`, must be recreated from scratch, but
1108 use the same type as the new parent passed back from the
1109 *enter* function.
1110 reraise_visit (bool): A pragmatic convenience for the *visit*
1111 callable. When set to ``False``, remap ignores any errors
1112 raised by the *visit* callback. Items causing exceptions
1113 are kept. See examples for more details.
1115 remap is designed to cover the majority of cases with just the
1116 *visit* callable. While passing in multiple callables is very
1117 empowering, remap is designed so very few cases should require
1118 passing more than one function.
1120 When passing *enter* and *exit*, it's common and easiest to build
1121 on the default behavior. Simply add ``from boltons.iterutils import
1122 default_enter`` (or ``default_exit``), and have your enter/exit
1123 function call the default behavior before or after your custom
1124 logic. See `this example`_.
1126 Duplicate and self-referential objects (aka reference loops) are
1127 automatically handled internally, `as shown here`_.
1129 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1130 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1132 """
1133 # TODO: improve argument formatting in sphinx doc
1134 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1135 if not callable(visit):
1136 raise TypeError('visit expected callable, not: %r' % visit)
1137 if not callable(enter):
1138 raise TypeError('enter expected callable, not: %r' % enter)
1139 if not callable(exit):
1140 raise TypeError('exit expected callable, not: %r' % exit)
1141 reraise_visit = kwargs.pop('reraise_visit', True)
1142 if kwargs:
1143 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1145 path, registry, stack = (), {}, [(None, root)]
1146 new_items_stack = []
1147 while stack:
1148 key, value = stack.pop()
1149 id_value = id(value)
1150 if key is _REMAP_EXIT:
1151 key, new_parent, old_parent = value
1152 id_value = id(old_parent)
1153 path, new_items = new_items_stack.pop()
1154 value = exit(path, key, old_parent, new_parent, new_items)
1155 registry[id_value] = value
1156 if not new_items_stack:
1157 continue
1158 elif id_value in registry:
1159 value = registry[id_value]
1160 else:
1161 res = enter(path, key, value)
1162 try:
1163 new_parent, new_items = res
1164 except TypeError:
1165 # TODO: handle False?
1166 raise TypeError('enter should return a tuple of (new_parent,'
1167 ' items_iterator), not: %r' % res)
1168 if new_items is not False:
1169 # traverse unless False is explicitly passed
1170 registry[id_value] = new_parent
1171 new_items_stack.append((path, []))
1172 if value is not root:
1173 path += (key,)
1174 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1175 if new_items:
1176 stack.extend(reversed(list(new_items)))
1177 continue
1178 if visit is _orig_default_visit:
1179 # avoid function call overhead by inlining identity operation
1180 visited_item = (key, value)
1181 else:
1182 try:
1183 visited_item = visit(path, key, value)
1184 except Exception:
1185 if reraise_visit:
1186 raise
1187 visited_item = True
1188 if visited_item is False:
1189 continue # drop
1190 elif visited_item is True:
1191 visited_item = (key, value)
1192 # TODO: typecheck?
1193 # raise TypeError('expected (key, value) from visit(),'
1194 # ' not: %r' % visited_item)
1195 try:
1196 new_items_stack[-1][1].append(visited_item)
1197 except IndexError:
1198 raise TypeError('expected remappable root, not: %r' % root)
1199 return value
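# Illustrative usage sketch (editorial addition): two common remap patterns.
# The first drops empty strings anywhere in a nested structure; the second
# lower-cases every string key. The sample data is invented.
data = {'name': 'Proj', 'Tags': ['a', ''], 'Meta': {'Owner': '', 'ID': 3}}
no_empties = remap(data, visit=lambda p, k, v: v != '')
lower_keys = remap(data, visit=lambda p, k, v: (k.lower() if isinstance(k, str) else k, v))
assert no_empties == {'name': 'Proj', 'Tags': ['a'], 'Meta': {'ID': 3}}
assert lower_keys['meta']['owner'] == ''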
1202class PathAccessError(KeyError, IndexError, TypeError):
1203 """An amalgamation of KeyError, IndexError, and TypeError,
1204 representing what can occur when looking up a path in a nested
1205 object.
1206 """
1207 def __init__(self, exc, seg, path):
1208 self.exc = exc
1209 self.seg = seg
1210 self.path = path
1212 def __repr__(self):
1213 cn = self.__class__.__name__
1214 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)
1216 def __str__(self):
1217 return ('could not access %r from path %r, got error: %r'
1218 % (self.seg, self.path, self.exc))
1221def get_path(root, path, default=_UNSET):
1222 """Retrieve a value from a nested object via a tuple representing the
1223 lookup path.
1225 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1226 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1227 3
1229 The path format is intentionally consistent with that of
1230 :func:`remap`.
1232 One of get_path's chief aims is improved error messaging. EAFP is
1233 great, but the error messages are not.
1235 For instance, ``root['a']['b']['c'][2][1]`` gives back
1236 ``IndexError: list index out of range``
1238 What went out of range where? get_path currently raises
1239 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,
1240 1), got error: IndexError('list index out of range',)``, a
1241 subclass of KeyError, IndexError, and TypeError.
1243 You can also pass a default that covers the entire operation,
1244 should the lookup fail at any level.
1246 Args:
1247 root: The target nesting of dictionaries, lists, or other
1248 objects supporting ``__getitem__``.
1249 path (tuple): A tuple of strings and integers to be successively
1250 looked up within *root*. A dot-separated string is also accepted.
1251 default: The value to be returned should any
1252 ``PathAccessError`` exceptions be raised.
1253 """
1254 if isinstance(path, basestring):
1255 path = path.split('.')
1256 cur = root
1257 try:
1258 for seg in path:
1259 try:
1260 cur = cur[seg]
1261 except (KeyError, IndexError) as exc:
1262 raise PathAccessError(exc, seg, path)
1263 except TypeError as exc:
1264 # either string index in a list, or a parent that
1265 # doesn't support indexing
1266 try:
1267 seg = int(seg)
1268 cur = cur[seg]
1269 except (ValueError, KeyError, IndexError, TypeError):
1270 if not is_iterable(cur):
1271 exc = TypeError('%r object is not indexable'
1272 % type(cur).__name__)
1273 raise PathAccessError(exc, seg, path)
1274 except PathAccessError:
1275 if default is _UNSET:
1276 raise
1277 return default
1278 return cur
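# Illustrative usage sketch (editorial addition): tolerant lookups into nested
# config data, with a default for missing paths. The data is invented.
config = {'db': {'hosts': ['primary', 'replica']}}
assert get_path(config, ('db', 'hosts', 1)) == 'replica'
assert get_path(config, 'db.port', default=5432) == 5432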
1281def research(root, query=lambda p, k, v: True, reraise=False):
1282 """The :func:`research` function uses :func:`remap` to recurse over
1283 any data nested in *root*, and find values which match a given
1284 criterion, specified by the *query* callable.
1286 Results are returned as a list of ``(path, value)`` pairs. The
1287 paths are tuples in the same format accepted by
1288 :func:`get_path`. This can be useful for comparing values nested
1289 in two or more different structures.
1291 Here's a simple example that finds all integers:
1293 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1294 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1295 >>> print(sorted(res))
1296 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1298 Note how *query* follows the same, familiar ``path, key, value``
1299 signature as the ``visit`` and ``enter`` functions on
1300 :func:`remap`, and returns a :class:`bool`.
1302 Args:
1303 root: The target object to search. Supports the same types of
1304 objects as :func:`remap`, including :class:`list`,
1305 :class:`tuple`, :class:`dict`, and :class:`set`.
1306 query (callable): The function called on every object to
1307 determine whether to include it in the search results. The
1308 callable must accept three arguments, *path*, *key*, and
1309 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1310 *enter* and *visit* from :func:`remap`.
1311 reraise (bool): Whether to reraise exceptions raised by *query*
1312 or to simply drop the result that caused the error.
1315 With :func:`research` it's easy to inspect the details of a data
1316 structure, like finding values that are at a certain depth (using
1317 ``len(p)``) and much more. If more advanced functionality is
1318 needed, check out the code and make your own :func:`remap`
1319 wrapper, and consider `submitting a patch`_!
1321 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1322 """
1323 ret = []
1325 if not callable(query):
1326 raise TypeError('query expected callable, not: %r' % query)
1328 def enter(path, key, value):
1329 try:
1330 if query(path, key, value):
1331 ret.append((path + (key,), value))
1332 except Exception:
1333 if reraise:
1334 raise
1335 return default_enter(path, key, value)
1337 remap(root, enter=enter)
1338 return ret
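# Illustrative usage sketch (editorial addition): locating every None buried in
# a nested record, e.g. to report incomplete fields. The record is invented.
record = {'user': {'email': None, 'name': 'sam'}, 'scores': [1, None, 3]}
missing = research(record, query=lambda p, k, v: v is None)
assert sorted(missing) == [(('scores', 1), None), (('user', 'email'), None)]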
1341# TODO: recollect()
1342# TODO: refilter()
1343# TODO: reiter()
1346# GUID iterators: 10x faster and somewhat more compact than uuid.
1348class GUIDerator(object):
1349 """The GUIDerator is an iterator that yields a globally-unique
1350 identifier (GUID) on every iteration. The GUIDs produced are
1351 hexadecimal strings.
1353 Testing shows it to be around 12x faster than the uuid module. By
1354 default it is also more compact, partly due to its default 96-bit
1355 (24-hexdigit) length. 96 bits of randomness means that there is a
1356 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1357 or less uniqueness is desired, the *size* argument can be adjusted
1358 accordingly.
1360 Args:
1361 size (int): character length of the GUID, defaults to 24. Lengths
1362 between 20 and 36 are considered valid.
1364 The GUIDerator has built-in fork protection that causes it to
1365 detect a fork on next iteration and reseed accordingly.
1367 """
1368 def __init__(self, size=24):
1369 self.size = size
1370 if size < 20 or size > 36:
1371 raise ValueError('expected 20 <= size <= 36')
1372 import hashlib
1373 self._sha1 = hashlib.sha1
1374 self.count = itertools.count()
1375 self.reseed()
1377 def reseed(self):
1378 import socket
1379 self.pid = os.getpid()
1380 self.salt = '-'.join([str(self.pid),
1381 socket.gethostname() or '<nohostname>',
1382 str(time.time()),
1383 codecs.encode(os.urandom(6),
1384 'hex_codec').decode('ascii')])
1385 # that codecs trick is the best/only way to get a bytes to
1386 # hexbytes in py2/3
1387 return
1389 def __iter__(self):
1390 return self
1392 if _IS_PY3:
1393 def __next__(self):
1394 if os.getpid() != self.pid:
1395 self.reseed()
1396 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1397 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1398 return hash_text
1399 else:
1400 def __next__(self):
1401 if os.getpid() != self.pid:
1402 self.reseed()
1403 return self._sha1(self.salt +
1404 str(next(self.count))).hexdigest()[:self.size]
1406 next = __next__
1409class SequentialGUIDerator(GUIDerator):
1410 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1411 iterator that yields a globally-unique identifier (GUID) on every
1412 iteration. The GUIDs produced are hexadecimal strings.
1414 The SequentialGUIDerator differs in that it picks a starting GUID
1415 value and increments every iteration. This yields GUIDs which are
1416 of course unique, but also ordered and lexicographically sortable.
1418 The SequentialGUIDerator is around 50% faster than the normal
1419 GUIDerator, making it almost 20x as fast as the built-in uuid
1420 module. By default it is also more compact, partly due to its
1421 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1422 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1423 iterations. If more or less uniqueness is desired, the *size*
1424 argument can be adjusted accordingly.
1426 Args:
1427 size (int): character length of the GUID, defaults to 24.
1429 Note that with SequentialGUIDerator there is a chance of GUIDs
1430 growing larger than the size configured. The SequentialGUIDerator
1431 has built-in fork protection that causes it to detect a fork on
1432 next iteration and reseed accordingly.
1434 """
1436 if _IS_PY3:
1437 def reseed(self):
1438 super(SequentialGUIDerator, self).reseed()
1439 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1440 self.start = int(start_str[:self.size], 16)
1441 self.start |= (1 << ((self.size * 4) - 2))
1442 else:
1443 def reseed(self):
1444 super(SequentialGUIDerator, self).reseed()
1445 start_str = self._sha1(self.salt).hexdigest()
1446 self.start = int(start_str[:self.size], 16)
1447 self.start |= (1 << ((self.size * 4) - 2))
1449 def __next__(self):
1450 if os.getpid() != self.pid:
1451 self.reseed()
1452 return '%x' % (next(self.count) + self.start)
1454 next = __next__
1457guid_iter = GUIDerator()
1458seq_guid_iter = SequentialGUIDerator()
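# Illustrative usage sketch (editorial addition): the module-level iterators
# above can be used directly, e.g. for request or job IDs.
request_id = next(guid_iter)      # 24 hex digits, effectively random
ordered_id = next(seq_guid_iter)  # hex, increases monotonically per process
assert len(request_id) == 24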
1461def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1462 """For when you care about the order of some elements, but not about
1463 others.
1465 Use this to float to the top and/or sink to the bottom a specific
1466 ordering, while sorting the rest of the elements according to
1467 normal :func:`sorted` rules.
1469 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1470 ['one', 'two', 'a', 'b']
1471 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1472 [6, 5, 3, 1, 0, 2, 4]
1473 >>> import string
1474 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1475 'aA1023456789cCdDeEfFbB'
1477 Args:
1478 iterable (list): A list or other iterable to sort.
1479 first (list): A sequence to enforce for elements which should
1480 appear at the beginning of the returned list.
1481 last (list): A sequence to enforce for elements which should
1482 appear at the end of the returned list.
1483 key (callable): Callable used to generate a comparable key for
1484 each item to be sorted, same as the key in
1485 :func:`sorted`. Note that entries in *first* and *last*
1486 should be the keys for the items. Defaults to
1487 passthrough/the identity function.
1488 reverse (bool): Whether or not elements not explicitly ordered
1489 by *first* and *last* should be in reverse order or not.
1491 Returns a new list in sorted order.
1492 """
1493 first = first or []
1494 last = last or []
1495 key = key or (lambda x: x)
1496 seq = list(iterable)
1497 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1498 other.sort(key=key, reverse=reverse)
1500 if first:
1501 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1502 if last:
1503 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1504 return first + other + last
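# Illustrative usage sketch (editorial addition): pinning well-known columns to
# the front of an otherwise alphabetical column list. Names are invented.
cols = ['size', 'id', 'name', 'owner', 'ctime']
assert soft_sorted(cols, first=['id', 'name']) == ['id', 'name', 'ctime', 'owner', 'size']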
1507def untyped_sorted(iterable, key=None, reverse=False):
1508 """A version of :func:`sorted` which will happily sort an iterable of
1509 heterogeneous types and return a new list, similar to legacy Python's
1510 behavior.
1512 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1513 [1, 2.0, 2, 'abc', 'def']
1515 Note how mutually orderable types are sorted as expected, as in
1516 the case of the integers and floats above.
1518 .. note::
1520 Results may vary across Python versions and builds, but the
1521 function will produce a sorted list, except in the case of
1522 explicitly unorderable objects.
1524 """
1525 class _Wrapper(object):
1526 __slots__ = ('obj',)
1528 def __init__(self, obj):
1529 self.obj = obj
1531 def __lt__(self, other):
1532 obj = key(self.obj) if key is not None else self.obj
1533 other = key(other.obj) if key is not None else other.obj
1534 try:
1535 ret = obj < other
1536 except TypeError:
1537 ret = ((type(obj).__name__, id(type(obj)), obj)
1538 < (type(other).__name__, id(type(other)), other))
1539 return ret
1541 if key is not None and not callable(key):
1542 raise TypeError('expected function or callable object for key, not: %r'
1543 % key)
1545 return sorted(iterable, key=_Wrapper, reverse=reverse)
1547"""
1548May actually be faster to do an isinstance check for a str path
1550$ python -m timeit -s "x = [1]" "x[0]"
155110000000 loops, best of 3: 0.0207 usec per loop
1552$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
155310000000 loops, best of 3: 0.029 usec per loop
1554$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15551000000 loops, best of 3: 0.315 usec per loop
1556# setting up try/except is fast, only around 0.01us
1557# actually triggering the exception takes almost 10x as long
1559$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
156010000000 loops, best of 3: 0.141 usec per loop
1561$ python -m timeit -s "x = [1]" "isinstance(x, str)"
156210000000 loops, best of 3: 0.131 usec per loop
1563$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
15641000000 loops, best of 3: 0.443 usec per loop
1565$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
15661000000 loops, best of 3: 0.544 usec per loop
1567"""