Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/more_itertools/more.py: 19%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import math
3from collections import Counter, defaultdict, deque
4from collections.abc import Sequence
5from contextlib import suppress
6from dataclasses import dataclass
7from functools import cached_property, partial, wraps
8from heapq import heapify, heapreplace
9from itertools import (
10 accumulate,
11 chain,
12 combinations,
13 compress,
14 count,
15 cycle,
16 dropwhile,
17 groupby,
18 islice,
19 permutations,
20 repeat,
21 starmap,
22 takewhile,
23 tee,
24 zip_longest,
25 product,
26)
27from math import comb, e, exp, factorial, floor, fsum, log, log1p, perm, tau
28from math import ceil, prod
29from queue import Empty, Queue
30from random import random, randrange, shuffle, uniform
31from operator import (
32 attrgetter,
33 getitem,
34 is_not,
35 itemgetter,
36 lt,
37 neg,
38 sub,
39 gt,
40)
41from sys import maxsize
42from time import monotonic
43from threading import Lock
45from .recipes import (
46 _marker,
47 consume,
48 first_true,
49 flatten,
50 is_prime,
51 nth,
52 powerset,
53 running_mean,
54 running_median,
55 sieve,
56 take,
57 unique_everseen,
58 all_equal,
59 batched,
60)
# Public API of this module. Kept in strict alphabetical order (capitalized
# names sort first under ASCII ordering) so that additions are easy to
# diff and duplicates are easy to spot.
__all__ = [
    'AbortThread',
    'SequenceView',
    'Stats',
    'adjacent',
    'all_unique',
    'always_iterable',
    'always_reversible',
    'argmax',
    'argmin',
    'bucket',
    'callback_iter',
    'chunked',
    'chunked_even',
    'circular_shifts',
    'classify_unique',
    'collapse',
    'combination_index',
    'combination_with_replacement_index',
    'concurrent_tee',
    'consecutive_groups',
    'constrained_batches',
    'consumer',
    'count_cycle',
    'countable',
    'derangements',
    'dft',
    'difference',
    'distinct_combinations',
    'distinct_permutations',
    'distribute',
    'divide',
    'doublestarmap',
    'duplicates_everseen',
    'duplicates_justseen',
    'exactly_n',
    'extract',
    'filter_except',
    'filter_map',
    'first',
    'gray_product',
    'groupby_transform',
    'ichunked',
    'idft',
    'iequals',
    'ilen',
    'interleave',
    'interleave_evenly',
    'interleave_longest',
    'interleave_randomly',
    'intersperse',
    'is_sorted',
    'islice_extended',
    'iter_suppress',
    'iterate',
    'join_mappings',
    'last',
    'locate',
    'longest_common_prefix',
    'lstrip',
    'make_decorator',
    'map_except',
    'map_if',
    'map_reduce',
    'mark_ends',
    'minmax',
    'nth_combination_with_replacement',
    'nth_or_last',
    'nth_permutation',
    'nth_prime',
    'nth_product',
    'numeric_range',
    'one',
    'only',
    'outer_product',
    'padded',
    'partial_product',
    'partitions',
    'peekable',
    'permutation_index',
    'powerset_of_sets',
    'product_index',
    'raise_',
    'repeat_each',
    'repeat_last',
    'replace',
    'rlocate',
    'rstrip',
    'run_length',
    'running_max',
    'running_min',
    'running_statistics',
    'sample',
    'seekable',
    'serialize',
    'set_partitions',
    'side_effect',
    'sized_iterator',
    'sliced',
    'sort_together',
    'split_after',
    'split_at',
    'split_before',
    'split_into',
    'split_when',
    'spy',
    'stagger',
    'strictly_n',
    'strip',
    'substrings',
    'substrings_indexes',
    'synchronized',
    'takewhile_inclusive',
    'time_limited',
    'unique_in_window',
    'unique_to_each',
    'unzip',
    'value_chain',
    'windowed',
    'windowed_complete',
    'with_iter',
    'zip_broadcast',
    'zip_offset',
]
# math.sumprod is available for Python 3.12+
try:
    from math import sumprod as _fsumprod
except ImportError:  # pragma: no cover
    # Extended precision algorithms from T. J. Dekker,
    # "A Floating-Point Technique for Extending the Available Precision"
    # https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
    # Formulas: (5.5) (5.6) and (5.8). Code: mul12()

    def dl_split(x: float):
        "Split a float into two half-precision components."
        # Multiplying by the Veltkamp constant and subtracting back rounds
        # away the low-order bits, leaving hi with at most 27 significant
        # bits; lo is the exact remainder, so hi + lo == x exactly.
        t = x * 134217729.0  # Veltkamp constant = 2.0 ** 27 + 1
        hi = t - (t - x)
        lo = x - hi
        return hi, lo

    def dl_mul(x, y):
        "Lossless multiplication."
        # With both operands split into 27-bit halves, each partial product
        # below is exact; (z, zz) satisfies z + zz == x * y exactly.
        xx_hi, xx_lo = dl_split(x)
        yy_hi, yy_lo = dl_split(y)
        p = xx_hi * yy_hi
        q = xx_hi * yy_lo + xx_lo * yy_hi
        z = p + q
        zz = p - z + q + xx_lo * yy_lo
        return z, zz

    def _fsumprod(p, q):
        # Each pairwise product is expanded into its exact two-float
        # representation, then fsum adds all components without
        # intermediate rounding error.
        return fsum(chain.from_iterable(map(dl_mul, p, q)))
def chunked(iterable, n, strict=False):
    """Break *iterable* into lists of length *n*:

        >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
        [[1, 2, 3], [4, 5, 6]]

    By default, the last yielded list will have fewer than *n* elements
    if the length of *iterable* is not divisible by *n*:

        >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
        [[1, 2, 3], [4, 5, 6], [7, 8]]

    To use a fill-in value instead, see the :func:`grouper` recipe.

    If the length of *iterable* is not divisible by *n* and *strict* is
    ``True``, then ``ValueError`` will be raised before the last
    list is yielded.

    """
    # Repeatedly pull n-item lists; an empty list is the sentinel that
    # stops the two-argument form of iter().
    iterator = iter(partial(take, n, iter(iterable)), [])
    if not strict:
        return iterator

    if n is None:
        raise ValueError('n must not be None when using strict mode.')

    def checked():
        # Re-yield each chunk, but fail loudly on a short (final) chunk.
        for chunk in iterator:
            if len(chunk) != n:
                raise ValueError('iterable is not divisible by n.')
            yield chunk

    return checked()
def first(iterable, default=_marker):
    """Return the first item of *iterable*, or *default* if *iterable* is
    empty.

        >>> first([0, 1, 2, 3])
        0
        >>> first([], 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.

    :func:`first` is useful when you have a generator of expensive-to-retrieve
    values and want any arbitrary one. It is marginally shorter than
    ``next(iter(iterable), default)``.

    """
    # A private sentinel distinguishes "empty" from any legitimate item.
    sentinel = object()
    item = next(iter(iterable), sentinel)
    if item is not sentinel:
        return item
    if default is _marker:
        raise ValueError(
            'first() was called on an empty iterable, '
            'and no default value was provided.'
        )
    return default
def last(iterable, default=_marker):
    """Return the last item of *iterable*, or *default* if *iterable* is
    empty.

        >>> last([0, 1, 2, 3])
        3
        >>> last([], 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    try:
        # Sequences support direct negative indexing.
        if isinstance(iterable, Sequence):
            return iterable[-1]
        # Work around https://bugs.python.org/issue38525
        if getattr(iterable, '__reversed__', None):
            return next(reversed(iterable))
        # Fallback: a one-slot deque retains only the final item.
        tail = deque(iterable, maxlen=1)
        return tail[-1]
    except (IndexError, TypeError, StopIteration):
        # Any of the strategies above failing means the input was empty.
        if default is not _marker:
            return default
        raise ValueError(
            'last() was called on an empty iterable, '
            'and no default value was provided.'
        )
def nth_or_last(iterable, n, default=_marker):
    """Return the nth or the last item of *iterable*,
    or *default* if *iterable* is empty.

        >>> nth_or_last([0, 1, 2, 3], 2)
        2
        >>> nth_or_last([0, 1], 2)
        1
        >>> nth_or_last([], 0, 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    # Truncate to the first n + 1 items, then take whatever comes last.
    head = islice(iterable, n + 1)
    return last(head, default=default)
class peekable:
    """Wrap an iterator to allow lookahead and prepending elements.

    Call :meth:`peek` on the result to get the value that will be returned
    by :func:`next`. This won't advance the iterator:

        >>> p = peekable(['a', 'b'])
        >>> p.peek()
        'a'
        >>> next(p)
        'a'

    Pass :meth:`peek` a default value to return that instead of raising
    ``StopIteration`` when the iterator is exhausted.

        >>> p = peekable([])
        >>> p.peek('hi')
        'hi'

    peekables also offer a :meth:`prepend` method, which "inserts" items
    at the head of the iterable:

        >>> p = peekable([1, 2, 3])
        >>> p.prepend(10, 11, 12)
        >>> next(p)
        10
        >>> p.peek()
        11
        >>> list(p)
        [11, 12, 1, 2, 3]

    peekables can be indexed. Index 0 is the item that will be returned by
    :func:`next`, index 1 is the item after that, and so on:
    The values up to the given index will be cached.

        >>> p = peekable(['a', 'b', 'c', 'd'])
        >>> p[0]
        'a'
        >>> p[1]
        'b'
        >>> next(p)
        'a'

    Negative indexes are supported, but be aware that they will cache the
    remaining items in the source iterator, which may require significant
    storage.

    To check whether a peekable is exhausted, check its truth value:

        >>> p = peekable(['a', 'b'])
        >>> if p:  # peekable has items
        ...     list(p)
        ['a', 'b']
        >>> if not p:  # peekable is exhausted
        ...     list(p)
        []

    """

    def __init__(self, iterable):
        self._it = iter(iterable)
        # Items that have been peeked at, prepended, or cached by indexing,
        # in the order they will be emitted by __next__.
        self._cache = deque()

    def __iter__(self):
        return self

    def __bool__(self):
        # Truthy iff at least one more item can be produced.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        """Return the item that will be next returned from ``next()``.

        Return ``default`` if there are no items left. If ``default`` is not
        provided, raise ``StopIteration``.

        """
        # Ensure the cache holds the upcoming item, then report it without
        # removing it.
        if not self._cache:
            try:
                self._cache.append(next(self._it))
            except StopIteration:
                if default is _marker:
                    raise
                return default
        return self._cache[0]

    def prepend(self, *items):
        """Stack up items to be the next ones returned from ``next()`` or
        ``self.peek()``. The items will be returned in
        first in, first out order::

            >>> p = peekable([1, 2, 3])
            >>> p.prepend(10, 11, 12)
            >>> next(p)
            10
            >>> list(p)
            [11, 12, 1, 2, 3]

        It is possible, by prepending items, to "resurrect" a peekable that
        previously raised ``StopIteration``.

            >>> p = peekable([])
            >>> next(p)
            Traceback (most recent call last):
            ...
            StopIteration
            >>> p.prepend(1)
            >>> next(p)
            1
            >>> next(p)
            Traceback (most recent call last):
            ...
            StopIteration

        """
        # extendleft reverses its argument, so reverse first to keep the
        # items in first-in, first-out order at the head of the cache.
        self._cache.extendleft(reversed(items))

    def __next__(self):
        # Cached items (peeked or prepended) take priority over the source.
        if self._cache:
            return self._cache.popleft()

        return next(self._it)

    def _get_slice(self, index):
        # Normalize the slice's arguments
        step = 1 if (index.step is None) else index.step
        if step > 0:
            start = 0 if (index.start is None) else index.start
            stop = maxsize if (index.stop is None) else index.stop
        elif step < 0:
            start = -1 if (index.start is None) else index.start
            stop = (-maxsize - 1) if (index.stop is None) else index.stop
        else:
            raise ValueError('slice step cannot be zero')

        # If either the start or stop index is negative, we'll need to cache
        # the rest of the iterable in order to slice from the right side.
        if (start < 0) or (stop < 0):
            self._cache.extend(self._it)
        # Otherwise we'll need to find the rightmost index and cache to that
        # point.
        else:
            n = min(max(start, stop) + 1, maxsize)
            cache_len = len(self._cache)
            if n >= cache_len:
                self._cache.extend(islice(self._it, n - cache_len))

        # Slice the materialized prefix exactly as a list would be sliced.
        return list(self._cache)[index]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self._get_slice(index)

        # Integer index: negative indexes require caching everything;
        # nonnegative ones only need the source advanced up to the index.
        cache_len = len(self._cache)
        if index < 0:
            self._cache.extend(self._it)
        elif index >= cache_len:
            self._cache.extend(islice(self._it, index + 1 - cache_len))

        return self._cache[index]
def consumer(func):
    """Decorator that automatically advances a PEP-342-style "reverse
    iterator" (a generator that receives values via ``send``) to its first
    yield point so you don't have to call ``next()`` on it manually.

        >>> @consumer
        ... def tally():
        ...     i = 0
        ...     while True:
        ...         print('Thing number %s is %s.' % (i, (yield)))
        ...         i += 1
        ...
        >>> t = tally()
        >>> t.send('red')
        Thing number 0 is red.
        >>> t.send('fish')
        Thing number 1 is fish.

    Without the decorator, you would have to call ``next(t)`` before
    ``t.send()`` could be used.

    """

    @wraps(func)
    def primed(*args, **kwargs):
        generator = func(*args, **kwargs)
        # Advance to the first yield so send() works immediately.
        next(generator)
        return generator

    return primed
def ilen(iterable):
    """Return the number of items in *iterable*.

    For example, there are 168 prime numbers below 1,000:

        >>> ilen(sieve(1000))
        168

    Equivalent to, but faster than::

        def ilen(iterable):
            count = 0
            for _ in iterable:
                count += 1
            return count

    This fully consumes the iterable, so handle with care.

    """
    # Pair each item with a tick of an infinite counter; the zero-length
    # deque discards the pairs at C speed. zip pulls from the iterable
    # first, so the counter advances exactly once per item and its next
    # value is the item count.
    counter = count()
    deque(zip(iterable, counter), maxlen=0)
    return next(counter)
def iterate(func, start):
    """Return ``start``, ``func(start)``, ``func(func(start))``, ...

    Produces an infinite iterator. To add a stopping condition,
    use :func:`take`, ``takewhile``, or :func:`takewhile_inclusive`:.

        >>> take(10, iterate(lambda x: 2*x, 1))
        [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

        >>> collatz = lambda x: 3*x + 1 if x%2==1 else x // 2
        >>> list(takewhile_inclusive(lambda x: x!=1, iterate(collatz, 10)))
        [10, 5, 16, 8, 4, 2, 1]

    """
    current = start
    # If *func* raises StopIteration, end the stream cleanly rather than
    # letting it escape the generator.
    with suppress(StopIteration):
        while True:
            yield current
            current = func(current)
def with_iter(context_manager):
    """Wrap an iterable in a ``with`` statement, so it closes once exhausted.

    For example, this will close the file when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Note that you have to actually exhaust the iterator for opened files to be closed.

    Any context manager which returns an iterable is a candidate for
    ``with_iter``.

    """
    # The context manager stays open for as long as iteration continues.
    with context_manager as iterable:
        for item in iterable:
            yield item
class sized_iterator:
    """Wrapper for *iterable* that implements ``__len__``.

        >>> it = map(str, range(5))
        >>> sized_it = sized_iterator(it, 5)
        >>> len(sized_it)
        5
        >>> list(sized_it)
        ['0', '1', '2', '3', '4']

    This is useful for tools that use :func:`len`, like
    `tqdm <https://pypi.org/project/tqdm/>`__ .

    The wrapper doesn't validate the provided *length*, so be sure to choose
    a value that reflects reality.
    """

    def __init__(self, iterable, length):
        # Stash the underlying iterator plus the caller-supplied
        # (unverified) length.
        self._iterator = iter(iterable)
        self._length = length

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    def __len__(self):
        return self._length
def one(iterable, too_short=None, too_long=None):
    """Return the single item in *iterable*, raising an exception if it is
    empty or contains more than one item.

    :func:`one` is useful for ensuring that an iterable contains only one
    item - for example, the result of a database query expected to return
    a single row.

    If *iterable* is empty, ``ValueError`` is raised, or the exception
    given as *too_short*:

        >>> it = []
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too few items in iterable (expected 1)'

    If *iterable* has more than one item, ``ValueError`` is raised, or the
    exception given as *too_long*:

        >>> it = ['too', 'many']
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: Expected exactly one item in iterable, but got 'too',
        'many', and perhaps more.

    Note that :func:`one` attempts to advance *iterable* twice to ensure
    there is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.

    """
    iterator = iter(iterable)
    sentinel = object()

    first = next(iterator, sentinel)
    if first is sentinel:
        # Nothing at all in the iterable.
        raise too_short or ValueError('too few items in iterable (expected 1)')

    second = next(iterator, sentinel)
    if second is not sentinel:
        # At least two items - report the first pair.
        msg = (
            f'Expected exactly one item in iterable, but got {first!r}, '
            f'{second!r}, and perhaps more.'
        )
        raise too_long or ValueError(msg)

    return first
def raise_(exception, *args):
    """Raise *exception*, instantiated with *args*.

    A function form of the ``raise`` statement, usable where a statement
    is not allowed (e.g. inside a lambda).
    """
    raise exception(*args)
def strictly_n(iterable, n, too_short=None, too_long=None):
    """Validate that *iterable* has exactly *n* items and return them if
    it does. If it has fewer than *n* items, call function *too_short*
    with the actual number of items. If it has more than *n* items, call
    function *too_long* with the number ``n + 1``.

        >>> iterable = ['a', 'b', 'c', 'd']
        >>> n = 4
        >>> list(strictly_n(iterable, n))
        ['a', 'b', 'c', 'd']

    Note that the returned iterable must be consumed in order for the check
    to be made.

    By default, *too_short* and *too_long* are functions that raise
    ``ValueError``.

        >>> list(strictly_n('ab', 3))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too few items in iterable (got 2)

        >>> list(strictly_n('abc', 2))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
        ...
        ValueError: too many items in iterable (got at least 3)

    You can instead supply functions that do something else.
    *too_short* will be called with the number of items in *iterable*.
    *too_long* will be called with `n + 1`.

        >>> def too_long(item_count):
        ...     print('The boss is going to hear about this')
        >>> it = strictly_n('abcdef', 4, too_long=too_long)
        >>> list(it)
        The boss is going to hear about this
        ['a', 'b', 'c', 'd']

    """
    if too_short is None:

        def too_short(item_count):
            raise_(
                ValueError,
                f'Too few items in iterable (got {item_count})',
            )

    if too_long is None:

        def too_long(item_count):
            raise_(
                ValueError,
                f'Too many items in iterable (got at least {item_count})',
            )

    iterator = iter(iterable)

    # Emit at most n items, counting as we go.
    yielded = 0
    for item in islice(iterator, n):
        yield item
        yielded += 1

    if yielded < n:
        too_short(yielded)
        return

    # Anything left in the iterator means it was too long.
    for _ in iterator:
        too_long(n + 1)
        return
def distinct_permutations(iterable, r=None):
    """Yield successive distinct permutations of the elements in *iterable*.

        >>> sorted(distinct_permutations([1, 0, 1]))
        [(0, 1, 1), (1, 0, 1), (1, 1, 0)]

    Equivalent to yielding from ``set(permutations(iterable))``, except
    duplicates are not generated and thrown away. For larger input sequences
    this is much more efficient.

    Duplicate permutations arise when there are duplicated elements in the
    input iterable. The number of items returned is
    `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
    items input, and each `x_i` is the count of a distinct item in the input
    sequence. The function :func:`multinomial` computes this directly.

    If *r* is given, only the *r*-length permutations are yielded.

        >>> sorted(distinct_permutations([1, 0, 1], r=2))
        [(0, 1), (1, 0), (1, 1)]
        >>> sorted(distinct_permutations(range(3), r=2))
        [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

    *iterable* need not be sortable, but note that using equal (``x == y``)
    but non-identical (``id(x) != id(y)``) elements may produce surprising
    behavior. For example, ``1`` and ``True`` are equal but non-identical:

        >>> list(distinct_permutations([1, True, '3']))  # doctest: +SKIP
        [
            (1, True, '3'),
            (1, '3', True),
            ('3', 1, True)
        ]
        >>> list(distinct_permutations([1, 2, '3']))  # doctest: +SKIP
        [
            (1, 2, '3'),
            (1, '3', 2),
            (2, 1, '3'),
            (2, '3', 1),
            ('3', 1, 2),
            ('3', 2, 1)
        ]
    """

    # Algorithm: https://w.wiki/Qai
    # Full-length case: classic "next lexicographic permutation" (Pandita),
    # starting from sorted order so duplicates are visited exactly once.
    def _full(A):
        while True:
            # Yield the permutation we have
            yield tuple(A)

            # Find the largest index i such that A[i] < A[i + 1]
            for i in range(size - 2, -1, -1):
                if A[i] < A[i + 1]:
                    break
            # If no such index exists, this permutation is the last one
            else:
                return

            # Find the largest index j greater than i such that A[i] < A[j]
            for j in range(size - 1, i, -1):
                if A[i] < A[j]:
                    break

            # Swap the value of A[i] with that of A[j], then reverse the
            # sequence from A[i + 1] to form the new permutation
            A[i], A[j] = A[j], A[i]
            A[i + 1 :] = A[: i - size : -1]  # A[i + 1:][::-1]

    # Algorithm: modified from the above
    # Partial-length case: head holds the current r-permutation; tail holds
    # the unused items in sorted order.
    def _partial(A, r):
        # Split A into the first r items and the last r items
        head, tail = A[:r], A[r:]
        right_head_indexes = range(r - 1, -1, -1)
        left_tail_indexes = range(len(tail))

        while True:
            # Yield the permutation we have
            yield tuple(head)

            # Starting from the right, find the first index of the head with
            # value smaller than the maximum value of the tail - call it i.
            pivot = tail[-1]
            for i in right_head_indexes:
                if head[i] < pivot:
                    break
                pivot = head[i]
            else:
                # No such index: every r-permutation has been produced.
                return

            # Starting from the left, find the first value of the tail
            # with a value greater than head[i] and swap.
            for j in left_tail_indexes:
                if tail[j] > head[i]:
                    head[i], tail[j] = tail[j], head[i]
                    break
            # If we didn't find one, start from the right and find the first
            # index of the head with a value greater than head[i] and swap.
            else:
                for j in right_head_indexes:
                    if head[j] > head[i]:
                        head[i], head[j] = head[j], head[i]
                        break

            # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
            tail += head[: i - r : -1]  # head[i + 1:][::-1]
            i += 1
            head[i:], tail[:] = tail[: r - i], tail[r - i :]

    items = list(iterable)

    try:
        items.sort()
        sortable = True
    except TypeError:
        sortable = False

        # Unsortable items: permute integer stand-ins instead. Equal items
        # share the same index (the first position where such an item
        # occurs), so the index list has duplicates exactly where the item
        # list does.
        indices_dict = defaultdict(list)

        for item in items:
            indices_dict[items.index(item)].append(item)

        indices = [items.index(item) for item in items]
        indices.sort()

        # Map each index back to its equal items, cycling so repeated
        # occurrences reuse the original objects in round-robin order.
        equivalent_items = {k: cycle(v) for k, v in indices_dict.items()}

        def permuted_items(permuted_indices):
            return tuple(
                next(equivalent_items[index]) for index in permuted_indices
            )

    size = len(items)
    if r is None:
        r = size

    # functools.partial(_partial, ... )
    algorithm = _full if (r == size) else partial(_partial, r=r)

    if 0 < r <= size:
        if sortable:
            return algorithm(items)
        else:
            # Permute the indices, then translate back to the real items.
            return (
                permuted_items(permuted_indices)
                for permuted_indices in algorithm(indices)
            )

    # Degenerate cases: r == 0 yields the single empty permutation;
    # r > size (or r < 0) yields nothing.
    return iter(() if r else ((),))
def derangements(iterable, r=None):
    """Yield successive derangements of the elements in *iterable*.

    A derangement is a permutation in which no element appears at its
    original index (a permutation with no fixed points):

        >>> for d in derangements(['Alice', 'Bob', 'Carol', 'Dave']):
        ...     print(', '.join(d))
        Bob, Alice, Dave, Carol
        Bob, Carol, Dave, Alice
        Bob, Dave, Alice, Carol
        Carol, Alice, Dave, Bob
        Carol, Dave, Alice, Bob
        Carol, Dave, Bob, Alice
        Dave, Alice, Bob, Carol
        Dave, Carol, Alice, Bob
        Dave, Carol, Bob, Alice

    If *r* is given, only the *r*-length derangements are yielded.

        >>> sorted(derangements(range(3), 2))
        [(1, 0), (1, 2), (2, 0)]
        >>> sorted(derangements([0, 2, 3], 2))
        [(2, 0), (2, 3), (3, 0)]

    Elements are treated as unique based on their position, not on their
    value; to avoid surprises with duplicated values, make the inputs
    distinct first.

    The number of derangements of a set of size *n* is the "subfactorial
    of n"; for n > 0 it equals ``round(math.factorial(n) / math.e)``.

    Sizes: https://oeis.org/A000166
    """
    pool = tuple(iterable)
    positions = tuple(range(len(pool)))
    # Walk the permutations of the positions in lockstep with the
    # permutations of the items; keep an item permutation only when no
    # position lands in its own slot.
    selectors = (
        all(map(is_not, positions, indexes))
        for indexes in permutations(positions, r=r)
    )
    return compress(permutations(pool, r=r), selectors)
def intersperse(e, iterable, n=1):
    """Intersperse filler element *e* among the items in *iterable*, leaving
    *n* items between each filler element.

        >>> list(intersperse('!', [1, 2, 3, 4, 5]))
        [1, '!', 2, '!', 3, '!', 4, '!', 5]

        >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
        [1, 2, None, 3, 4, None, 5]

    """
    if n == 0:
        raise ValueError('n must be > 0')

    if n == 1:
        # interleave gives e, x_0, e, x_1, ...; slicing off the leading
        # filler leaves x_0, e, x_1, e, ...
        return islice(interleave(repeat(e), iterable), 1, None)

    # For n > 1: interleave single-filler chunks with n-item chunks,
    # drop the leading filler chunk, then flatten.
    filler = repeat([e])
    chunks = chunked(iterable, n)
    return flatten(islice(interleave(filler, chunks), 1, None))
def unique_to_each(*iterables):
    """Return the elements from each of the input iterables that aren't in the
    other input iterables.

    For example, suppose you have a set of packages, each with a set of
    dependencies::

        {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}

    If you remove one package, which dependencies can also be removed?

    If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
    associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed
    for ``pkg_2``, and ``D`` is only needed for ``pkg_3``::

        >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
        [['A'], ['C'], ['D']]

    If there are duplicates in one input iterable that aren't in the others
    they will be duplicated in the output. Input order is preserved::

        >>> unique_to_each("mississippi", "missouri")
        [['p', 'p'], ['o', 'u', 'r']]

    It is assumed that the elements of each iterable are hashable.

    """
    materialized = [list(it) for it in iterables]
    # Count how many *distinct* inputs each element appears in.
    counts = Counter(
        chain.from_iterable(set(group) for group in materialized)
    )
    # Elements appearing in exactly one input are unique to it.
    singletons = {element for element, c in counts.items() if c == 1}
    return [
        [item for item in group if item in singletons]
        for group in materialized
    ]
def windowed(seq, n, fillvalue=None, step=1):
    """Return a sliding window of width *n* over the given iterable.

        >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
        >>> list(all_windows)
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    When the window is larger than the iterable, *fillvalue* is used in place
    of missing values:

        >>> list(windowed([1, 2, 3], 4))
        [(1, 2, 3, None)]

    Each window will advance in increments of *step*:

        >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
        [(1, 2, 3), (3, 4, 5), (5, 6, '!')]

    To slide into the iterable's items, use :func:`chain` to add filler items
    to the left:

        >>> iterable = [1, 2, 3, 4]
        >>> n = 3
        >>> padding = [None] * (n - 1)
        >>> list(windowed(chain(padding, iterable), 3))
        [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]
    """
    if n <= 0:
        raise ValueError('n must be > 0')
    if step < 1:
        raise ValueError('step must be >= 1')

    iterator = iter(seq)

    # Generate first window
    window = deque(islice(iterator, n), maxlen=n)

    # Deal with the first window not being full
    if not window:
        # Empty input: no windows at all.
        return
    if len(window) < n:
        # Input shorter than n: one padded window and we're done.
        yield tuple(window) + ((fillvalue,) * (n - len(window)))
        return
    yield tuple(window)

    # Create the filler for the next windows. The padding ensures
    # we have just enough elements to fill the last window.
    padding = (fillvalue,) * (n - 1 if step >= n else step - 1)
    # Each next() on *filler* appends one source (or padding) item to the
    # deque, evicting the oldest item because maxlen == n.
    filler = map(window.append, chain(iterator, padding))

    # Generate the rest of the windows
    # Stepping this islice by *step* consumes *step* appends between
    # yields, advancing the window by *step* items each time.
    for _ in islice(filler, step - 1, None, step):
        yield tuple(window)
def substrings(iterable):
    """Yield all of the substrings of *iterable*.

        >>> [''.join(s) for s in substrings('more')]
        ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']

    Note that non-string iterables can also be subdivided.

        >>> list(substrings([0, 1, 2]))
        [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]

    Like subslices() but returns tuples instead of lists
    and returns the shortest substrings first.

    """
    data = tuple(iterable)
    total = len(data)
    # Emit all windows of each size, shortest sizes first.
    for size in range(1, total + 1):
        for start in range(total - size + 1):
            yield data[start : start + size]
def substrings_indexes(seq, reverse=False):
    """Yield all substrings and their positions in *seq*

    The items yielded will be a tuple of the form ``(substr, i, j)``, where
    ``substr == seq[i:j]``.

    This function only works for iterables that support slicing, such as
    ``str`` objects.

        >>> for item in substrings_indexes('more'):
        ...     print(item)
        ('m', 0, 1)
        ('o', 1, 2)
        ('r', 2, 3)
        ('e', 3, 4)
        ('mo', 0, 2)
        ('or', 1, 3)
        ('re', 2, 4)
        ('mor', 0, 3)
        ('ore', 1, 4)
        ('more', 0, 4)

    Set *reverse* to ``True`` to yield the same items in the opposite order.

    """
    # Visit lengths ascending by default; longest-first when reversed.
    lengths = range(1, len(seq) + 1)
    if reverse:
        lengths = reversed(lengths)
    return (
        (seq[start : start + length], start, start + length)
        for length in lengths
        for start in range(len(seq) - length + 1)
    )
class bucket:
    """Wrap *iterable* and return an object that buckets the iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    .. seealso:: :func:`map_reduce`, :func:`groupby_transform`

        If storage is not a concern, :func:`map_reduce` returns a Python
        dictionary, which is generally easier to work with. If the elements
        with the same key are already adjacent, :func:`groupby_transform`
        or :func:`itertools.groupby` can be used without any caching overhead.

    """

    def __init__(self, iterable, key, validator=None):
        self._it = iter(iterable)
        self._key = key
        # Per-key FIFO of items encountered while searching for other keys.
        self._cache = defaultdict(deque)
        # With no validator, every requested key is considered plausible.
        self._validator = validator or (lambda x: True)

    def __contains__(self, value):
        # Membership test: try to pull one item for *value*; if one is
        # found, push it back to the front of that key's cache so it is
        # not lost.
        if not self._validator(value):
            return False

        try:
            item = next(self[value])
        except StopIteration:
            return False
        else:
            self._cache[value].appendleft(item)

        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            # If we've cached some items that match the target value, emit
            # the first one and evict it from the cache.
            if self._cache[value]:
                yield self._cache[value].popleft()
            # Otherwise we need to advance the parent iterator to search for
            # a matching item, caching the rest.
            else:
                while True:
                    try:
                        item = next(self._it)
                    except StopIteration:
                        # Source exhausted; this bucket has no more items.
                        return
                    item_value = self._key(item)
                    if item_value == value:
                        yield item
                        break
                    elif self._validator(item_value):
                        # Not ours - stash it for its own bucket.
                        self._cache[item_value].append(item)

    def __iter__(self):
        # Iterating a bucket yields the distinct keys. This exhausts the
        # source iterable and caches every item with a valid key.
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        return iter(self._cache)

    def __getitem__(self, value):
        # Keys rejected by the validator get an empty iterator rather than
        # a (possibly unbounded) search of the source.
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
def spy(iterable, n=1):
    """Return a 2-tuple ``(head, rest)``: *head* is a list holding the
    first *n* items of *iterable*, and *rest* is an iterator over all of
    the items, including those in *head*.

    This lets you "look ahead" without advancing the iterable:

    >>> head, iterable = spy('abcdefg')
    >>> head
    ['a']
    >>> list(iterable)
    ['a', 'b', 'c', 'd', 'e', 'f', 'g']

    Unpacking retrieves the items directly:

    >>> (first, second), iterable = spy('abcdefg', 2)
    >>> first
    'a'
    >>> second
    'b'

    If the iterable has fewer than *n* items, *head* contains all of them:

    >>> head, iterable = spy([1, 2, 3], 10)
    >>> head
    [1, 2, 3]
    """
    original, lookahead = tee(iterable)
    head = list(islice(lookahead, n))
    return head, original
def interleave(*iterables):
    """Yield one item from each iterable in turn, stopping as soon as the
    shortest iterable is exhausted.

    >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7]

    For a version that continues after the shortest iterable runs out,
    see :func:`interleave_longest`.
    """
    for round_ in zip(*iterables):
        yield from round_
def interleave_longest(*iterables):
    """Yield one item from each iterable in turn, skipping any iterable
    that has been exhausted.

    >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7, 3, 8]

    This function produces the same output as :func:`roundrobin`, but may
    perform better for some inputs (in particular when the number of
    iterables is large).
    """
    # Unique sentinel that cannot collide with real items (including None).
    missing = object()
    for round_ in zip_longest(*iterables, fillvalue=missing):
        for item in round_:
            if item is not missing:
                yield item
def interleave_evenly(iterables, lengths=None):
    """
    Interleave multiple iterables so that their elements are evenly distributed
    throughout the output sequence.

    >>> iterables = [1, 2, 3, 4, 5], ['a', 'b']
    >>> list(interleave_evenly(iterables))
    [1, 2, 'a', 3, 4, 'b', 5]

    >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]]
    >>> list(interleave_evenly(iterables))
    [1, 6, 4, 2, 7, 3, 8, 5]

    This function requires iterables of known length. Iterables without
    ``__len__()`` can be used by manually specifying lengths with *lengths*:

    >>> from itertools import combinations, repeat
    >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']]
    >>> lengths = [4 * (4 - 1) // 2, 3]
    >>> list(interleave_evenly(iterables, lengths=lengths))
    [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c']

    Based on Bresenham's algorithm.
    """
    if lengths is None:
        try:
            lengths = [len(it) for it in iterables]
        except TypeError:
            raise ValueError(
                'Iterable lengths could not be determined automatically. '
                'Specify them with the lengths keyword.'
            )
    elif len(iterables) != len(lengths):
        raise ValueError('Mismatching number of iterables and lengths.')

    dims = len(lengths)

    # sort iterables by length, descending
    lengths_permute = sorted(
        range(dims), key=lambda i: lengths[i], reverse=True
    )
    lengths_desc = [lengths[i] for i in lengths_permute]
    iters_desc = [iter(iterables[i]) for i in lengths_permute]

    # the longest iterable is the primary one (Bresenham: the longest
    # distance along an axis)
    delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:]
    iter_primary, iters_secondary = iters_desc[0], iters_desc[1:]
    # Bresenham error terms, one per secondary iterable; the initial offset
    # spreads the first "diagonal steps" evenly across the output.
    errors = [delta_primary // dims] * len(deltas_secondary)

    # Total number of items still to emit; the loop ends when all inputs
    # have been drained.
    to_yield = sum(lengths)
    while to_yield:
        yield next(iter_primary)
        to_yield -= 1
        # update errors for each secondary iterable
        errors = [e - delta for e, delta in zip(errors, deltas_secondary)]

        # those iterables for which the error is negative are yielded
        # ("diagonal step" in Bresenham)
        for i, e_ in enumerate(errors):
            if e_ < 0:
                yield next(iters_secondary[i])
                to_yield -= 1
                errors[i] += delta_primary
def interleave_randomly(*iterables):
    """Repeatedly select one of the input *iterables* at random and yield
    the next item from it, until all inputs are exhausted.

    >>> iterables = [1, 2, 3], 'abc', (True, False, None)
    >>> list(interleave_randomly(*iterables))  # doctest: +SKIP
    ['a', 'b', 1, 'c', True, False, None, 2, 3]

    The relative order of the items within each input iterable is
    preserved. Note that the possible output sequences are not all
    equally likely.
    """
    active = [iter(it) for it in iterables]
    while active:
        chosen = randrange(len(active))
        try:
            yield next(active[chosen])
        except StopIteration:
            # Swap-remove the exhausted iterator: overwrite it with the
            # last one and shrink the list (faster than list.pop(chosen)).
            active[chosen] = active[-1]
            del active[-1]
def collapse(iterable, base_type=None, levels=None):
    """Flatten an iterable with multiple levels of nesting (e.g., a list of
    lists of tuples) into non-iterable types.

    >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
    >>> list(collapse(iterable))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and
    will not be collapsed.

    To avoid collapsing other types, specify *base_type*:

    >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
    >>> list(collapse(iterable, base_type=tuple))
    ['ab', ('cd', 'ef'), 'gh', 'ij']

    Specify *levels* to stop flattening after a certain level:

    >>> iterable = [('a', ['b']), ('c', ['d'])]
    >>> list(collapse(iterable))  # Fully flattened
    ['a', 'b', 'c', 'd']
    >>> list(collapse(iterable, levels=1))  # Only one level flattened
    ['a', ['b'], 'c', ['d']]

    """
    # Explicit stack of (nesting_level, iterator-of-nodes) pairs; this
    # avoids recursion so arbitrarily deep nesting cannot overflow.
    stack = deque()
    # Add our first node group, treat the iterable as a single node
    stack.appendleft((0, repeat(iterable, 1)))

    while stack:
        node_group = stack.popleft()
        level, nodes = node_group

        # Check if beyond max level
        if levels is not None and level > levels:
            yield from nodes
            continue

        for node in nodes:
            # Check if done iterating
            if isinstance(node, (str, bytes)) or (
                (base_type is not None) and isinstance(node, base_type)
            ):
                yield node
            # Otherwise try to create child nodes
            else:
                try:
                    tree = iter(node)
                except TypeError:
                    yield node
                else:
                    # Save our current location: the partially-consumed
                    # parent group is pushed back so iteration resumes
                    # here after the child is fully processed.
                    stack.appendleft(node_group)
                    # Append the new child node
                    stack.appendleft((level + 1, tree))
                    # Break to process child node
                    break
def side_effect(func, iterable, chunk_size=None, before=None, after=None):
    """Yield the items of *iterable*, invoking *func* on each item (or on
    each list of *chunk_size* items) just before it is emitted.

    *func* takes a single argument; its return value is discarded. This
    is useful for logging, updating progress bars, or anything else that
    is not functionally "pure".

    *before* and *after* are optional zero-argument callables, executed
    before iteration starts and after it ends, respectively.

    >>> out = []
    >>> list(side_effect(out.append, range(3)))
    [0, 1, 2]
    >>> out
    [0, 1, 2]

    Operating on chunks of items:

    >>> pair_sums = []
    >>> func = lambda chunk: pair_sums.append(sum(chunk))
    >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
    [0, 1, 2, 3, 4, 5]
    >>> pair_sums
    [1, 5, 9]
    """
    try:
        # *before* runs inside the try block so that *after* still fires
        # if it raises.
        if before is not None:
            before()

        if chunk_size is None:
            for element in iterable:
                func(element)
                yield element
        else:
            for group in chunked(iterable, chunk_size):
                func(group)
                yield from group
    finally:
        if after is not None:
            after()
def sliced(seq, n, strict=False):
    """Yield slices of length *n* from the sequence *seq*.

    >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
    [(1, 2, 3), (4, 5, 6)]

    By default, the last yielded slice will have fewer than *n* elements
    if the length of *seq* is not divisible by *n*:

    >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
    [(1, 2, 3), (4, 5, 6), (7, 8)]

    If *strict* is ``True`` and the length of *seq* is not divisible by
    *n*, ``ValueError`` is raised before the last (short) slice would be
    yielded.

    Only works for iterables that support slicing; for other iterables,
    see :func:`chunked`.
    """
    # Empty slices mark the end of the sequence, so takewhile(len, ...)
    # stops there.
    slices = takewhile(len, (seq[start : start + n] for start in count(0, n)))
    if not strict:
        return slices

    def checked():
        for piece in slices:
            if len(piece) != n:
                raise ValueError("seq is not divisible by n.")
            yield piece

    return checked()
def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
    """Yield lists of items from *iterable*, splitting at each item for
    which *pred* returns ``True``.

    >>> list(split_at('abcdcba', lambda x: x == 'b'))
    [['a'], ['c', 'd', 'c'], ['a']]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
    [[0], [2], [4, 5, 6, 7, 8, 9]]

    The delimiting items are excluded from the output unless
    *keep_separator* is ``True``, in which case each separator appears as
    its own single-item list:

    >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
    [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        if not pred(element):
            group.append(element)
            continue
        # Hit a separator: emit the accumulated group (possibly empty).
        yield group
        if keep_separator:
            yield [element]
        if maxsplit == 1:
            # Split budget exhausted; the remainder is one final group.
            yield list(iterator)
            return
        maxsplit -= 1
        group = []
    yield group
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends just
    before an item for which *pred* returns ``True``:

    >>> list(split_before('OneTwo', lambda s: s.isupper()))
    [['O', 'n', 'e'], ['T', 'w', 'o']]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        # A match starts a new group, but only once the current group has
        # at least one item (so no leading empty group is emitted).
        if group and pred(element):
            yield group
            if maxsplit == 1:
                # Split budget exhausted; everything else is one group.
                yield [element, *iterator]
                return
            maxsplit -= 1
            group = []
        group.append(element)
    if group:
        yield group
def split_after(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends with an
    item for which *pred* returns ``True``:

    >>> list(split_after('one1two2', lambda s: s.isdigit()))
    [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    group = []
    for element in iterator:
        group.append(element)
        if pred(element):
            yield group
            if maxsplit == 1:
                # Split budget exhausted; the rest (if any) is one group.
                remainder = list(iterator)
                if remainder:
                    yield remainder
                return
            maxsplit -= 1
            group = []
    if group:
        yield group
def split_when(iterable, pred, maxsplit=-1):
    """Split *iterable* into pieces based on the output of *pred*, which
    is called with successive pairs of items and should return ``True``
    when the iterable is to be split between them.

    For example, to find runs of increasing numbers, split when element
    ``i`` is larger than element ``i + 1``:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    At most *maxsplit* splits are done; -1 (the default) means no limit:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2],
    ...                 lambda x, y: x > y, maxsplit=2))
    [[1, 2, 3, 3], [2, 5], [2, 4, 2]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        # Empty input produces no groups at all.
        return

    group = [previous]
    for current in iterator:
        if pred(previous, current):
            yield group
            if maxsplit == 1:
                # Split budget exhausted; the rest is one final group.
                yield [current, *iterator]
                return
            maxsplit -= 1
            group = []

        group.append(current)
        previous = current

    yield group
def split_into(iterable, sizes):
    """Yield a list of sequential items from *iterable* of length ``n``
    for each integer ``n`` in *sizes*.

    >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
    [[1], [2, 3], [4, 5, 6]]

    If the sum of *sizes* is smaller than the length of *iterable*, the
    remaining items are not returned. If it is larger, the overrunning
    list is short and any further lists are empty:

    >>> list(split_into([1,2,3,4], [1,2,3,4]))
    [[1], [2, 3], [4], []]

    A ``None`` in *sizes* captures everything that remains, the same way
    :func:`itertools.islice` does:

    >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
    [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    This is useful for grouping a series of items where the sizes of the
    groups are not uniform, e.g. table rows whose columns encode
    multi-element features (such as x,y,z points) in varying widths.
    """
    # Use a single iterator so successive islice calls pick up where the
    # previous one stopped (and so generators are handled correctly).
    iterator = iter(iterable)

    for size in sizes:
        if size is None:
            yield list(iterator)
            return
        yield list(islice(iterator, size))
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the elements of *iterable*, followed by enough *fillvalue*
    items that at least *n* items are emitted.

    >>> list(padded([1, 2, 3], '?', 5))
    [1, 2, 3, '?', '?']

    If *next_multiple* is ``True``, *fillvalue* is emitted until the
    total number of items is a multiple of *n*:

    >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
    [1, 2, 3, 4, None, None]

    If *n* is ``None``, *fillvalue* is emitted indefinitely; combine with
    :func:`itertools.islice` to get exactly *n* items:

    >>> list(islice(padded([1, 2, 3], '?'), 5))
    [1, 2, 3, '?', '?']
    """
    source = iter(iterable)
    # An endless stream: the real items, then fill values forever.
    padded_source = chain(source, repeat(fillvalue))

    if n is None:
        return padded_source
    if n < 1:
        raise ValueError('n must be at least 1')

    if next_multiple:

        def batches():
            # Whenever another real item exists, emit it plus enough
            # following items (real or fill) to complete a group of n.
            for head in source:
                yield (head,)
                yield islice(padded_source, n - 1)

        return chain.from_iterable(batches())

    # Pad the first n items if needed, then pass any remainder through.
    return chain(islice(padded_source, n), source)
def repeat_each(iterable, n=2):
    """Yield each element of *iterable* repeated *n* times in a row.

    >>> list(repeat_each('ABC', 3))
    ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
    """
    return chain.from_iterable(repeat(element, n) for element in iterable)
def repeat_last(iterable, default=None):
    """Yield the items of *iterable*; once it is exhausted, keep yielding
    its final item forever.

    >>> list(islice(repeat_last(range(3)), 5))
    [0, 1, 2, 2, 2]

    If the iterable is empty, yield *default* forever:

    >>> list(islice(repeat_last(range(0), 42), 5))
    [42, 42, 42, 42, 42]
    """
    # Sentinel that survives iterables legitimately containing None.
    missing = object()
    last = missing
    for last in iterable:
        yield last
    yield from repeat(default if last is missing else last)
def distribute(n, iterable):
    """Distribute the items from *iterable* among *n* smaller iterables.

    >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 3, 5]
    >>> list(group_2)
    [2, 4, 6]

    If the length of *iterable* is not evenly divisible by *n*, the
    returned iterables will differ in length; if it is smaller than *n*,
    the trailing iterables will be empty:

    >>> [list(c) for c in distribute(5, [1, 2, 3])]
    [[1], [2], [3], [], []]

    This function uses :func:`itertools.tee` and may require significant
    storage. If you need the items to stay in their original order within
    each group, see :func:`divide` instead.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Child i gets items i, i + n, i + 2n, ... from its own tee'd copy.
    result = []
    for offset, copy in enumerate(tee(iterable, n)):
        result.append(islice(copy, offset, None, n))
    return result
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples whose elements are offset views of *iterable*; the
    `i`-th element of each tuple is offset by the `i`-th item in *offsets*.

    >>> list(stagger([0, 1, 2, 3]))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3)]
    >>> list(stagger(range(8), offsets=(0, 2, 4)))
    [(0, 2, 4), (1, 3, 5), (2, 4, 6), (3, 5, 7)]

    By default the sequence ends when the final element of a tuple is the
    last item of the iterable; set *longest* to ``True`` to continue
    until the first element is the last item, padding with *fillvalue*:

    >>> list(stagger([0, 1, 2, 3], longest=True))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
    """
    # One independent copy of the input per requested offset.
    copies = tee(iterable, len(offsets))
    return zip_offset(
        *copies, offsets=offsets, longest=longest, fillvalue=fillvalue
    )
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together, but offset the `i`-th
    iterable by the `i`-th item in *offsets*.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can be used as a lightweight alternative to SciPy or pandas to
    analyze data sets in which some series have a lead or lag relationship.

    By default the output ends when the shortest (shifted) iterable is
    exhausted; set *longest* to ``True`` to continue until the longest is
    exhausted, padding with *fillvalue*:

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    shifted = []
    for iterable, offset in zip(iterables, offsets):
        if offset > 0:
            # Positive offset: skip the first *offset* items.
            shifted.append(islice(iterable, offset, None))
        elif offset < 0:
            # Negative offset: delay the iterable behind fill values.
            shifted.append(chain(repeat(fillvalue, -offset), iterable))
        else:
            shifted.append(iterable)

    if longest:
        return zip_longest(*shifted, fillvalue=fillvalue)

    return zip(*shifted)
1936def sort_together(
1937 iterables, key_list=(0,), key=None, reverse=False, strict=False
1938):
1939 """Return the input iterables sorted together, with *key_list* as the
1940 priority for sorting. All iterables are trimmed to the length of the
1941 shortest one.
1943 This can be used like the sorting function in a spreadsheet. If each
1944 iterable represents a column of data, the key list determines which
1945 columns are used for sorting.
1947 By default, all iterables are sorted using the ``0``-th iterable::
1949 >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
1950 >>> sort_together(iterables)
1951 [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]
1953 Set a different key list to sort according to another iterable.
1954 Specifying multiple keys dictates how ties are broken::
1956 >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
1957 >>> sort_together(iterables, key_list=(1, 2))
1958 [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]
1960 To sort by a function of the elements of the iterable, pass a *key*
1961 function. Its arguments are the elements of the iterables corresponding to
1962 the key list::
1964 >>> names = ('a', 'b', 'c')
1965 >>> lengths = (1, 2, 3)
1966 >>> widths = (5, 2, 1)
1967 >>> def area(length, width):
1968 ... return length * width
1969 >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
1970 [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]
1972 Set *reverse* to ``True`` to sort in descending order.
1974 >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
1975 [(3, 2, 1), ('a', 'b', 'c')]
1977 If the *strict* keyword argument is ``True``, then
1978 ``ValueError`` will be raised if any of the iterables have
1979 different lengths.
1981 """
1982 if key is None:
1983 # if there is no key function, the key argument to sorted is an
1984 # itemgetter
1985 key_argument = itemgetter(*key_list)
1986 else:
1987 # if there is a key function, call it with the items at the offsets
1988 # specified by the key function as arguments
1989 key_list = list(key_list)
1990 if len(key_list) == 1:
1991 # if key_list contains a single item, pass the item at that offset
1992 # as the only argument to the key function
1993 key_offset = key_list[0]
1994 key_argument = lambda zipped_items: key(zipped_items[key_offset])
1995 else:
1996 # if key_list contains multiple items, use itemgetter to return a
1997 # tuple of items, which we pass as *args to the key function
1998 get_key_items = itemgetter(*key_list)
1999 key_argument = lambda zipped_items: key(
2000 *get_key_items(zipped_items)
2001 )
2003 transposed = zip(*iterables, strict=strict)
2004 reordered = sorted(transposed, key=key_argument, reverse=reverse)
2005 untransposed = zip(*reordered, strict=strict)
2006 return list(untransposed)
def unzip(iterable):
    """The inverse of :func:`zip`, this function disaggregates the elements
    of the zipped *iterable*.

    The ``i``-th iterable contains the ``i``-th element from each element
    of the zipped iterable. The first element is used to determine the
    length of the remaining elements.

    >>> iterable = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> letters, numbers = unzip(iterable)
    >>> list(letters)
    ['a', 'b', 'c', 'd']
    >>> list(numbers)
    [1, 2, 3, 4]

    This is similar to using ``zip(*iterable)``, but it avoids reading
    *iterable* into memory. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.

    """
    # Peek at the first element (without consuming it) to learn how many
    # output iterables are needed.
    head, iterable = spy(iterable)
    if not head:
        # empty iterable, e.g. zip([], [], [])
        return ()
    # spy returns a one-length iterable as head
    head = head[0]
    iterables = tee(iterable, len(head))
    # If we have an iterable like iter([(1, 2, 3), (4, 5), (6,)]),
    # the second unzipped iterable fails at the third tuple since
    # it tries to access (6,)[1].
    # Same with the third unzipped iterable and the second tuple.
    # To support these "improperly zipped" iterables, we suppress
    # the IndexError, which just stops the unzipped iterables at
    # first length mismatch.
    return tuple(
        iter_suppress(map(itemgetter(i), it), IndexError)
        for i, it in enumerate(iterables)
    )
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining
    order.

    >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 2, 3]
    >>> list(group_2)
    [4, 5, 6]

    If the length of *iterable* is not evenly divisible by *n*, the parts
    will differ in length; if it is smaller than *n*, the trailing parts
    will be empty:

    >>> [list(c) for c in divide(5, [1, 2, 3])]
    [[1], [2], [3], [], []]

    This function exhausts the iterable before returning. If order is not
    important, see :func:`distribute`, which does not pull the iterable
    into memory first.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Materialize non-sliceable inputs; sliceable sequences are used as-is.
    try:
        iterable[:0]
    except TypeError:
        seq = tuple(iterable)
    else:
        seq = iterable

    base, extra = divmod(len(seq), n)

    parts = []
    start = 0
    for index in range(n):
        # The first *extra* parts each receive one additional element.
        size = base + 1 if index < extra else base
        parts.append(iter(seq[start : start + size]))
        start += size

    return parts
def always_iterable(obj, base_type=(str, bytes)):
    """Return an iterator over *obj* if it is iterable, or over the
    one-item tuple ``(obj,)`` if it is not.

    >>> list(always_iterable((1, 2, 3)))
    [1, 2, 3]
    >>> list(always_iterable(1))
    [1]

    ``None`` yields an empty iterator:

    >>> list(always_iterable(None))
    []

    Objects that are instances of *base_type* are treated as atomic; by
    default that covers binary and text strings:

    >>> list(always_iterable('foo'))
    ['foo']
    >>> list(always_iterable({'a': 1}, base_type=dict))
    [{'a': 1}]

    Set *base_type* to ``None`` to disable the special handling entirely:

    >>> list(always_iterable('foo', base_type=None))
    ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    treat_as_atom = base_type is not None and isinstance(obj, base_type)
    if not treat_as_atom:
        try:
            return iter(obj)
        except TypeError:
            pass  # Not iterable after all; fall through to the wrapper.
    return iter((obj,))
def adjacent(predicate, iterable, distance=1):
    """Return an iterable of ``(bool, item)`` tuples where *item* is drawn
    from *iterable* and the bool is ``True`` if that item satisfies
    *predicate* or is within *distance* places of an item that does.

    >>> list(adjacent(lambda x: x == 3, range(6)))
    [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]

    >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
    [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]

    Useful for contextualizing search results, e.g. showing the lines
    around a match in a diff. The predicate is called exactly once per
    item. See also :func:`groupby_transform` for grouping runs of items
    with the same bool value.
    """
    # distance=0 is allowed mainly so results can be checked against map().
    if distance < 0:
        raise ValueError('distance must be at least 0')

    items, originals = tee(iterable)
    pad = [False] * distance
    flags = chain(pad, map(predicate, items), pad)
    # An item counts as adjacent if any flag within its window is set.
    nearby = map(any, windowed(flags, 2 * distance + 1))
    return zip(nearby, originals)
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """An extension of :func:`itertools.groupby` that can apply
    transformations to the grouped data.

    * *keyfunc* computes a key for each item in *iterable*
    * *valuefunc* transforms the individual items after grouping
    * *reducefunc* transforms each group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Each optional argument defaults to an identity function if not
    specified. As with :func:`itertools.groupby`, only adjacent items are
    grouped together, so sort the iterable by the key function first if
    you don't want duplicate groups.

    .. seealso:: :func:`bucket`, :func:`map_reduce`
    """
    groups = groupby(iterable, keyfunc)

    def transformed():
        for group_key, group in groups:
            if valuefunc:
                group = map(valuefunc, group)
            if reducefunc:
                group = reducefunc(group)
            yield (group_key, group)

    # With no transformations, hand back the groupby iterator directly.
    if valuefunc or reducefunc:
        return transformed()
    return groups
class numeric_range(Sequence):
    """An extension of the built-in ``range()`` function whose arguments can
    be any orderable numeric type.

    With only *stop* specified, *start* defaults to ``0`` and *step*
    defaults to ``1``. The output items will match the type of *stop*:

    >>> list(numeric_range(3.5))
    [0.0, 1.0, 2.0, 3.0]

    With only *start* and *stop* specified, *step* defaults to ``1``. The
    output items will match the type of *start*:

    >>> from decimal import Decimal
    >>> start = Decimal('2.1')
    >>> stop = Decimal('5.1')
    >>> list(numeric_range(start, stop))
    [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]

    With *start*, *stop*, and *step* specified the output items will match
    the type of ``start + step``:

    >>> from fractions import Fraction
    >>> start = Fraction(1, 2)  # Start at 1/2
    >>> stop = Fraction(5, 2)  # End at 5/2
    >>> step = Fraction(1, 2)  # Count by 1/2
    >>> list(numeric_range(start, stop, step))
    [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]

    If *step* is zero, ``ValueError`` is raised. Negative steps are supported:

    >>> list(numeric_range(3, -1, -1.0))
    [3.0, 2.0, 1.0, 0.0]

    Be aware of the limitations of floating-point numbers; the representation
    of the yielded numbers may be surprising.

    ``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
    is a ``datetime.timedelta`` object:

    >>> import datetime
    >>> start = datetime.datetime(2019, 1, 1)
    >>> stop = datetime.datetime(2019, 1, 3)
    >>> step = datetime.timedelta(days=1)
    >>> items = iter(numeric_range(start, stop, step))
    >>> next(items)
    datetime.datetime(2019, 1, 1, 0, 0)
    >>> next(items)
    datetime.datetime(2019, 1, 2, 0, 0)

    """

    # Hash shared by all empty ranges, mirroring hash(range(0, 0)) so that
    # equal (empty) ranges hash equally.
    _EMPTY_HASH = hash(range(0, 0))

    def __init__(self, *args):
        # Mimic range()'s argument forms: (stop), (start, stop), or
        # (start, stop, step). Defaults are constructed from the argument
        # types so the yielded items keep the caller's numeric type.
        argc = len(args)
        if argc == 1:
            (self._stop,) = args
            self._start = type(self._stop)(0)
            self._step = type(self._stop - self._start)(1)
        elif argc == 2:
            self._start, self._stop = args
            self._step = type(self._stop - self._start)(1)
        elif argc == 3:
            self._start, self._stop, self._step = args
        elif argc == 0:
            raise TypeError(
                f'numeric_range expected at least 1 argument, got {argc}'
            )
        else:
            raise TypeError(
                f'numeric_range expected at most 3 arguments, got {argc}'
            )

        # A zero of the step's type, used for sign tests and modulo checks.
        self._zero = type(self._step)(0)
        if self._step == self._zero:
            raise ValueError('numeric_range() arg 3 must not be zero')
        # True when counting upward (positive step).
        self._growing = self._step > self._zero

    def __bool__(self):
        # Non-empty iff at least one value lies strictly before stop
        # in the direction of travel.
        if self._growing:
            return self._start < self._stop
        else:
            return self._start > self._stop

    def __contains__(self, elem):
        # Membership requires being in the half-open interval AND being an
        # exact multiple of step away from start.
        if self._growing:
            if self._start <= elem < self._stop:
                return (elem - self._start) % self._step == self._zero
        else:
            if self._start >= elem > self._stop:
                return (self._start - elem) % (-self._step) == self._zero

        return False

    def __eq__(self, other):
        # Two ranges are equal when they yield the same items: all empty
        # ranges are equal, otherwise compare start, step, and last item
        # (stop itself may differ, e.g. range(0, 5, 2) == range(0, 6, 2)).
        if isinstance(other, numeric_range):
            empty_self = not bool(self)
            empty_other = not bool(other)
            if empty_self or empty_other:
                return empty_self and empty_other  # True if both empty
            else:
                return (
                    self._start == other._start
                    and self._step == other._step
                    and self._get_by_index(-1) == other._get_by_index(-1)
                )
        else:
            return False

    def __getitem__(self, key):
        # Integer indexing delegates to _get_by_index; slicing produces a
        # new numeric_range computed from the normalized slice indices.
        if isinstance(key, int):
            return self._get_by_index(key)
        elif isinstance(key, slice):
            start_idx, stop_idx, step_idx = key.indices(self._len)
            return numeric_range(
                self._start + start_idx * self._step,
                self._start + stop_idx * self._step,
                self._step * step_idx,
            )
        else:
            raise TypeError(
                'numeric range indices must be '
                f'integers or slices, not {type(key).__name__}'
            )

    def __hash__(self):
        # Hash on (start, last item, step) so ranges that compare equal
        # (see __eq__) hash equally regardless of their stop values.
        if self:
            return hash((self._start, self._get_by_index(-1), self._step))
        else:
            return self._EMPTY_HASH

    def __iter__(self):
        # Generate start + n * step and cut off once stop is reached;
        # partial(gt, stop) is "stop > value", partial(lt, stop) the reverse.
        values = (self._start + (n * self._step) for n in count())
        if self._growing:
            return takewhile(partial(gt, self._stop), values)
        else:
            return takewhile(partial(lt, self._stop), values)

    def __len__(self):
        return self._len

    @cached_property
    def _len(self):
        # Length is computed once and cached. Normalize to a positive-step
        # problem first, then use Euclidean division with rounding up.
        if self._growing:
            start = self._start
            stop = self._stop
            step = self._step
        else:
            start = self._stop
            stop = self._start
            step = -self._step
        distance = stop - start
        if distance <= self._zero:
            return 0
        else:  # distance > 0 and step > 0: regular euclidean division
            q, r = divmod(distance, step)
            return int(q) + int(r != self._zero)

    def __reduce__(self):
        # Support pickling by reconstructing from the original arguments.
        return numeric_range, (self._start, self._stop, self._step)

    def __repr__(self):
        # Omit the step when it's the default of 1, matching range()'s repr.
        if self._step == 1:
            return f"numeric_range({self._start!r}, {self._stop!r})"
        return (
            f"numeric_range({self._start!r}, {self._stop!r}, {self._step!r})"
        )

    def __reversed__(self):
        # Walk from the last item back past start using the negated step.
        return iter(
            numeric_range(
                self._get_by_index(-1), self._start - self._step, -self._step
            )
        )

    def count(self, value):
        # A range contains each of its values exactly once.
        return int(value in self)

    def index(self, value):
        # Same interval/multiple-of-step check as __contains__, but return
        # the position (the quotient) instead of a bool.
        if self._growing:
            if self._start <= value < self._stop:
                q, r = divmod(value - self._start, self._step)
                if r == self._zero:
                    return int(q)
        else:
            if self._start >= value > self._stop:
                q, r = divmod(self._start - value, -self._step)
                if r == self._zero:
                    return int(q)

        raise ValueError(f"{value} is not in numeric range")

    def _get_by_index(self, i):
        # Support negative indexes by offsetting from the cached length.
        if i < 0:
            i += self._len
        if i < 0 or i >= self._len:
            raise IndexError("numeric range object index out of range")
        return self._start + i * self._step
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item. If *n* is omitted the
    process repeats indefinitely.

    >>> list(count_cycle('AB', 3))
    [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    """
    if n is not None:
        # Finite case: equivalent to pairing each cycle number with each item.
        return product(range(n), iterable)

    # Infinite case: materialize the items so they can be cycled forever.
    seq = tuple(iterable)
    if not seq:
        return iter(())
    # n is guaranteed to be None here (the finite case returned above), so
    # the cycle counter is always an unbounded count(). The previous
    # `count() if n is None else range(n)` expression was dead code.
    counter = count()
    return zip(repeat_each(counter, len(seq)), cycle(seq))
def mark_ends(iterable):
    """Yield 3-tuples of the form ``(is_first, is_last, item)``.

    >>> list(mark_ends('ABC'))
    [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]

    Use this when looping over an iterable to take special action on its first
    and/or last items:

    >>> iterable = ['Header', 100, 200, 'Footer']
    >>> total = 0
    >>> for is_first, is_last, item in mark_ends(iterable):
    ...     if is_first:
    ...         continue  # Skip the header
    ...     if is_last:
    ...         continue  # Skip the footer
    ...     total += item
    >>> print(total)
    300
    """
    iterator = iter(iterable)
    try:
        previous = next(iterator)
    except StopIteration:
        return  # Empty input: nothing to mark.

    # Hold one item of lookahead: an item can only be emitted once we know
    # whether something follows it.
    is_first = True
    for current in iterator:
        yield is_first, False, previous
        previous = current
        is_first = False
    yield is_first, True, previous
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
    [1, 2, 4]

    Set *pred* to a custom function to, e.g., find the indexes for a
    particular item.

    >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
    [1, 3]

    If *window_size* is given, then the *pred* function will be called with
    the values in each window. This enables searching for sub-sequences.
    Note that *pred* may receive fewer than *window_size* arguments at the
    end of the iterable.

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(locate(iterable, pred=pred, window_size=3))
    [1, 5, 9]

    Use with :func:`seekable` to find indexes and then retrieve the
    associated items:

    >>> from itertools import count
    >>> from more_itertools import seekable
    >>> source = (3 * n + 1 if (n % 2) else n // 2 for n in count())
    >>> it = seekable(source)
    >>> pred = lambda x: x > 100
    >>> indexes = locate(it, pred=pred)
    >>> i = next(indexes)
    >>> it.seek(i)
    >>> next(it)
    106

    """
    if window_size is None:
        # compress() keeps the indexes whose predicate result is truthy.
        return compress(count(), map(pred, iterable))

    if window_size < 1:
        raise ValueError('window size must be at least 1')

    # Call pred with each window's real values; _marker padding at the end
    # of the iterable is filtered out before the call.
    windows = windowed(iterable, window_size, fillvalue=_marker)
    results = (
        pred(*(value for value in window if value is not _marker))
        for window in windows
    )
    return compress(count(), results)
def longest_common_prefix(iterables):
    """Yield elements of the longest common prefix among given *iterables*.

    >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
    'ab'

    """
    # Transpose the inputs; the first column whose values differ ends the
    # common prefix.
    matching_columns = takewhile(all_equal, zip(*iterables))
    return (column[0] for column in matching_columns)
def lstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the beginning
    for which *pred* returns ``True``.

    For example, to remove a set of items from the start of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(lstrip(iterable, pred))
    [1, 2, None, 3, False, None]

    This function is analogous to :func:`str.lstrip`, and is essentially a
    wrapper for :func:`itertools.dropwhile`.

    """
    yield from dropwhile(pred, iterable)
def rstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the end
    for which *pred* returns ``True``.

    For example, to remove a set of items from the end of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(rstrip(iterable, pred))
    [None, False, None, 1, 2, None, 3]

    This function is analogous to :func:`str.rstrip`.

    """
    # Buffer items that match the predicate; only flush the buffer when a
    # non-matching item proves they weren't at the end.
    buffered = []
    for item in iterable:
        if pred(item):
            buffered.append(item)
        else:
            if buffered:
                yield from buffered
                buffered = []
            yield item
def strip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the
    beginning and end for which *pred* returns ``True``.

    For example, to remove a set of items from both ends of an iterable:

    >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
    >>> pred = lambda x: x in {None, False, ''}
    >>> list(strip(iterable, pred))
    [1, 2, None, 3]

    This function is analogous to :func:`str.strip`.

    """
    # Strip the front first, then the back of what remains.
    without_leading = lstrip(iterable, pred)
    return rstrip(without_leading, pred)
class islice_extended:
    """An extension of :func:`itertools.islice` that supports negative values
    for *stop*, *start*, and *step*.

    >>> iterator = iter('abcdefgh')
    >>> list(islice_extended(iterator, -4, -1))
    ['e', 'f', 'g']

    Slices with negative values require some caching of *iterable*, but this
    function takes care to minimize the amount of memory required.

    For example, you can use a negative step with an infinite iterator:

    >>> from itertools import count
    >>> list(islice_extended(count(), 110, 99, -2))
    [110, 108, 106, 104, 102, 100]

    You can also use slice notation directly:

    >>> iterator = map(str, count())
    >>> it = islice_extended(iterator)[10:20:2]
    >>> list(it)
    ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        source = iter(iterable)
        # With slice arguments, delegate to the helper; without them this is
        # a plain pass-through wrapper.
        self._iterator = (
            _islice_helper(source, slice(*args)) if args else source
        )

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self, key):
        if not isinstance(key, slice):
            raise TypeError(
                'islice_extended.__getitem__ argument must be a slice'
            )
        return islice_extended(_islice_helper(self._iterator, key))
def _islice_helper(it, s):
    """Yield the items of iterator *it* selected by slice *s*.

    Supports negative ``start``, ``stop``, and ``step`` values by caching
    only as many items as needed (in a bounded deque) instead of
    materializing the whole iterator. Helper for :class:`islice_extended`.
    """
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    # A step of None means 1, same as built-in slicing.
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index in range(n):
                if index % step == 0:
                    # pop and yield the item.
                    # We don't want to use an intermediate variable
                    # it would extend the lifetime of the current item
                    yield cache.popleft()[1]
                else:
                    # just pop and discard the item
                    cache.popleft()
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            for index, item in enumerate(it):
                if index % step == 0:
                    # pop and yield the item.
                    # We don't want to use an intermediate variable
                    # it would extend the lifetime of the current item
                    yield cache.popleft()
                else:
                    # just pop and discard the item
                    cache.popleft()
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        # Negative step: items are produced in reverse order, so some amount
        # of caching is always required.
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            cache = list(islice(it, n))

            yield from cache[i::step]
def always_reversible(iterable):
    """An extension of :func:`reversed` that supports all iterables, not
    just those which implement the ``Reversible`` or ``Sequence`` protocols.

    >>> print(*always_reversible(x for x in range(3)))
    2 1 0

    If the iterable is already reversible, this function returns the
    result of :func:`reversed()`. If the iterable is not reversible,
    this function will cache the remaining items in the iterable and
    yield them in reverse order, which may require significant storage.
    """
    # EAFP: reversed() raises TypeError for non-reversible inputs, in which
    # case we fall back to materializing the items first.
    try:
        return reversed(iterable)
    except TypeError:
        pass
    return reversed(list(iterable))
def consecutive_groups(iterable, ordering=None):
    """Yield groups of consecutive items using :func:`itertools.groupby`.
    The *ordering* function determines whether two items are adjacent by
    returning their position.

    By default, the ordering function is the identity function. This is
    suitable for finding runs of numbers:

    >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
    >>> for group in consecutive_groups(iterable):
    ...     print(list(group))
    [1]
    [10, 11, 12]
    [20]
    [30, 31, 32, 33]
    [40]

    To find runs of adjacent letters, apply the :func:`ord` function
    to convert letters to ordinals.

    >>> iterable = 'abcdfgilmnop'
    >>> ordering = ord
    >>> for group in consecutive_groups(iterable, ordering):
    ...     print(list(group))
    ['a', 'b', 'c', 'd']
    ['f', 'g']
    ['i']
    ['l', 'm', 'n', 'o', 'p']

    Each group of consecutive items is an iterator that shares its source
    with *iterable*. When an output group is advanced, the previous group is
    no longer available unless its elements are copied (e.g., into a
    ``list``).

    >>> iterable = [1, 2, 11, 12, 21, 22]
    >>> saved_groups = []
    >>> for group in consecutive_groups(iterable):
    ...     saved_groups.append(list(group))  # Copy group elements
    >>> saved_groups
    [[1, 2], [11, 12], [21, 22]]

    """
    # Consecutive items have a constant difference between their enumerated
    # index and their position value, so that difference works as a group key.
    if ordering is None:
        def key(pair):
            index, item = pair
            return index - item
    else:
        def key(pair):
            index, item = pair
            return index - ordering(item)

    for _, group in groupby(enumerate(iterable), key=key):
        yield map(itemgetter(1), group)
def difference(iterable, func=sub, *, initial=None):
    """This function is the inverse of :func:`itertools.accumulate`. By default
    it will compute the first difference of *iterable* using
    :func:`operator.sub`:

    >>> from itertools import accumulate
    >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
    >>> list(difference(iterable))
    [0, 1, 2, 3, 4]

    *func* defaults to :func:`operator.sub`, but other functions can be
    specified. They will be applied as follows::

        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...

    For example, to do progressive division:

    >>> iterable = [1, 2, 6, 24, 120]
    >>> func = lambda x, y: x // y
    >>> list(difference(iterable, func))
    [1, 2, 3, 4, 5]

    If the *initial* keyword is set, the first element will be skipped when
    computing successive differences.

    >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
    >>> list(difference(it, initial=10))
    [1, 2, 3]

    """
    # Two copies of the input: `lagging` stays one item behind `leading`.
    lagging, leading = tee(iterable)
    try:
        head = [next(leading)]
    except StopIteration:
        return iter([])

    # Pair each item with its predecessor: func(item, previous_item).
    diffs = map(func, leading, lagging)
    if initial is not None:
        # The first element was produced by `initial`, so skip echoing it.
        return diffs
    return chain(head, diffs)
class SequenceView(Sequence):
    """Return a read-only view of the sequence object *target*.

    :class:`SequenceView` objects are analogous to Python's built-in
    "dictionary view" types. They provide a dynamic view of a sequence's
    items, meaning that when the sequence updates, so does the view.

    >>> seq = ['0', '1', '2']
    >>> view = SequenceView(seq)
    >>> view
    SequenceView(['0', '1', '2'])
    >>> seq.append('3')
    >>> view
    SequenceView(['0', '1', '2', '3'])

    Sequence views support indexing, slicing, and length queries. They act
    like the underlying sequence, except they don't allow assignment:

    >>> view[1]
    '1'
    >>> view[1:-1]
    ['1', '2']
    >>> len(view)
    4

    Sequence views are useful as an alternative to copying, as they don't
    require (much) extra storage.

    """

    def __init__(self, target):
        # Only genuine sequences can back a view.
        if not isinstance(target, Sequence):
            raise TypeError
        self._target = target

    def __getitem__(self, index):
        # Delegate indexing (including slices) to the underlying sequence.
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return f'{type(self).__name__}({self._target!r})'
class seekable:
    """Wrap an iterator to allow for seeking backward and forward. This
    progressively caches the items in the source iterable so they can be
    re-visited.

    Call :meth:`seek` with an index to seek to that position in the source
    iterable.

    To "reset" an iterator, seek to ``0``:

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> it.seek(0)
    >>> next(it), next(it), next(it)
    ('0', '1', '2')

    You can also seek forward:

    >>> it = seekable((str(n) for n in range(20)))
    >>> it.seek(10)
    >>> next(it)
    '10'
    >>> it.seek(20)  # Seeking past the end of the source isn't a problem
    >>> list(it)
    []
    >>> it.seek(0)  # Resetting works even after hitting the end
    >>> next(it)
    '0'

    Call :meth:`relative_seek` to seek relative to the source iterator's
    current position.

    >>> it = seekable((str(n) for n in range(20)))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> it.relative_seek(2)
    >>> next(it)
    '5'
    >>> it.relative_seek(-3)  # Source is at '6', we move back to '3'
    >>> next(it)
    '3'
    >>> it.relative_seek(-3)  # Source is at '4', we move back to '1'
    >>> next(it)
    '1'

    Call :meth:`peek` to look ahead one item without advancing the iterator:

    >>> it = seekable('1234')
    >>> it.peek()
    '1'
    >>> list(it)
    ['1', '2', '3', '4']
    >>> it.peek(default='empty')
    'empty'

    Before the iterator is at its end, calling :func:`bool` on it will return
    ``True``. After it will return ``False``:

    >>> it = seekable('5678')
    >>> bool(it)
    True
    >>> list(it)
    ['5', '6', '7', '8']
    >>> bool(it)
    False

    You may view the contents of the cache with the :meth:`elements` method.
    That returns a :class:`SequenceView`, a view that updates automatically:

    >>> it = seekable((str(n) for n in range(10)))
    >>> next(it), next(it), next(it)
    ('0', '1', '2')
    >>> elements = it.elements()
    >>> elements
    SequenceView(['0', '1', '2'])
    >>> next(it)
    '3'
    >>> elements
    SequenceView(['0', '1', '2', '3'])

    By default, the cache grows as the source iterable progresses, so beware of
    wrapping very large or infinite iterables. Supply *maxlen* to limit the
    size of the cache (this of course limits how far back you can seek).

    >>> from itertools import count
    >>> it = seekable((str(n) for n in count()), maxlen=2)
    >>> next(it), next(it), next(it), next(it)
    ('0', '1', '2', '3')
    >>> list(it.elements())
    ['2', '3']
    >>> it.seek(0)
    >>> next(it), next(it), next(it), next(it)
    ('2', '3', '4', '5')
    >>> next(it)
    '6'

    """

    def __init__(self, iterable, maxlen=None):
        # _cache holds every item already pulled from the source; a bounded
        # deque is used when maxlen is given so memory stays capped.
        self._source = iter(iterable)
        if maxlen is None:
            self._cache = []
        else:
            self._cache = deque([], maxlen)
        # _index is None when we're at the "live" end of the source;
        # otherwise it's the position within the cache to read from next.
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        # Replay from the cache first, if we've been seeked backward.
        if self._index is not None:
            try:
                item = self._cache[self._index]
            except IndexError:
                # Ran off the end of the cache: resume pulling from source.
                self._index = None
            else:
                self._index += 1
                return item

        # At the live end: pull a new item and remember it.
        item = next(self._source)
        self._cache.append(item)
        return item

    def __bool__(self):
        # Truthy while at least one more item can be produced.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        # Pull the next item, then step the read position back one so the
        # next __next__ call returns the same item again.
        try:
            peeked = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default
        if self._index is None:
            self._index = len(self._cache)
        self._index -= 1
        return peeked

    def elements(self):
        # Read-only, live view of everything cached so far.
        return SequenceView(self._cache)

    def seek(self, index):
        # Position the read cursor; if the target is beyond the cache,
        # advance the source far enough to fill the gap.
        self._index = index
        remainder = index - len(self._cache)
        if remainder > 0:
            consume(self, remainder)

    def relative_seek(self, count):
        # Treat "at the live end" as pointing just past the cache, then
        # seek by the requested offset (clamped at 0).
        if self._index is None:
            self._index = len(self._cache)

        self.seek(max(self._index + count, 0))
class run_length:
    """
    :func:`run_length.encode` compresses an iterable with run-length encoding.
    It yields groups of repeated items with the count of how many times they
    were repeated:

    >>> uncompressed = 'abbcccdddd'
    >>> list(run_length.encode(uncompressed))
    [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` decompresses an iterable that was previously
    compressed with run-length encoding. It yields the items of the
    decompressed iterable:

    >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
    >>> list(run_length.decode(compressed))
    ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        # Each maximal run of equal adjacent items becomes one (item, count).
        return ((value, ilen(group)) for value, group in groupby(iterable))

    @staticmethod
    def decode(iterable):
        # Expand each (item, count) pair back into `count` copies of item.
        return chain.from_iterable(
            repeat(value, times) for value, times in iterable
        )
def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

    >>> exactly_n([True, True, False], 2)
    True
    >>> exactly_n([True, True, False], 1)
    False
    >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
    True

    The iterable will be advanced until ``n + 1`` truthy items are
    encountered, so avoid calling it on infinite iterables.

    """
    if n < 0:
        return False
    # Pull at most n + 1 matching items; the count equals n only when
    # iteration stopped without finding an (n + 1)-th match.
    matches = islice(filter(predicate, iterable), n + 1)
    return sum(1 for _ in matches) == n
def circular_shifts(iterable, steps=1):
    """Yield the circular shifts of *iterable*.

    >>> list(circular_shifts(range(4)))
    [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]

    Set *steps* to the number of places to rotate to the left
    (or to the right if negative). Defaults to 1.

    >>> list(circular_shifts(range(4), 2))
    [(0, 1, 2, 3), (2, 3, 0, 1)]

    >>> list(circular_shifts(range(4), -1))
    [(0, 1, 2, 3), (3, 0, 1, 2), (2, 3, 0, 1), (1, 2, 3, 0)]

    """
    items = deque(iterable)
    if steps == 0:
        raise ValueError('Steps should be a non-zero integer')

    # Pre-rotate one step backward so the first emitted shift is the
    # original ordering; deque.rotate's sign is opposite to "shift left".
    items.rotate(steps)
    shift = -steps
    # The sequence of shifts repeats after len // gcd(len, steps) rotations.
    size = len(items)
    num_shifts = size // math.gcd(size, shift)

    for _ in range(num_shifts):
        items.rotate(shift)
        yield tuple(items)
def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, which is a function that
    modifies an iterable. *result_index* is the position in that function's
    signature where the iterable goes.

    This lets you use itertools on the "production end," i.e. at function
    definition. This can augment what the function returns without changing
    the function's code.

    For example, to produce a decorator version of :func:`chunked`:

    >>> from more_itertools import chunked
    >>> chunker = make_decorator(chunked, result_index=0)
    >>> @chunker(3)
    ... def iter_range(n):
    ...     return iter(range(n))
    ...
    >>> list(iter_range(9))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

    >>> truth_serum = make_decorator(filter, result_index=1)
    >>> @truth_serum(bool)
    ... def boolean_test():
    ...     return [0, 1, '', ' ', False, True]
    ...
    >>> list(boolean_test())
    [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers make for practical
    decorators:

    >>> from more_itertools import peekable
    >>> peekable_function = make_decorator(peekable)
    >>> @peekable_function()
    ... def str_range(*args):
    ...     return (str(x) for x in range(*args))
    ...
    >>> it = str_range(1, 20, 2)
    >>> next(it), next(it), next(it)
    ('1', '3', '5')
    >>> it.peek()
    '7'
    >>> next(it)
    '7'

    """

    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrap_args, **wrap_kwargs):
        # The decorator factory captures the extra arguments destined for
        # wrapping_func.
        def outer(f):
            def inner(*args, **kwargs):
                # Call the decorated function, splice its result into the
                # captured arguments at result_index, and hand everything
                # to wrapping_func.
                produced = f(*args, **kwargs)
                call_args = list(wrap_args)
                call_args.insert(result_index, produced)
                return wrapping_func(*call_args, **wrap_kwargs)

            return inner

        return outer

    return decorator
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary that maps the items in *iterable* to categories
    defined by *keyfunc*, transforms them with *valuefunc*, and
    then summarizes them by category with *reducefunc*.

    *valuefunc* defaults to the identity function if it is unspecified.
    If *reducefunc* is unspecified, no summarization takes place:

    >>> keyfunc = lambda x: x.upper()
    >>> result = map_reduce('abbccc', keyfunc)
    >>> sorted(result.items())
    [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

    >>> keyfunc = lambda x: x.upper()
    >>> valuefunc = lambda x: 1
    >>> result = map_reduce('abbccc', keyfunc, valuefunc)
    >>> sorted(result.items())
    [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

    >>> keyfunc = lambda x: x.upper()
    >>> valuefunc = lambda x: 1
    >>> reducefunc = sum
    >>> result = map_reduce('abbccc', keyfunc, valuefunc, reducefunc)
    >>> sorted(result.items())
    [('A', 1), ('B', 2), ('C', 3)]

    You may want to filter the input iterable before applying the map/reduce
    procedure:

    >>> all_items = range(30)
    >>> items = [x for x in all_items if 10 <= x <= 20]  # Filter
    >>> keyfunc = lambda x: x % 2  # Evens map to 0; odds to 1
    >>> categories = map_reduce(items, keyfunc=keyfunc)
    >>> sorted(categories.items())
    [(0, [10, 12, 14, 16, 18, 20]), (1, [11, 13, 15, 17, 19])]
    >>> summaries = map_reduce(items, keyfunc=keyfunc, reducefunc=sum)
    >>> sorted(summaries.items())
    [(0, 90), (1, 75)]

    Note that all items in the iterable are gathered into a list before the
    summarization step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with the
    ``default_factory`` set to ``None``, such that it behaves like a normal
    dictionary.

    .. seealso:: :func:`bucket`, :func:`groupby_transform`

    If storage is a concern, :func:`bucket` can be used without consuming the
    entire iterable right away. If the elements with the same key are already
    adjacent, :func:`groupby_transform` or :func:`itertools.groupby` can be
    used without any caching overhead.

    """
    mapping = defaultdict(list)

    for element in iterable:
        # keyfunc is applied before valuefunc, preserving call order for
        # stateful functions.
        category = keyfunc(element)
        value = element if valuefunc is None else valuefunc(element)
        mapping[category].append(value)

    if reducefunc is not None:
        # Collapse each category's list into its summary in place.
        for category in mapping:
            mapping[category] = reducefunc(mapping[category])

    # Disable auto-vivification so missing keys raise KeyError like a dict.
    mapping.default_factory = None
    return mapping
def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``, starting from the right and moving left.

    *pred* defaults to :func:`bool`, which selects truthy items:

    >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
    [4, 2, 1]

    Set *pred* to a custom function to, e.g., find the indexes for a
    particular item:

    >>> iterator = iter('abcb')
    >>> pred = lambda x: x == 'b'
    >>> list(rlocate(iterator, pred))
    [3, 1]

    If *window_size* is given, *pred* will be called with that many items,
    which enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(rlocate(iterable, pred=pred, window_size=3))
    [9, 5, 1]

    Beware, this function won't return anything for infinite iterables.
    If *iterable* is reversible, ``rlocate`` will reverse it and search from
    the right. Otherwise, it will search from the left and return the results
    in reverse order.

    See :func:`locate` to for other example applications.

    """
    if window_size is None:
        # A sized sequence can be scanned from the right directly; translate
        # each reversed-scan hit back to its left-based index.
        try:
            length = len(iterable)
        except TypeError:
            pass
        else:
            return (length - i - 1 for i in locate(reversed(iterable), pred))

    # Fallback: search left-to-right, then reverse the collected indexes.
    return reversed(list(locate(iterable, pred, window_size)))
def replace(iterable, pred, substitutes, count=None, window_size=1):
    """Yield the items from *iterable*, replacing the items for which *pred*
    returns ``True`` with the items from the iterable *substitutes*.

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
    >>> pred = lambda x: x == 0
    >>> substitutes = (2, 3)
    >>> list(replace(iterable, pred, substitutes))
    [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]

    If *count* is given, the number of replacements will be limited:

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
    >>> pred = lambda x: x == 0
    >>> substitutes = [None]
    >>> list(replace(iterable, pred, substitutes, count=2))
    [1, 1, None, 1, 1, None, 1, 1, 0]

    Use *window_size* to control the number of items passed as arguments to
    *pred*. This allows for locating and replacing subsequences.

    >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
    >>> window_size = 3
    >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
    >>> substitutes = [3, 4] # Splice in these items
    >>> list(replace(iterable, pred, substitutes, window_size=window_size))
    [3, 4, 5, 3, 4, 5]

    *pred* may receive fewer than *window_size* arguments at the end of
    the iterable and should be able to handle this.

    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    # Materialize substitutes: they are re-yielded on every match.
    subs = tuple(substitutes)

    # Pad the tail with sentinels so there is one window per input item.
    padded = chain(iterable, repeat(_marker, window_size - 1))
    windows = windowed(padded, window_size)

    replacements = 0
    for window in windows:
        # Strip sentinel padding so pred never sees it; near the end of the
        # input, pred therefore receives fewer than window_size arguments.
        args = tuple(item for item in window if item is not _marker)

        # On a match (and while under the replacement limit), emit the
        # substitutes and skip the windows that overlap the matched span.
        # E.g. with input (0, 1, 2, ...) and window_size 2, a match on
        # (0, 1) means the following window (1, 2) must be discarded too.
        if args and pred(*args):
            if count is None or replacements < count:
                replacements += 1
                yield from subs
                consume(windows, window_size - 1)
                continue

        # No match (or the limit was reached): pass through the first item.
        if args:
            yield args[0]
def partitions(iterable):
    """Yield all possible order-preserving partitions of *iterable*.

    >>> iterable = 'abc'
    >>> for part in partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['a', 'b', 'c']

    This is unrelated to :func:`partition`.

    """
    seq = list(iterable)
    total = len(seq)
    # Every subset of the interior cut points {1, ..., total - 1} defines
    # exactly one order-preserving partition.
    for cuts in powerset(range(1, total)):
        bounds = (0,) + cuts + (total,)
        yield [seq[lo:hi] for lo, hi in zip(bounds, bounds[1:])]
def set_partitions(iterable, k=None, min_size=None, max_size=None):
    """
    Yield the set partitions of *iterable* into *k* parts. Set partitions are
    not order-preserving.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, 2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']


    If *k* is not given, every set partition is generated.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']


    if *min_size* and/or *max_size* are given, the minimum and/or maximum size
    per block in partition is set.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, min_size=2):
    ...     print([''.join(p) for p in part])
    ['abc']
    >>> for part in set_partitions(iterable, max_size=2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    """
    pool = list(iterable)
    n = len(pool)
    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        elif k > n:
            # More blocks than elements: no partitions exist.
            return

    min_size = 0 if min_size is None else min_size
    max_size = n if max_size is None else max_size
    if min_size > max_size:
        return

    def helper(elements, parts):
        # Recursively yield all partitions of *elements* into *parts* blocks.
        size = len(elements)
        if parts == 1:
            yield [elements]
        elif size == parts:
            yield [[x] for x in elements]
        else:
            head, *tail = elements
            # Case 1: head forms a singleton block.
            for rest in helper(tail, parts - 1):
                yield [[head], *rest]
            # Case 2: head joins each existing block in turn.
            for rest in helper(tail, parts):
                for i in range(len(rest)):
                    yield rest[:i] + [[head] + rest[i]] + rest[i + 1 :]

    def size_ok(partition):
        return all(min_size <= len(block) <= max_size for block in partition)

    block_counts = range(1, n + 1) if k is None else (k,)
    for parts in block_counts:
        yield from filter(size_ok, helper(pool, parts))
class time_limited:
    """
    Yield items from *iterable* until *limit_seconds* have passed.
    If the time limit expires before all items have been yielded, the
    ``timed_out`` parameter will be set to ``True``.

    >>> from time import sleep
    >>> def generator():
    ...     yield 1
    ...     yield 2
    ...     sleep(0.2)
    ...     yield 3
    >>> iterable = time_limited(0.1, generator())
    >>> list(iterable)
    [1, 2]
    >>> iterable.timed_out
    True

    Note that the time is checked before each item is yielded, and iteration
    stops if the time elapsed is greater than *limit_seconds*. If your time
    limit is 1 second, but it takes 2 seconds to generate the first item from
    the iterable, the function will run for 2 seconds and not yield anything.
    As a special case, when *limit_seconds* is zero, the iterator never
    returns anything.

    """

    def __init__(self, limit_seconds, iterable):
        # Zero is explicitly allowed (it yields nothing); only negative
        # limits are invalid, so the message says "non-negative".
        if limit_seconds < 0:
            raise ValueError('limit_seconds must be non-negative')
        self.limit_seconds = limit_seconds
        self._iterator = iter(iterable)
        # The clock starts at construction time, not at first iteration.
        self._start_time = monotonic()
        self.timed_out = False

    def __iter__(self):
        return self

    def __next__(self):
        # Special case: a zero-second limit never yields anything.
        if self.limit_seconds == 0:
            self.timed_out = True
            raise StopIteration
        item = next(self._iterator)
        # The check happens after the item is produced, so a slow source
        # can overrun the limit before iteration stops.
        if monotonic() - self._start_time > self.limit_seconds:
            self.timed_out = True
            raise StopIteration

        return item
def only(iterable, default=None, too_long=None):
    """If *iterable* has only one item, return it.
    If it has zero items, return *default*.
    If it has more than one item, raise the exception given by *too_long*,
    which is ``ValueError`` by default.

    >>> only([], default='missing')
    'missing'
    >>> only([1])
    1
    >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 1, 2,
     and perhaps more.'
    >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    TypeError

    Note that :func:`only` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.

    """
    iterator = iter(iterable)
    try:
        first = next(iterator)
    except StopIteration:
        # Zero items: fall back to the default.
        return default
    try:
        second = next(iterator)
    except StopIteration:
        # Exactly one item: the expected case.
        return first
    msg = (
        f'Expected exactly one item in iterable, but got {first!r}, '
        f'{second!r}, and perhaps more.'
    )
    raise too_long or ValueError(msg)
def ichunked(iterable, n):
    """Break *iterable* into sub-iterables with *n* elements each.
    :func:`ichunked` is like :func:`chunked`, but it yields iterables
    instead of lists.

    If the sub-iterables are read in order, the elements of *iterable*
    won't be stored in memory.
    If they are read out of order, :func:`itertools.tee` is used to cache
    elements as necessary.

    >>> from itertools import count
    >>> all_chunks = ichunked(count(), 4)
    >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
    >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
    [4, 5, 6, 7]
    >>> list(c_1)
    [0, 1, 2, 3]
    >>> list(c_3)
    [8, 9, 10, 11]

    """
    source = iter(iterable)
    for head in source:
        # Lazily pull at most n - 1 further items for this chunk.
        body = islice(source, n - 1)
        cache, backup = tee(body)
        yield chain([head], body, cache)
        # Advance past any unread part of the chunk so the next chunk starts
        # at the right place; the consumed items remain available via cache.
        consume(backup)
3650def iequals(*iterables):
3651 """Return ``True`` if all given *iterables* are equal to each other,
3652 which means that they contain the same elements in the same order.
3654 The function is useful for comparing iterables of different data types
3655 or iterables that do not support equality checks.
3657 >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
3658 True
3660 >>> iequals("abc", "acb")
3661 False
3663 Not to be confused with :func:`all_equal`, which checks whether all
3664 elements of iterable are equal to each other.
3666 """
3667 try:
3668 return all(map(all_equal, zip(*iterables, strict=True)))
3669 except ValueError:
3670 return False
def distinct_combinations(iterable, r):
    """Yield the distinct combinations of *r* items taken from *iterable*.

    >>> list(distinct_combinations([0, 0, 1], 2))
    [(0, 0), (0, 1)]

    Equivalent to ``set(combinations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    elif r == 0:
        # The only combination of length 0 is the empty tuple.
        yield ()
        return
    pool = tuple(iterable)
    # Depth-first backtracking. generators[-1] produces the candidate
    # (index, item) pairs for the current slot; unique_everseen filters out
    # items equal to one already tried at that slot, which is what prevents
    # duplicate combinations from being generated at all.
    generators = [unique_everseen(enumerate(pool), key=itemgetter(1))]
    current_combo = [None] * r
    level = 0
    while generators:
        try:
            cur_idx, p = next(generators[-1])
        except StopIteration:
            # Current slot is out of candidates: backtrack one level.
            generators.pop()
            level -= 1
            continue
        current_combo[level] = p
        if level + 1 == r:
            # All slots filled: emit a snapshot of the combination.
            yield tuple(current_combo)
        else:
            # Descend: candidates for the next slot come strictly after
            # cur_idx in the pool, again with duplicates filtered out.
            generators.append(
                unique_everseen(
                    enumerate(pool[cur_idx + 1 :], cur_idx + 1),
                    key=itemgetter(1),
                )
            )
            level += 1
def filter_except(validator, iterable, *exceptions):
    """Yield the items from *iterable* for which the *validator* function does
    not raise one of the specified *exceptions*.

    *validator* is called for each item in *iterable*.
    It should be a function that accepts one argument and raises an exception
    if that item is not valid.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(filter_except(int, iterable, ValueError, TypeError))
    ['1', '2', '4']

    If an exception other than one given by *exceptions* is raised by
    *validator*, it is raised like normal.
    """
    for candidate in iterable:
        try:
            validator(candidate)
        except exceptions:
            # Listed exceptions mean "invalid": silently skip the item.
            continue
        yield candidate
def map_except(function, iterable, *exceptions):
    """Transform each item from *iterable* with *function* and yield the
    result, unless *function* raises one of the specified *exceptions*.

    *function* is called to transform each item in *iterable*.
    It should accept one argument.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(map_except(int, iterable, ValueError, TypeError))
    [1, 2, 4]

    If an exception other than one given by *exceptions* is raised by
    *function*, it is raised like normal.
    """
    for value in iterable:
        # Listed exceptions mean "skip this item"; others propagate.
        try:
            yield function(value)
        except exceptions:
            pass
def map_if(iterable, pred, func, func_else=None):
    """Evaluate each item from *iterable* using *pred*. If the result is
    equivalent to ``True``, transform the item with *func* and yield it.
    Otherwise, transform the item with *func_else* and yield it.

    *pred*, *func*, and *func_else* should each be functions that accept
    one argument. By default, *func_else* is the identity function.

    >>> from math import sqrt
    >>> iterable = list(range(-5, 5))
    >>> iterable
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
    >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
    >>> list(map_if(iterable, lambda x: x >= 0,
    ... lambda x: f'{sqrt(x):.2f}', lambda x: None))
    [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
    """
    # Default the "else" branch to the identity so one loop handles both.
    fallback = (lambda x: x) if func_else is None else func_else
    for item in iterable:
        yield func(item) if pred(item) else fallback(item)
3786def _sample_unweighted(iterator, k, strict):
3787 # Algorithm L in the 1994 paper by Kim-Hung Li:
3788 # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".
3790 reservoir = list(islice(iterator, k))
3791 if strict and len(reservoir) < k:
3792 raise ValueError('Sample larger than population')
3793 W = 1.0
3795 with suppress(StopIteration):
3796 while True:
3797 W *= random() ** (1 / k)
3798 skip = floor(log(random()) / log1p(-W))
3799 element = next(islice(iterator, skip, None))
3800 reservoir[randrange(k)] = element
3802 shuffle(reservoir)
3803 return reservoir
def _sample_weighted(iterator, k, weights, strict):
    # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
    # "Weighted random sampling with a reservoir".
    # Returns a k-length list sampled without replacement, with inclusion
    # chances driven by the per-element *weights*.

    # Log-transform for numerical stability for weights that are small/large
    weight_keys = (log(random()) / weight for weight in weights)

    # Fill up the reservoir (collection of samples) with the first `k`
    # weight-keys and elements, then heapify the list.
    reservoir = take(k, zip(weight_keys, iterator))
    if strict and len(reservoir) < k:
        raise ValueError('Sample larger than population')

    heapify(reservoir)

    # The number of jumps before changing the reservoir is a random variable
    # with an exponential distribution. Sample it using random() and logs.
    smallest_weight_key, _ = reservoir[0]
    weights_to_skip = log(random()) / smallest_weight_key

    # NOTE: the weight_keys generator above already consumed the first k
    # values of *weights*, so this zip resumes at the (k+1)-th weight and
    # element — both iterators stay in lock-step.
    for weight, element in zip(weights, iterator):
        if weight >= weights_to_skip:
            # The notation here is consistent with the paper, but we store
            # the weight-keys in log-space for better numerical stability.
            smallest_weight_key, _ = reservoir[0]
            t_w = exp(weight * smallest_weight_key)
            r_2 = uniform(t_w, 1)  # generate U(t_w, 1)
            weight_key = log(r_2) / weight
            # Evict the entry with the smallest weight-key (heap root).
            heapreplace(reservoir, (weight_key, element))
            smallest_weight_key, _ = reservoir[0]
            weights_to_skip = log(random()) / smallest_weight_key
        else:
            weights_to_skip -= weight

    # Discard the weight-keys; return the elements in random order.
    ret = [element for weight_key, element in reservoir]
    shuffle(ret)
    return ret
3845def _sample_counted(population, k, counts, strict):
3846 element = None
3847 remaining = 0
3849 def feed(i):
3850 # Advance *i* steps ahead and consume an element
3851 nonlocal element, remaining
3853 while i + 1 > remaining:
3854 i = i - remaining
3855 element = next(population)
3856 remaining = next(counts)
3857 remaining -= i + 1
3858 return element
3860 with suppress(StopIteration):
3861 reservoir = []
3862 for _ in range(k):
3863 reservoir.append(feed(0))
3865 if strict and len(reservoir) < k:
3866 raise ValueError('Sample larger than population')
3868 with suppress(StopIteration):
3869 W = 1.0
3870 while True:
3871 W *= random() ** (1 / k)
3872 skip = floor(log(random()) / log1p(-W))
3873 element = feed(skip)
3874 reservoir[randrange(k)] = element
3876 shuffle(reservoir)
3877 return reservoir
def sample(iterable, k, weights=None, *, counts=None, strict=False):
    """Return a *k*-length list of elements chosen (without replacement)
    from the *iterable*.

    Similar to :func:`random.sample`, but works on inputs that aren't
    indexable (such as sets and dictionaries) and on inputs where the
    size isn't known in advance (such as generators).

    >>> iterable = range(100)
    >>> sample(iterable, 5)  # doctest: +SKIP
    [81, 60, 96, 16, 4]

    For iterables with repeated elements, you may supply *counts* to
    indicate the repeats.

    >>> iterable = ['a', 'b']
    >>> counts = [3, 4]  # Equivalent to 'a', 'a', 'a', 'b', 'b', 'b', 'b'
    >>> sample(iterable, k=3, counts=counts)  # doctest: +SKIP
    ['a', 'a', 'b']

    An iterable with *weights* may be given:

    >>> iterable = range(100)
    >>> weights = (i * i + 1 for i in range(100))
    >>> sampled = sample(iterable, 5, weights=weights)  # doctest: +SKIP
    [79, 67, 74, 66, 78]

    Weighted selections are made without replacement: once an element is
    selected, it leaves the pool and the relative weights of the remaining
    elements increase (this does not match the behavior of
    :func:`random.sample`'s *counts* parameter). Note that *weights* may
    not be used together with *counts*.

    If the length of *iterable* is less than *k*,
    ``ValueError`` is raised if *strict* is ``True`` and
    all elements are returned (in shuffled order) if *strict* is ``False``.

    By default, the `Algorithm L <https://w.wiki/ANrM>`__ reservoir sampling
    technique is used. When *weights* are provided,
    `Algorithm A-ExpJ <https://w.wiki/ANrS>`__ is used instead.

    Notes on reproducibility:

    * The algorithms rely on inexact floating-point functions provided
      by the underlying math library (e.g. ``log``, ``log1p``, and ``pow``).
      Those functions can `produce slightly different results
      <https://members.loria.fr/PZimmermann/papers/accuracy.pdf>`_ on
      different builds. Accordingly, selections can vary across builds
      even for the same seed.

    * The algorithms loop over the input and make selections based on
      ordinal position, so selections from unordered collections (such as
      sets) won't reproduce across sessions on the same platform using the
      same seed. For example, this won't reproduce::

        >> seed(8675309)
        >> sample(set('abcdefghijklmnopqrstuvwxyz'), 10)
        ['c', 'p', 'e', 'w', 's', 'a', 'j', 'd', 'n', 't']

    """
    iterator = iter(iterable)

    # Validate k before doing any sampling work.
    if k < 0:
        raise ValueError('k must be non-negative')
    if k == 0:
        return []

    if weights is not None and counts is not None:
        raise TypeError('weights and counts are mutually exclusive')

    # Dispatch to the appropriate reservoir-sampling algorithm.
    if weights is not None:
        return _sample_weighted(iterator, k, iter(weights), strict)
    if counts is not None:
        return _sample_counted(iterator, k, iter(counts), strict)
    return _sample_unweighted(iterator, k, strict)
def is_sorted(iterable, key=None, reverse=False, strict=False):
    """Returns ``True`` if the items of iterable are in sorted order, and
    ``False`` otherwise. *key* and *reverse* have the same meaning that they do
    in the built-in :func:`sorted` function.

    >>> is_sorted(['1', '2', '3', '4', '5'], key=int)
    True
    >>> is_sorted([5, 4, 3, 1, 2], reverse=True)
    False

    If *strict*, tests for strict sorting, that is, returns ``False`` if equal
    elements are found:

    >>> is_sorted([1, 2, 2])
    True
    >>> is_sorted([1, 2, 2], strict=True)
    False

    The function returns ``False`` after encountering the first out-of-order
    item, which means it may produce results that differ from the built-in
    :func:`sorted` function for objects with unusual comparison dynamics
    (like ``math.nan``). If there are no out-of-order items, the iterable is
    exhausted.
    """
    items = iterable if key is None else map(key, iterable)
    # Pair each item with its successor.
    current, ahead = tee(items)
    next(ahead, None)
    if reverse:
        current, ahead = ahead, current
    if strict:
        # Every item must be strictly less than its successor.
        return all(map(lt, current, ahead))
    # No item may be greater than its successor.
    return not any(map(lt, ahead, current))
class AbortThread(BaseException):
    """Raised inside a :class:`callback_iter` worker's callback to cancel it.

    It derives from ``BaseException`` rather than ``Exception`` —
    presumably so broad ``except Exception`` handlers in user code don't
    swallow the cancellation (TODO: confirm).
    """

    pass
class callback_iter:
    """Convert a function that uses callbacks to an iterator.

    .. warning::
        This function is deprecated as of version 11.0.0. It will be removed in a future
        major release.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

    >>> def func(callback=None):
    ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
    ...         if callback:
    ...             callback(i, c)
    ...     return 4


    Use ``with callback_iter(func)`` to get an iterator over the parameters
    that are delivered to the callback.

    >>> with callback_iter(func) as it:
    ...     for args, kwargs in it:
    ...         print(args)
    (1, 'a')
    (2, 'b')
    (3, 'c')

    The function will be called in a background thread. The ``done`` property
    indicates whether it has completed execution.

    >>> it.done
    True

    If it completes successfully, its return value will be available
    in the ``result`` property.

    >>> it.result
    4

    Notes:

    * If the function uses some keyword argument besides ``callback``, supply
      *callback_kwd*.
    * If it finished executing, but raised an exception, accessing the
      ``result`` property will raise the same exception.
    * If it hasn't finished executing, accessing the ``result``
      property from within the ``with`` block will raise ``RuntimeError``.
    * If it hasn't finished executing, accessing the ``result`` property from
      outside the ``with`` block will raise a
      ``more_itertools.AbortThread`` exception.
    * Provide *wait_seconds* to adjust how frequently the it is polled for
      output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        self._func = func
        self._callback_kwd = callback_kwd
        # Set on __exit__; checked by the callback, which then raises
        # AbortThread inside the worker to stop it.
        self._aborted = False
        self._future = None
        self._wait_seconds = wait_seconds
        # Lazily import concurrent.future
        self._module = __import__('concurrent.futures').futures
        # One worker thread: *func* runs in the background exactly once.
        self._executor = self._module.ThreadPoolExecutor(max_workers=1)
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Signal the callback to abort, then wait for the worker thread
        # to wind down.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        # False until the background call has been submitted and finished.
        if self._future is None:
            return False
        return self._future.done()

    @property
    def result(self):
        if self._future:
            try:
                # timeout=0 makes this non-blocking: it raises TimeoutError
                # if the function is still running.
                return self._future.result(timeout=0)
            except self._module.TimeoutError:
                pass

        raise RuntimeError('Function has not yet completed')

    def _reader(self):
        # Channel from the worker to the consumer: the callback enqueues
        # its (args, kwargs) here.
        q = Queue()

        def callback(*args, **kwargs):
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        self._future = self._executor.submit(
            self._func, **{self._callback_kwd: callback}
        )

        # Poll the queue, yielding items as they arrive, until the
        # background call finishes.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # Drain anything enqueued between the last get() and completion.
        remaining = []
        while True:
            try:
                item = q.get_nowait()
            except Empty:
                break
            else:
                q.task_done()
                remaining.append(item)
        q.join()
        yield from remaining
def windowed_complete(iterable, n):
    """
    Yield ``(beginning, middle, end)`` tuples, where:

    * Each ``middle`` has *n* items from *iterable*
    * Each ``beginning`` has the items before the ones in ``middle``
    * Each ``end`` has the items after the ones in ``middle``

    >>> iterable = range(7)
    >>> n = 3
    >>> for beginning, middle, end in windowed_complete(iterable, n):
    ...     print(beginning, middle, end)
    () (0, 1, 2) (3, 4, 5, 6)
    (0,) (1, 2, 3) (4, 5, 6)
    (0, 1) (2, 3, 4) (5, 6)
    (0, 1, 2) (3, 4, 5) (6,)
    (0, 1, 2, 3) (4, 5, 6) ()

    Note that *n* must be at least 0 and most equal to the length of
    *iterable*.

    This function will exhaust the iterable and may require significant
    storage.
    """
    if n < 0:
        raise ValueError('n must be >= 0')

    seq = tuple(iterable)
    size = len(seq)
    if n > size:
        raise ValueError('n must be <= len(seq)')

    # Slide the fixed-width middle window across the sequence.
    for start in range(size - n + 1):
        stop = start + n
        yield seq[:start], seq[start:stop], seq[stop:]
def all_unique(iterable, key=None):
    """
    Returns ``True`` if all the elements of *iterable* are unique (no two
    elements are equal).

    >>> all_unique('ABCB')
    False

    If a *key* function is specified, it will be used to make comparisons.

    >>> all_unique('ABCb')
    True
    >>> all_unique('ABCb', str.lower)
    False

    The function returns as soon as the first non-unique element is
    encountered. Iterables with a mix of hashable and unhashable items can
    be used, but the function will be slower for unhashable items.
    """
    # Hashable elements go in a set (fast); unhashable ones fall back to a
    # list with linear membership checks.
    hashable_seen = set()
    unhashable_seen = []
    for value in map(key, iterable) if key else iterable:
        try:
            if value in hashable_seen:
                return False
            hashable_seen.add(value)
        except TypeError:
            if value in unhashable_seen:
                return False
            unhashable_seen.append(value)
    return True
def nth_product(index, *iterables, repeat=1):
    """Equivalent to ``list(product(*iterables, repeat=repeat))[index]``.

    The products of *iterables* can be ordered lexicographically.
    :func:`nth_product` computes the product at sort position *index* without
    computing the previous products.

    >>> nth_product(8, range(2), range(2), range(2), range(2))
    (1, 0, 0, 0)

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. The above example is equivalent to::

    >>> nth_product(8, range(2), repeat=4)
    (1, 0, 0, 0)

    ``IndexError`` will be raised if the given *index* is invalid.
    """
    # Reverse the pools so digits are peeled least-significant first.
    pools = tuple(map(tuple, reversed(iterables))) * repeat
    sizes = tuple(map(len, pools))

    total = prod(sizes)

    # Support negative indexing, like a sequence.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    # Treat *index* as a mixed-radix number; each pool supplies one digit.
    digits = []
    for pool, size in zip(pools, sizes):
        digits.append(pool[index % size])
        index //= size

    return tuple(reversed(digits))
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`nth_permutation`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences.

    >>> nth_permutation('ghijk', 2, 5)
    ('h', 'i')

    ``ValueError`` will be raised if *r* is negative.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = list(iterable)
    n = len(pool)
    if r is None:
        r = n
    total = perm(n, r)

    # Support negative indexing, like a sequence.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    result = [0] * r
    # Scale the partial-permutation rank up to a full factorial-base rank.
    rank = index * factorial(n) // total if r < n else index
    for radix in range(1, n + 1):
        rank, digit = divmod(rank, radix)
        if 0 <= n - radix < r:
            result[n - radix] = digit
        if rank == 0:
            break

    # Each digit selects (and removes) one element from the shrinking pool.
    return tuple(map(pool.pop, result))
def nth_combination_with_replacement(iterable, r, index):
    """Equivalent to
    ``list(combinations_with_replacement(iterable, r))[index]``.


    The subsequences with repetition of *iterable* that are of length *r* can
    be ordered lexicographically. :func:`nth_combination_with_replacement`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences with replacement.

    >>> nth_combination_with_replacement(range(5), 3, 5)
    (0, 1, 1)

    ``ValueError`` will be raised if *r* is negative.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = tuple(iterable)
    n = len(pool)
    if r < 0:
        raise ValueError
    total = comb(n + r - 1, r) if n else 0 if r else 1

    # Support negative indexing, like a sequence.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    result = []
    pos = 0
    while r:
        r -= 1
        # Skip whole blocks of combinations that start with smaller
        # elements until *index* falls inside the current block.
        while n >= 0:
            block = comb(n + r - 1, r)
            if index < block:
                break
            n -= 1
            pos += 1
            index -= block
        result.append(pool[pos])

    return tuple(result)
def value_chain(*args):
    """Yield all arguments passed to the function in the same order in which
    they were passed. If an argument itself is iterable then iterate over its
    values.

    >>> list(value_chain(1, 2, 3, [4, 5, 6]))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and are emitted
    as-is:

    >>> list(value_chain('12', '34', ['56', '78']))
    ['12', '34', '56', '78']

    Pre- or postpend a single element to an iterable:

    >>> list(value_chain(1, [2, 3, 4, 5, 6]))
    [1, 2, 3, 4, 5, 6]
    >>> list(value_chain([1, 2, 3, 4, 5], 6))
    [1, 2, 3, 4, 5, 6]

    Multiple levels of nesting are not flattened.

    """
    for arg in args:
        # Strings and bytes are treated as scalars, not iterables.
        if isinstance(arg, (str, bytes)):
            yield arg
        else:
            try:
                yield from arg
            except TypeError:
                # Not iterable: emit the value itself.
                yield arg
def product_index(element, *iterables, repeat=1):
    """Equivalent to ``list(product(*iterables, repeat=repeat)).index(tuple(element))``

    The products of *iterables* can be ordered lexicographically.
    :func:`product_index` computes the first index of *element* without
    computing the previous products.

    >>> product_index([8, 2], range(10), range(5))
    42

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables::

    >>> product_index([8, 0, 7], range(10), repeat=3)
    807

    ``ValueError`` will be raised if the given *element* isn't in the product
    of *args*.
    """
    elements = tuple(element)
    pools = tuple(map(tuple, iterables)) * repeat
    if len(elements) != len(pools):
        raise ValueError('element is not a product of args')

    # Horner's scheme over mixed radixes: each pool contributes one digit.
    index = 0
    for item, pool in zip(elements, pools):
        index = index * len(pool) + pool.index(item)
    return index
def combination_index(element, iterable):
    """Equivalent to ``list(combinations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* can be ordered
    lexicographically. :func:`combination_index` computes the index of the
    first *element*, without computing the previous combinations.

    >>> combination_index('adf', 'abcdefg')
    10

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations of *iterable*.
    """
    # Walk *element* lazily: k is the 0-based position within element,
    # y is the value currently being searched for in the pool.
    element = enumerate(element)
    k, y = next(element, (None, None))
    if k is None:
        # An empty element is the single length-0 combination, at index 0
        return 0

    # Record the pool position of each matched value of *element*
    indexes = []
    pool = enumerate(iterable)
    for n, x in pool:
        if x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                # Every value of *element* has been matched
                break
            else:
                k = tmp
    else:
        # Pool exhausted before matching all of *element*
        raise ValueError('element is not a combination of iterable')

    # Drain the pool to learn the index of its final item; the fallback
    # keeps the current n if the pool is already exhausted.
    n, _ = last(pool, default=(n, None))

    # Combinatorial number system: count the combinations that come after
    # *element* and subtract from the total count, comb(n + 1, k + 1).
    index = 1
    for i, j in enumerate(reversed(indexes), start=1):
        j = n - j
        if i <= j:
            index += comb(j, i)

    return comb(n + 1, k + 1) - index
def combination_with_replacement_index(element, iterable):
    """Equivalent to
    ``list(combinations_with_replacement(iterable, r)).index(element)``

    The subsequences with repetition of *iterable* that are of length *r* can
    be ordered lexicographically. :func:`combination_with_replacement_index`
    computes the index of the first *element*, without computing the previous
    combinations with replacement.

    >>> combination_with_replacement_index('adf', 'abcdefg')
    20

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations with replacement of *iterable*.
    """
    element = tuple(element)
    l = len(element)
    element = enumerate(element)

    # k is the 0-based position within *element*; y is the value being matched
    k, y = next(element, (None, None))
    if k is None:
        # An empty element is the single length-0 combination, at index 0
        return 0

    # Record the pool position of every matched value.  The inner while
    # loop consumes repeated values without advancing the pool.
    indexes = []
    pool = tuple(iterable)
    for n, x in enumerate(pool):
        while x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                break
            else:
                k = tmp
        if y is None:
            # Every value of *element* has been matched
            break
    else:
        raise ValueError(
            'element is not a combination with replacement of iterable'
        )

    # occupations[p] counts how many times pool position p was used
    n = len(pool)
    occupations = [0] * n
    for p in indexes:
        occupations[p] += 1

    # Count, via the stars-and-bars correspondence, the combinations with
    # replacement that precede *element* in lexicographic order.
    index = 0
    cumulative_sum = 0
    for k in range(1, n):
        cumulative_sum += occupations[k - 1]
        j = l + n - 1 - k - cumulative_sum
        i = n - k
        if i <= j:
            index += comb(j, i)

    return index
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`permutation_index`
    computes the index of the first *element* directly, without computing
    the previous permutations.

    >>> permutation_index([1, 3, 2], range(5))
    19

    ``ValueError`` will be raised if the given *element* isn't one of the
    permutations of *iterable*.
    """
    remaining = list(iterable)
    size = len(remaining)

    # Factorial number system: at each step the radix shrinks by one
    # because a used item cannot appear again.
    index = 0
    for offset, item in enumerate(element):
        radix = size - offset
        position = remaining.index(item)  # raises ValueError if absent
        index = index * radix + position
        del remaining[position]

    return index
class countable:
    """Wrap *iterable* and keep a count of how many items have been consumed.

    The ``items_seen`` attribute starts at ``0`` and increments as the iterable
    is consumed:

    >>> iterable = map(str, range(10))
    >>> it = countable(iterable)
    >>> it.items_seen
    0
    >>> next(it), next(it)
    ('0', '1')
    >>> list(it)
    ['2', '3', '4', '5', '6', '7', '8', '9']
    >>> it.items_seen
    10
    """

    def __init__(self, iterable):
        # Public running count, bumped on each successful __next__
        self.items_seen = 0
        self._iterator = iter(iterable)

    def __iter__(self):
        return self

    def __next__(self):
        value = next(self._iterator)
        self.items_seen += 1
        return value
def chunked_even(iterable, n):
    """Break *iterable* into lists of approximately length *n*.
    Items are distributed such the lengths of the lists differ by at most
    1 item.

    >>> iterable = [1, 2, 3, 4, 5, 6, 7]
    >>> n = 3
    >>> list(chunked_even(iterable, n)) # List lengths: 3, 2, 2
    [[1, 2, 3], [4, 5], [6, 7]]
    >>> list(chunked(iterable, n)) # List lengths: 3, 3, 1
    [[1, 2, 3], [4, 5, 6], [7]]

    """
    iterator = iter(iterable)

    # Initialize a buffer to process the chunks while keeping
    # some back to fill any underfilled chunks
    min_buffer = (n - 1) * (n - 2)
    buffer = list(islice(iterator, min_buffer))

    # Append items until we have a completed chunk; the islice step of n
    # fires once per n appended items, so a full chunk is always available
    for _ in islice(map(buffer.append, iterator), n, None, n):
        yield buffer[:n]
        del buffer[:n]

    # Check if any chunks need additional processing
    if not buffer:
        return
    length = len(buffer)

    # Chunks are either size `full_size <= n` or `partial_size = full_size - 1`
    q, r = divmod(length, n)
    num_lists = q + (1 if r > 0 else 0)
    q, r = divmod(length, num_lists)
    full_size = q + (1 if r > 0 else 0)
    partial_size = full_size - 1
    num_full = length - partial_size * num_lists

    # Yield chunks of full size
    partial_start_idx = num_full * full_size
    if full_size > 0:
        for i in range(0, partial_start_idx, full_size):
            yield buffer[i : i + full_size]

    # Yield chunks of partial size
    if partial_size > 0:
        for i in range(partial_start_idx, length, partial_size):
            yield buffer[i : i + partial_size]
4595def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False):
4596 """A version of :func:`zip` that "broadcasts" any scalar
4597 (i.e., non-iterable) items into output tuples.
4599 >>> iterable_1 = [1, 2, 3]
4600 >>> iterable_2 = ['a', 'b', 'c']
4601 >>> scalar = '_'
4602 >>> list(zip_broadcast(iterable_1, iterable_2, scalar))
4603 [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]
4605 The *scalar_types* keyword argument determines what types are considered
4606 scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to
4607 treat strings and byte strings as iterable:
4609 >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None))
4610 [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')]
4612 If the *strict* keyword argument is ``True``, then
4613 ``ValueError`` will be raised if any of the iterables have
4614 different lengths.
4615 """
4617 def is_scalar(obj):
4618 if scalar_types and isinstance(obj, scalar_types):
4619 return True
4620 try:
4621 iter(obj)
4622 except TypeError:
4623 return True
4624 else:
4625 return False
4627 size = len(objects)
4628 if not size:
4629 return
4631 new_item = [None] * size
4632 iterables, iterable_positions = [], []
4633 for i, obj in enumerate(objects):
4634 if is_scalar(obj):
4635 new_item[i] = obj
4636 else:
4637 iterables.append(iter(obj))
4638 iterable_positions.append(i)
4640 if not iterables:
4641 yield tuple(objects)
4642 return
4644 for item in zip(*iterables, strict=strict):
4645 for i, new_item[i] in zip(iterable_positions, item):
4646 pass
4647 yield tuple(new_item)
def unique_in_window(iterable, n, key=None):
    """Yield the items from *iterable* that haven't been seen recently.
    *n* is the size of the sliding window.

    >>> iterable = [0, 1, 0, 2, 3, 0]
    >>> n = 3
    >>> list(unique_in_window(iterable, n))
    [0, 1, 2, 3, 0]

    The *key* function, if provided, will be used to determine uniqueness:

    >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
    ['a', 'b', 'c', 'd', 'a']

    When `n == 1`, *unique_in_window* is memoryless:

    >>> list(unique_in_window('aab', n=1))
    ['a', 'a', 'b']

    The items in *iterable* must be hashable.

    """
    if n <= 0:
        raise ValueError('n must be greater than 0')

    # The deque holds the keys currently in the window; the Counter holds
    # multiplicities so repeated keys survive a single eviction.
    window = deque(maxlen=n)
    counts = Counter()

    for item in iterable:
        # Evict the oldest key once the window is at capacity
        if len(window) == n:
            oldest = window[0]
            if counts[oldest] == 1:
                del counts[oldest]
            else:
                counts[oldest] -= 1

        k = item if key is None else key(item)
        if k not in counts:
            yield item
        counts[k] += 1
        window.append(k)
def duplicates_everseen(iterable, key=None):
    """Yield duplicate elements after their first appearance.

    >>> list(duplicates_everseen('mississippi'))
    ['s', 'i', 's', 's', 'i', 'p', 'i']
    >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']

    This function is analogous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    # Hashable keys go into a set; unhashable keys fall back to a
    # (slower) list with linear membership tests.
    hashable_seen = set()
    unhashable_seen = []

    for item in iterable:
        k = item if key is None else key(item)
        try:
            first_time = k not in hashable_seen
            if first_time:
                hashable_seen.add(k)
        except TypeError:
            first_time = k not in unhashable_seen
            if first_time:
                unhashable_seen.append(k)
        if not first_time:
            yield item
def duplicates_justseen(iterable, key=None):
    """Yields serially-duplicate elements after their first appearance.

    >>> list(duplicates_justseen('mississippi'))
    ['s', 's', 'p']
    >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']

    This function is analogous to :func:`unique_justseen`.

    """
    for _, run in groupby(iterable, key):
        # Discard the first item of each run, then yield the repeats
        next(run)
        yield from run
def classify_unique(iterable, key=None):
    """Classify each element in terms of its uniqueness.

    For each element in the input iterable, return a 3-tuple consisting of:

    1. The element itself
    2. ``False`` if the element is equal to the one preceding it in the input,
       ``True`` otherwise (i.e. the equivalent of :func:`unique_justseen`)
    3. ``False`` if this element has been seen anywhere in the input before,
       ``True`` otherwise (i.e. the equivalent of :func:`unique_everseen`)

    >>> list(classify_unique('otto'))    # doctest: +NORMALIZE_WHITESPACE
    [('o', True,  True),
     ('t', True,  True),
     ('t', False, False),
     ('o', True,  False)]

    This function is analogous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    # Hashable keys use a set; unhashable keys fall back to a list
    hashable_seen = set()
    unhashable_seen = []
    previous = None

    for position, item in enumerate(iterable):
        k = item if key is None else key(item)

        # A new "run" starts at the first item or when the key changes
        new_run = position == 0 or previous != k
        previous = k

        everseen_unique = False
        try:
            if k not in hashable_seen:
                hashable_seen.add(k)
                everseen_unique = True
        except TypeError:
            if k not in unhashable_seen:
                unhashable_seen.append(k)
                everseen_unique = True

        yield item, new_run, everseen_unique
def minmax(iterable_or_value, *others, key=None, default=_marker):
    """Returns both the smallest and largest items from an iterable
    or from two or more arguments.

    >>> minmax([3, 1, 5])
    (1, 5)

    >>> minmax(4, 2, 6)
    (2, 6)

    If a *key* function is provided, it will be used to transform the input
    items for comparison.

    >>> minmax([5, 30], key=str)  # '30' sorts before '5'
    (30, 5)

    If a *default* value is provided, it will be returned if there are no
    input items.

    >>> minmax([], default=(0, 0))
    (0, 0)

    Otherwise ``ValueError`` is raised.

    This function makes a single pass over the input elements and takes care to
    minimize the number of comparisons made during processing.

    Note that unlike the builtin ``max`` function, which always returns the first
    item with the maximum value, this function may return another item when there are
    ties.

    This function is based on the
    `recipe <https://code.activestate.com/recipes/577916-fast-minmax-function>`__ by
    Raymond Hettinger.
    """
    # The positional arguments are either a single iterable or two-plus values
    iterable = (iterable_or_value, *others) if others else iterable_or_value

    it = iter(iterable)

    try:
        lo = hi = next(it)
    except StopIteration as exc:
        if default is _marker:
            raise ValueError(
                '`minmax()` argument is an empty iterable. '
                'Provide a `default` value to suppress this error.'
            ) from exc
        return default

    # Different branches depending on the presence of key. This saves a lot
    # of unimportant copies which would slow the "key=None" branch
    # significantly down.
    if key is None:
        # Take items two at a time.  Ordering each pair first means only
        # three comparisons per pair instead of four.  An odd final item
        # is paired with lo (the fillvalue), which cannot change the result.
        for x, y in zip_longest(it, it, fillvalue=lo):
            if y < x:
                x, y = y, x
            if x < lo:
                lo = x
            if hi < y:
                hi = y

    else:
        # Same pairing strategy, but cache key() results so each item's
        # key is computed exactly once.
        lo_key = hi_key = key(lo)

        for x, y in zip_longest(it, it, fillvalue=lo):
            x_key, y_key = key(x), key(y)

            if y_key < x_key:
                x, y, x_key, y_key = y, x, y_key, x_key
            if x_key < lo_key:
                lo, lo_key = x, x_key
            if hi_key < y_key:
                hi, hi_key = y, y_key

    return lo, hi
def constrained_batches(
    iterable, max_size, max_count=None, get_len=len, strict=True
):
    """Yield batches of items from *iterable* with a combined size limited by
    *max_size*.

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10))
    [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')]

    If a *max_count* is supplied, the number of items per batch is also
    limited:

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10, max_count = 2))
    [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)]

    If a *get_len* function is supplied, use that instead of :func:`len` to
    determine item size.

    If *strict* is ``True``, raise ``ValueError`` if any single item is bigger
    than *max_size*. Otherwise, allow single items to exceed *max_size*.
    """
    if max_size <= 0:
        raise ValueError('maximum size must be greater than zero')

    current = []
    current_size = 0

    for item in iterable:
        size = get_len(item)
        if strict and size > max_size:
            raise ValueError('item size exceeds maximum size')

        # Flush the batch (if non-empty) before an item that would
        # overflow it or exceed the per-batch item count.
        would_overflow = current_size + size > max_size
        at_capacity = len(current) == max_count
        if current and (would_overflow or at_capacity):
            yield tuple(current)
            current = []
            current_size = 0

        current.append(item)
        current_size += size

    # Emit any trailing partial batch
    if current:
        yield tuple(current)
def gray_product(*iterables, repeat=1):
    """Like :func:`itertools.product`, but return tuples in an order such
    that only one element in the generated tuple changes from one iteration
    to the next.

    >>> list(gray_product('AB','CD'))
    [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')]

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. For example, ``gray_product('AB', repeat=3)`` is
    equivalent to ``gray_product('AB', 'AB', 'AB')``.

    This function consumes all of the input iterables before producing output.
    If any of the input iterables have fewer than two items, ``ValueError``
    is raised.

    For information on the algorithm, see
    `this section <https://www-cs-faculty.stanford.edu/~knuth/fasc2a.ps.gz>`__
    of Donald Knuth's *The Art of Computer Programming*.
    """
    all_iterables = tuple(map(tuple, iterables)) * repeat
    iterable_count = len(all_iterables)
    # Algorithm H requires every radix to be at least 2
    for iterable in all_iterables:
        if len(iterable) < 2:
            raise ValueError("each iterable must have two or more items")

    # This is based on "Algorithm H" from section 7.2.1.1, page 20.
    # a holds the indexes of the source iterables for the n-tuple to be yielded
    # f is the array of "focus pointers"
    # o is the array of "directions"
    a = [0] * iterable_count
    f = list(range(iterable_count + 1))
    o = [1] * iterable_count
    while True:
        yield tuple(all_iterables[i][a[i]] for i in range(iterable_count))
        j = f[0]
        f[0] = 0
        # When the focus pointer runs off the end, every tuple was produced
        if j == iterable_count:
            break
        # Step coordinate j one position in its current direction
        a[j] = a[j] + o[j]
        # On hitting either boundary, reverse direction and pass the focus on
        if a[j] == 0 or a[j] == len(all_iterables[j]) - 1:
            o[j] = -o[j]
            f[j] = f[j + 1]
            f[j + 1] = j + 1
def partial_product(*iterables, repeat=1):
    """Yields tuples containing one item from each iterator, with subsequent
    tuples changing a single item at a time by advancing each iterator until it
    is exhausted. This sequence guarantees every value in each iterable is
    output at least once without generating all possible combinations.

    This may be useful, for example, when testing an expensive function.

    >>> list(partial_product('AB', 'C', 'DEF'))
    [('A', 'C', 'D'), ('B', 'C', 'D'), ('B', 'C', 'E'), ('B', 'C', 'F')]

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. For example, ``partial_product('AB', repeat=3)`` is
    equivalent to ``partial_product('AB', 'AB', 'AB')``.
    """
    pools = tuple(map(tuple, iterables)) * repeat
    iterators = [iter(pool) for pool in pools]

    # Seed the output with the first item of every iterable
    try:
        current = [next(iterator) for iterator in iterators]
    except StopIteration:
        # At least one iterable was empty: nothing to yield
        return
    yield tuple(current)

    # Advance one iterator at a time, left to right, keeping the other
    # positions fixed at their most recent values.
    for position, iterator in enumerate(iterators):
        for value in iterator:
            current[position] = value
            yield tuple(current)
def takewhile_inclusive(predicate, iterable):
    """A variant of :func:`takewhile` that also yields the first element
    for which *predicate* is false.

    >>> list(takewhile_inclusive(lambda x: x < 5, [1, 4, 6, 4, 1]))
    [1, 4, 6]

    :func:`takewhile` would return ``[1, 4]``.
    """
    for item in iterable:
        # Yield first, then test: the failing element is included
        yield item
        if not predicate(item):
            return
def outer_product(func, xs, ys, *args, **kwargs):
    """A generalized outer product that applies a binary function to all
    pairs of items. Returns a 2D matrix with ``len(xs)`` rows and ``len(ys)``
    columns.
    Also accepts ``*args`` and ``**kwargs`` that are passed to ``func``.

    Multiplication table:

    >>> from operator import mul
    >>> list(outer_product(mul, range(1, 4), range(1, 6)))
    [(1, 2, 3, 4, 5), (2, 4, 6, 8, 10), (3, 6, 9, 12, 15)]

    Cross tabulation:

    >>> xs = ['A', 'B', 'A', 'A', 'B', 'B', 'A', 'A', 'B', 'B']
    >>> ys = ['X', 'X', 'X', 'Y', 'Z', 'Z', 'Y', 'Y', 'Z', 'Z']
    >>> pair_counts = Counter(zip(xs, ys))
    >>> count_rows = lambda x, y: pair_counts[x, y]
    >>> list(outer_product(count_rows, sorted(set(xs)), sorted(set(ys))))
    [(2, 3, 0), (1, 0, 4)]

    Usage with ``*args`` and ``**kwargs``:

    >>> animals = ['cat', 'wolf', 'mouse']
    >>> list(outer_product(min, animals, animals, key=len))
    [('cat', 'cat', 'cat'), ('cat', 'wolf', 'wolf'), ('cat', 'wolf', 'mouse')]
    """
    # ys must be reusable across rows, so materialize it once
    ys = tuple(ys)
    # Lay the results out row-major, then regroup into rows of len(ys)
    flat = (func(x, y, *args, **kwargs) for x, y in product(xs, ys))
    return batched(flat, n=len(ys))
def iter_suppress(iterable, *exceptions):
    """Yield each of the items from *iterable*. If the iteration raises one of
    the specified *exceptions*, that exception will be suppressed and iteration
    will stop.

    >>> from itertools import chain
    >>> def breaks_at_five(x):
    ...     while True:
    ...         if x >= 5:
    ...             raise RuntimeError
    ...         yield x
    ...         x += 1
    >>> it_1 = iter_suppress(breaks_at_five(1), RuntimeError)
    >>> it_2 = iter_suppress(breaks_at_five(2), RuntimeError)
    >>> list(chain(it_1, it_2))
    [1, 2, 3, 4, 2, 3, 4]
    """
    # contextlib.suppress swallows exactly the given exception types and
    # ends the generator cleanly when one is raised.
    with suppress(*exceptions):
        yield from iterable
def filter_map(func, iterable):
    """Apply *func* to every element of *iterable*, yielding only those which
    are not ``None``.

    >>> elems = ['1', 'a', '2', 'b', '3']
    >>> list(filter_map(lambda s: int(s) if s.isnumeric() else None, elems))
    [1, 2, 3]
    """
    # None results are dropped; every other value (including falsy ones
    # like 0 or '') is passed through.
    for result in map(func, iterable):
        if result is not None:
            yield result
def powerset_of_sets(iterable, *, baseset=set):
    """Yields all possible subsets of the iterable.

    >>> list(powerset_of_sets([1, 2, 3]))  # doctest: +SKIP
    [set(), {1}, {2}, {3}, {1, 2}, {1, 3}, {2, 3}, {1, 2, 3}]
    >>> list(powerset_of_sets([1, 1, 0]))  # doctest: +SKIP
    [set(), {1}, {0}, {0, 1}]

    :func:`powerset_of_sets` takes care to minimize the number
    of hash operations performed.

    The *baseset* parameter determines what kind of sets are
    constructed, either *set* or *frozenset*.
    """
    # Deduplicate input values while preserving first-seen order; each
    # value becomes a hashable singleton so subsets can be unioned cheaply.
    singletons = tuple(dict.fromkeys(frozenset((item,)) for item in iterable))
    empty_union = baseset().union
    return chain.from_iterable(
        (empty_union(*combo) for combo in combinations(singletons, r))
        for r in range(len(singletons) + 1)
    )
def join_mappings(**field_to_map):
    """
    Joins multiple mappings together using their common keys.

    >>> user_scores = {'elliot': 50, 'claris': 60}
    >>> user_times = {'elliot': 30, 'claris': 40}
    >>> join_mappings(score=user_scores, time=user_times)
    {'elliot': {'score': 50, 'time': 30}, 'claris': {'score': 60, 'time': 40}}
    """
    # Invert the nesting: outer keys become inner field names
    joined = {}
    for field, mapping in field_to_map.items():
        for key, value in mapping.items():
            joined.setdefault(key, {})[field] = value
    return joined
def _complex_sumprod(v1, v2):
    """High precision sumprod() for complex numbers.
    Used by :func:`dft` and :func:`idft`.
    """
    # (a+bi)(c+di) = (ac - bd) + (ad + bc)i.  Each component is built as
    # one long high-precision sum-of-products over the real/imag parts.
    get_real = attrgetter('real')
    get_imag = attrgetter('imag')
    real_left = chain(map(get_real, v1), map(neg, map(get_imag, v1)))
    real_right = chain(map(get_real, v2), map(get_imag, v2))
    imag_left = chain(map(get_real, v1), map(get_imag, v1))
    imag_right = chain(map(get_imag, v2), map(get_real, v2))
    return complex(_fsumprod(real_left, real_right), _fsumprod(imag_left, imag_right))
def dft(xarr):
    """Discrete Fourier Transform. *xarr* is a sequence of complex numbers.
    Yields the components of the corresponding transformed output vector.

    >>> import cmath
    >>> xarr = [1, 2-1j, -1j, -1+2j]  # time domain
    >>> Xarr = [2, -2-2j, -2j, 4+4j]  # frequency domain
    >>> all(map(cmath.isclose, dft(xarr), Xarr))
    True

    Inputs are restricted to numeric types that can add and multiply
    with a complex number. This includes int, float, complex, and
    Fraction, but excludes Decimal.

    See :func:`idft` for the inverse Discrete Fourier Transform.
    """
    N = len(xarr)
    # The N complex roots of unity with the forward (negative) sign
    roots = [e ** (n / N * tau * -1j) for n in range(N)]
    for k in range(N):
        # Row k of the DFT matrix; exponents wrap modulo N
        row = [roots[k * n % N] for n in range(N)]
        yield _complex_sumprod(xarr, row)
def idft(Xarr):
    """Inverse Discrete Fourier Transform. *Xarr* is a sequence of
    complex numbers. Yields the components of the corresponding
    inverse-transformed output vector.

    >>> import cmath
    >>> xarr = [1, 2-1j, -1j, -1+2j]  # time domain
    >>> Xarr = [2, -2-2j, -2j, 4+4j]  # frequency domain
    >>> all(map(cmath.isclose, idft(Xarr), xarr))
    True

    Inputs are restricted to numeric types that can add and multiply
    with a complex number. This includes int, float, complex, and
    Fraction, but excludes Decimal.

    See :func:`dft` for the Discrete Fourier Transform.
    """
    N = len(Xarr)
    # The N complex roots of unity with the inverse (positive) sign
    roots = [e ** (n / N * tau * 1j) for n in range(N)]
    for k in range(N):
        # Row k of the inverse DFT matrix; the 1/N factor normalizes
        row = [roots[k * n % N] for n in range(N)]
        yield _complex_sumprod(Xarr, row) / N
def doublestarmap(func, iterable):
    """Apply *func* to every item of *iterable* by dictionary unpacking
    the item into *func*.

    The difference between :func:`itertools.starmap` and :func:`doublestarmap`
    parallels the distinction between ``func(*a)`` and ``func(**a)``.

    >>> iterable = [{'a': 1, 'b': 2}, {'a': 40, 'b': 60}]
    >>> list(doublestarmap(lambda a, b: a + b, iterable))
    [3, 100]

    ``TypeError`` will be raised if *func*'s signature doesn't match the
    mapping contained in *iterable* or if *iterable* does not contain mappings.
    """
    for mapping in iterable:
        yield func(**mapping)
5190def _nth_prime_bounds(n):
5191 """Bounds for the nth prime (counting from 1): lb < p_n < ub."""
5192 # At and above 688,383, the lb/ub spread is under 0.003 * p_n.
5194 if n < 1:
5195 raise ValueError
5197 if n < 6:
5198 return (n, 2.25 * n)
5200 # https://en.wikipedia.org/wiki/Prime-counting_function#Inequalities
5201 upper_bound = n * log(n * log(n))
5202 lower_bound = upper_bound - n
5203 if n >= 688_383:
5204 upper_bound -= n * (1.0 - (log(log(n)) - 2.0) / log(n))
5206 return lower_bound, upper_bound
def nth_prime(n, *, approximate=False):
    """Return the nth prime (counting from 0).

    >>> nth_prime(0)
    2
    >>> nth_prime(100)
    547

    If *approximate* is set to True, will return a prime close
    to the nth prime. The estimation is much faster than computing
    an exact result.

    >>> nth_prime(200_000_000, approximate=True)  # Exact result is 4222234763
    4217820427

    """
    lower, upper = _nth_prime_bounds(n + 1)

    # Exact answer (also used for small n even when approximating):
    # sieve up to the upper bound and take the nth prime.
    if not approximate or n <= 1_000_000:
        return nth(sieve(ceil(upper)), n)

    # Approximation: start at the midpoint of the bounds, forced odd,
    # and return the first prime at or after it.
    start = floor((lower + upper) / 2) | 1
    return first_true(count(start, step=2), pred=is_prime)
def argmin(iterable, *, key=None):
    """
    Index of the first occurrence of a minimum value in an iterable.

    >>> argmin('efghabcdijkl')
    4
    >>> argmin([3, 2, 1, 0, 4, 2, 1, 0])
    3

    The *key* function, if provided, transforms each item before
    comparison, e.g. to look up a label at the position of the value
    that minimizes a cost function::

        >>> def cost(x):
        ...     "Days for a wound to heal given a subject's age."
        ...     return x**2 - 20*x + 150
        ...
        >>> labels = ['homer', 'marge', 'bart', 'lisa', 'maggie']
        >>> ages = [ 35, 30, 10, 9, 1 ]
        >>> labels[argmin(ages, key=cost)]
        'bart'

    """
    # Pair each value with its index, minimize on the value, keep the index
    values = iterable if key is None else map(key, iterable)
    return min(enumerate(values), key=itemgetter(1))[0]
def argmax(iterable, *, key=None):
    """
    Index of the first occurrence of a maximum value in an iterable.

    >>> argmax('abcdefghabcd')
    7
    >>> argmax([0, 1, 2, 3, 3, 2, 1, 0])
    3

    The *key* function, if provided, transforms each item before
    comparison, e.g. to identify the best machine learning model::

        >>> models = ['svm', 'random forest', 'knn', 'naïve bayes']
        >>> accuracy = [ 68, 61, 84, 72 ]
        >>> models[argmax(accuracy)]
        'knn'

    """
    # Pair each value with its index, maximize on the value, keep the index
    values = iterable if key is None else map(key, iterable)
    return max(enumerate(values), key=itemgetter(1))[0]
def _extract_monotonic(iterator, indices):
    'Non-decreasing indices, lazily consumed'
    # Count of items already read from *iterator*
    num_read = 0
    for index in indices:
        # How many items to skip to reach *index* from the current position
        advance = index - num_read
        try:
            value = next(islice(iterator, advance, None))
        except ValueError:
            # islice raises ValueError for a negative advance.  A repeated
            # index (advance == -1) is allowed: fall through and re-yield
            # the value read on the previous iteration.  Anything else is
            # a decreasing or negative index.
            if advance != -1 or index < 0:
                raise ValueError(f'Invalid index: {index}') from None
        except StopIteration:
            # The iterable ended before reaching *index*
            raise IndexError(index) from None
        else:
            num_read += advance + 1
        yield value
def _extract_buffered(iterator, index_and_position):
    'Arbitrary index order, greedily consumed'
    # Values read ahead of their emission turn, keyed by output position
    buffer = {}
    # Index of the last item read from *iterator*
    iterator_position = -1
    # Next output position to emit
    next_to_emit = 0

    for index, order in index_and_position:
        advance = index - iterator_position
        if advance:
            # Skip forward to *index*.  When advance == 0 (a repeated
            # index), the previously read *value* is reused instead.
            try:
                value = next(islice(iterator, advance - 1, None))
            except StopIteration:
                # The iterable ended before reaching *index*
                raise IndexError(index) from None
            iterator_position = index

        buffer[order] = value

        # Emit any contiguous run of results that is now complete
        while next_to_emit in buffer:
            yield buffer.pop(next_to_emit)
            next_to_emit += 1
def extract(iterable, indices, *, monotonic=False):
    """Yield values at the specified indices.

    Example:

    >>> data = 'abcdefghijklmnopqrstuvwxyz'
    >>> list(extract(data, [7, 4, 11, 11, 14]))
    ['h', 'e', 'l', 'l', 'o']

    The *iterable* is consumed lazily and can be infinite.

    When *monotonic* is false, the *indices* are consumed immediately
    and must be finite. When *monotonic* is true, *indices* are consumed
    lazily and can be infinite but must be non-decreasing.

    Raises ``IndexError`` if an index lies beyond the iterable.
    Raises ``ValueError`` for a negative index or for a decreasing
    index when *monotonic* is true.
    """
    iterator = iter(iterable)
    indices = iter(indices)

    # Lazy path: indices arrive in non-decreasing order
    if monotonic:
        return _extract_monotonic(iterator, indices)

    # Greedy path: read every index now, remembering each one's output
    # slot, and sort so the source is consumed in a single forward pass.
    ordered_pairs = sorted(zip(indices, count()))
    if ordered_pairs and ordered_pairs[0][0] < 0:
        raise ValueError('Indices must be non-negative')
    return _extract_buffered(iterator, ordered_pairs)
class serialize:
    """Wrap a non-concurrent iterator with a lock to enforce sequential access.

    Applies a non-reentrant lock around calls to ``__next__``, allowing
    iterator and generator instances to be shared by multiple consumer
    threads.
    """

    __slots__ = ('iterator', 'lock')

    def __init__(self, iterable):
        self.lock = Lock()
        self.iterator = iter(iterable)

    def __iter__(self):
        return self

    def __next__(self):
        # Only one thread at a time may advance the underlying iterator
        with self.lock:
            return next(self.iterator)
def synchronized(func):
    """Wrap an iterator-returning callable to make its iterators thread-safe.

    Existing itertools and more-itertools can be wrapped so that their
    iterator instances are serialized.

    For example, ``itertools.count`` does not make thread-safe instances,
    but that is easily fixed with::

        atomic_counter = synchronized(itertools.count)

    Can also be used as a decorator for generator functions definitions
    so that the generator instances are serialized::

        @synchronized
        def enumerate_and_timestamp(iterable):
            for count, value in enumerate(iterable):
                yield count, time_ns(), value

    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Every iterator produced by *func* gets its own lock
        return serialize(func(*args, **kwargs))

    return wrapper
def concurrent_tee(iterable, n=2):
    """Variant of itertools.tee() but with guaranteed threading semantics.

    Takes a non-threadsafe iterator as an input and creates concurrent
    tee objects for other threads to have reliable independent copies of
    the data stream.

    The new iterators are only thread-safe if consumed within a single thread.
    To share just one of the new iterators across multiple threads, wrap it
    with :func:`serialize`.
    """
    if n < 0:
        raise ValueError
    if n == 0:
        return ()

    # The first tee wraps the source; the others share its state
    first = _concurrent_tee(iterable)
    siblings = [_concurrent_tee(first) for _ in range(n - 1)]
    return (first, *siblings)
class _concurrent_tee:
    # Thread-safe tee iterator.  Sibling copies share the source iterator,
    # one lock, and a singly-linked chain of cached values.

    __slots__ = ('iterator', 'link', 'lock')

    def __init__(self, iterable):
        if isinstance(iterable, _concurrent_tee):
            # Sibling copy: share the source, the cache chain, and the lock
            self.iterator = iterable.iterator
            self.link = iterable.link
            self.lock = iterable.lock
        else:
            self.iterator = iter(iterable)
            # A link is a [value, next_link] pair; next_link stays None
            # until the following value is pulled from the source.
            self.link = [None, None]
            self.lock = Lock()

    def __iter__(self):
        return self

    def __next__(self):
        link = self.link
        # Double-checked locking: take the lock only when this link has
        # not yet been filled in, and re-check under the lock in case
        # another thread filled it first.
        if link[1] is None:
            with self.lock:
                if link[1] is None:
                    link[0] = next(self.iterator)
                    link[1] = [None, None]
        # Consume the cached value and advance to the next link
        value, self.link = link
        return value
5468def _windowed_running_min(iterator, maxlen):
5469 sis = deque() # Strictly increasing subsequence
5470 for index, value in enumerate(iterator):
5471 if sis and sis[0][0] == index - maxlen:
5472 sis.popleft()
5473 while sis and not sis[-1][1] < value: # Remove non-increasing values
5474 sis.pop()
5475 sis.append((index, value)) # Most recent value at position -1
5476 yield sis[0][1] # Window minimum at position 0
def running_min(iterable, *, maxlen=None):
    """Smallest of values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window. The default of *None* is equivalent to
    an unbounded window.

    For example:

        >>> list(running_min([4, 3, 7, 0, 8, 1, 6, 2, 9, 5]))
        [4, 3, 3, 0, 0, 0, 0, 0, 0, 0]

        >>> list(running_min([4, 3, 7, 0, 8, 1, 6, 2, 9, 5], maxlen=3))
        [4, 3, 3, 0, 0, 0, 1, 1, 2, 2]

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    values = iter(iterable)

    if maxlen is not None:
        if maxlen <= 0:
            raise ValueError('Window size should be positive')
        # Bounded window: delegate to the monotonic-queue helper.
        return _windowed_running_min(values, maxlen)

    # Unbounded window: a cumulative minimum.
    return accumulate(values, func=min)
5509def _windowed_running_max(iterator, maxlen):
5510 sds = deque() # Strictly decreasing subsequence
5511 for index, value in enumerate(iterator):
5512 if sds and sds[0][0] == index - maxlen:
5513 sds.popleft()
5514 while sds and not sds[-1][1] > value: # Remove non-decreasing values
5515 sds.pop()
5516 sds.append((index, value)) # Most recent value at position -1
5517 yield sds[0][1] # Window maximum at position 0
def running_max(iterable, *, maxlen=None):
    """Largest of values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window. The default of *None* is equivalent to
    an unbounded window.

    For example:

        >>> list(running_max([4, 3, 7, 0, 8, 1, 6, 2, 9, 5]))
        [4, 4, 7, 7, 8, 8, 8, 8, 9, 9]

        >>> list(running_max([4, 3, 7, 0, 8, 1, 6, 2, 9, 5], maxlen=3))
        [4, 4, 7, 7, 8, 8, 8, 6, 9, 9]

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    values = iter(iterable)

    if maxlen is not None:
        if maxlen <= 0:
            raise ValueError('Window size should be positive')
        # Bounded window: delegate to the monotonic-queue helper.
        return _windowed_running_max(values, maxlen)

    # Unbounded window: a cumulative maximum.
    return accumulate(values, func=max)
@dataclass(frozen=True, slots=True)
class Stats:
    """Immutable snapshot of statistics yielded by running_statistics()."""

    size: int  # number of values in the window (saturates at maxlen)
    minimum: float  # smallest value, from running_min()
    median: float  # median value, from running_median()
    maximum: float  # largest value, from running_max()
    mean: float  # arithmetic mean, from running_mean()
def running_statistics(iterable, *, maxlen=None):
    """Statistics for values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window. The default of *None* is equivalent to
    an unbounded window.

    Yields instances of a ``Stats`` dataclass with fields for the dataset
    *size*, *minimum* value, *median* value, *maximum* value, and the
    arithmetic *mean*.

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    # Four independent passes over the input, one per statistic.
    t0, t1, t2, t3 = tee(iterable, 4)

    # The dataset size grows by one per value, saturating at *maxlen*
    # for a bounded window (unbounded when maxlen is None).
    if maxlen is None:
        sizes = count(1)
    else:
        sizes = chain(range(1, maxlen), repeat(maxlen))

    columns = zip(
        sizes,
        running_min(t0, maxlen=maxlen),
        running_median(t1, maxlen=maxlen),
        running_max(t2, maxlen=maxlen),
        running_mean(t3, maxlen=maxlen),
    )
    return starmap(Stats, columns)
5583 # fmt: on