Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/more_itertools/more.py: 19%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import math
3from collections import Counter, defaultdict, deque
4from collections.abc import Sequence
5from contextlib import suppress
6from dataclasses import dataclass
7from functools import cached_property, partial, wraps
8from heapq import heapify, heapreplace
9from itertools import (
10 accumulate,
11 chain,
12 combinations,
13 compress,
14 count,
15 cycle,
16 dropwhile,
17 groupby,
18 islice,
19 permutations,
20 repeat,
21 starmap,
22 takewhile,
23 tee,
24 zip_longest,
25 product,
26)
27from math import comb, e, exp, factorial, floor, fsum, log, log1p, perm, tau
28from math import ceil, prod
29from queue import Empty, Queue
30from random import random, randrange, shuffle, uniform
31from operator import (
32 attrgetter,
33 getitem,
34 is_not,
35 itemgetter,
36 lt,
37 neg,
38 sub,
39 gt,
40)
41from sys import maxsize
42from time import monotonic
43from threading import Lock
45from .recipes import (
46 _marker,
47 consume,
48 first_true,
49 flatten,
50 is_prime,
51 nth,
52 powerset,
53 running_mean,
54 running_median,
55 sieve,
56 take,
57 unique_everseen,
58 all_equal,
59 batched,
60)
# Explicit public API of this module: the names exported by
# ``from more_itertools import *``. Kept in rough alphabetical order
# (a few entries are out of sequence).
__all__ = [
    'AbortThread',
    'SequenceView',
    'Stats',
    'adjacent',
    'all_unique',
    'always_iterable',
    'always_reversible',
    'argmax',
    'argmin',
    'bucket',
    'callback_iter',
    'chunked',
    'chunked_even',
    'circular_shifts',
    'collapse',
    'combination_index',
    'combination_with_replacement_index',
    'concurrent_tee',
    'consecutive_groups',
    'constrained_batches',
    'consumer',
    'count_cycle',
    'countable',
    'derangements',
    'dft',
    'difference',
    'distinct_combinations',
    'distinct_permutations',
    'distribute',
    'divide',
    'doublestarmap',
    'duplicates_everseen',
    'duplicates_justseen',
    'classify_unique',
    'exactly_n',
    'extract',
    'filter_except',
    'filter_map',
    'first',
    'gray_product',
    'groupby_transform',
    'ichunked',
    'iequals',
    'idft',
    'ilen',
    'interleave',
    'interleave_evenly',
    'interleave_longest',
    'interleave_randomly',
    'intersperse',
    'is_sorted',
    'islice_extended',
    'iterate',
    'iter_suppress',
    'join_mappings',
    'last',
    'locate',
    'longest_common_prefix',
    'lstrip',
    'make_decorator',
    'map_except',
    'map_if',
    'map_reduce',
    'mark_ends',
    'minmax',
    'nth_or_last',
    'nth_permutation',
    'nth_prime',
    'nth_product',
    'nth_combination_with_replacement',
    'numeric_range',
    'one',
    'only',
    'outer_product',
    'padded',
    'partial_product',
    'partitions',
    'peekable',
    'permutation_index',
    'powerset_of_sets',
    'product_index',
    'raise_',
    'repeat_each',
    'repeat_last',
    'replace',
    'rlocate',
    'rstrip',
    'run_length',
    'running_min',
    'running_max',
    'running_statistics',
    'sample',
    'seekable',
    'serialize',
    'set_partitions',
    'side_effect',
    'sized_iterator',
    'sliced',
    'sort_together',
    'split_after',
    'split_at',
    'split_before',
    'split_into',
    'split_when',
    'spy',
    'stagger',
    'strip',
    'strictly_n',
    'substrings',
    'substrings_indexes',
    'synchronized',
    'takewhile_inclusive',
    'time_limited',
    'unique_in_window',
    'unique_to_each',
    'unzip',
    'value_chain',
    'windowed',
    'windowed_complete',
    'with_iter',
    'zip_broadcast',
    'zip_offset',
]
# math.sumprod is available for Python 3.12+
try:
    from math import sumprod as _fsumprod
except ImportError:  # pragma: no cover
    # Extended precision algorithms from T. J. Dekker,
    # "A Floating-Point Technique for Extending the Available Precision"
    # https://csclub.uwaterloo.ca/~pbarfuss/dekker1971.pdf
    # Formulas: (5.5) (5.6) and (5.8). Code: mul12()
    # NOTE: the exact order of the floating-point operations below is what
    # makes the algorithm exact — do not "simplify" these expressions.

    def dl_split(x: float):
        "Split a float into two half-precision components."
        t = x * 134217729.0  # Veltkamp constant = 2.0 ** 27 + 1
        hi = t - (t - x)
        lo = x - hi
        return hi, lo

    def dl_mul(x, y):
        "Lossless multiplication."
        # Dekker's mul12(): returns (z, zz) such that z + zz == x * y
        # exactly, where z is the rounded product and zz the rounding error.
        xx_hi, xx_lo = dl_split(x)
        yy_hi, yy_lo = dl_split(y)
        p = xx_hi * yy_hi
        q = xx_hi * yy_lo + xx_lo * yy_hi
        z = p + q
        zz = p - z + q + xx_lo * yy_lo
        return z, zz

    def _fsumprod(p, q):
        # Feed every (product, error) pair through fsum for a correctly
        # rounded dot product of p and q.
        return fsum(chain.from_iterable(map(dl_mul, p, q)))
def chunked(iterable, n, strict=False):
    """Break *iterable* into lists of length *n*:

        >>> list(chunked([1, 2, 3, 4, 5, 6], 3))
        [[1, 2, 3], [4, 5, 6]]

    By default, the last yielded list will have fewer than *n* elements
    if the length of *iterable* is not divisible by *n*:

        >>> list(chunked([1, 2, 3, 4, 5, 6, 7, 8], 3))
        [[1, 2, 3], [4, 5, 6], [7, 8]]

    To use a fill-in value instead, see the :func:`grouper` recipe.

    If the length of *iterable* is not divisible by *n* and *strict* is
    ``True``, then ``ValueError`` will be raised before the last
    list is yielded.

    """
    # iter(callable, sentinel) form: call take() repeatedly until it
    # returns an empty list (i.e. the source is exhausted).
    chunks = iter(partial(take, n, iter(iterable)), [])

    if not strict:
        return chunks

    if n is None:
        raise ValueError('n must not be None when using strict mode.')

    def validated():
        # Re-yield each chunk, verifying it is full-sized.
        for chunk in chunks:
            if len(chunk) != n:
                raise ValueError('iterable is not divisible by n.')
            yield chunk

    return validated()
def first(iterable, default=_marker):
    """Return the first item of *iterable*, or *default* if *iterable* is
    empty.

        >>> first([0, 1, 2, 3])
        0
        >>> first([], 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.

    :func:`first` is useful when you have a generator of expensive-to-retrieve
    values and want any arbitrary one. It is marginally shorter than
    ``next(iter(iterable), default)``.

    """
    # Use a local sentinel so that None (or _marker itself) remains a
    # legitimate item value.
    sentinel = object()
    item = next(iter(iterable), sentinel)
    if item is not sentinel:
        return item
    if default is _marker:
        raise ValueError(
            'first() was called on an empty iterable, '
            'and no default value was provided.'
        )
    return default
def last(iterable, default=_marker):
    """Return the last item of *iterable*, or *default* if *iterable* is
    empty.

        >>> last([0, 1, 2, 3])
        3
        >>> last([], 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    try:
        # Sequences support direct negative indexing.
        if isinstance(iterable, Sequence):
            return iterable[-1]
        # Reversible objects give the last item first.
        # Work around https://bugs.python.org/issue38525
        if getattr(iterable, '__reversed__', None):
            return next(reversed(iterable))
        # Fall back to draining into a one-slot deque.
        return deque(iterable, maxlen=1)[-1]
    except (IndexError, TypeError, StopIteration):
        # Any of the strategies above failing means the input was empty.
        if default is not _marker:
            return default
        raise ValueError(
            'last() was called on an empty iterable, '
            'and no default value was provided.'
        )
def nth_or_last(iterable, n, default=_marker):
    """Return the nth or the last item of *iterable*,
    or *default* if *iterable* is empty.

        >>> nth_or_last([0, 1, 2, 3], 2)
        2
        >>> nth_or_last([0, 1], 2)
        1
        >>> nth_or_last([], 0, 'some default')
        'some default'

    If *default* is not provided and there are no items in the iterable,
    raise ``ValueError``.
    """
    # Truncate to the first n + 1 items, then take whichever comes last.
    head = islice(iterable, n + 1)
    return last(head, default=default)
class peekable:
    """Wrap an iterator to allow lookahead and prepending elements.

    Call :meth:`peek` on the result to get the value that will be returned
    by :func:`next`. This won't advance the iterator:

        >>> p = peekable(['a', 'b'])
        >>> p.peek()
        'a'
        >>> next(p)
        'a'

    Pass :meth:`peek` a default value to return that instead of raising
    ``StopIteration`` when the iterator is exhausted.

        >>> p = peekable([])
        >>> p.peek('hi')
        'hi'

    peekables also offer a :meth:`prepend` method, which "inserts" items
    at the head of the iterable:

        >>> p = peekable([1, 2, 3])
        >>> p.prepend(10, 11, 12)
        >>> next(p)
        10
        >>> p.peek()
        11
        >>> list(p)
        [11, 12, 1, 2, 3]

    peekables can be indexed. Index 0 is the item that will be returned by
    :func:`next`, index 1 is the item after that, and so on:
    The values up to the given index will be cached.

        >>> p = peekable(['a', 'b', 'c', 'd'])
        >>> p[0]
        'a'
        >>> p[1]
        'b'
        >>> next(p)
        'a'

    Negative indexes are supported, but be aware that they will cache the
    remaining items in the source iterator, which may require significant
    storage.

    To check whether a peekable is exhausted, check its truth value:

        >>> p = peekable(['a', 'b'])
        >>> if p:  # peekable has items
        ...     list(p)
        ['a', 'b']
        >>> if not p:  # peekable is exhausted
        ...     list(p)
        []

    """

    def __init__(self, iterable):
        # _cache holds items pulled ahead of consumption (via peek,
        # indexing, or prepend); they are served before _it is advanced.
        self._it = iter(iterable)
        self._cache = deque()

    def __iter__(self):
        return self

    def __bool__(self):
        # Truthy iff at least one more item exists; peek() caches it.
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        """Return the item that will be next returned from ``next()``.

        Return ``default`` if there are no items left. If ``default`` is not
        provided, raise ``StopIteration``.

        """
        if not self._cache:
            try:
                self._cache.append(next(self._it))
            except StopIteration:
                if default is _marker:
                    raise
                return default
        return self._cache[0]

    def prepend(self, *items):
        """Stack up items to be the next ones returned from ``next()`` or
        ``self.peek()``. The items will be returned in
        first in, first out order::

            >>> p = peekable([1, 2, 3])
            >>> p.prepend(10, 11, 12)
            >>> next(p)
            10
            >>> list(p)
            [11, 12, 1, 2, 3]

        It is possible, by prepending items, to "resurrect" a peekable that
        previously raised ``StopIteration``.

            >>> p = peekable([])
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration
            >>> p.prepend(1)
            >>> next(p)
            1
            >>> next(p)
            Traceback (most recent call last):
              ...
            StopIteration

        """
        # extendleft reverses its argument, so reverse first to preserve
        # the caller's ordering.
        self._cache.extendleft(reversed(items))

    def __next__(self):
        # Serve cached (peeked/prepended) items before the source iterator.
        if self._cache:
            return self._cache.popleft()

        return next(self._it)

    def _get_slice(self, index):
        # Normalize the slice's arguments, substituting maxsize for the
        # unbounded endpoints so comparisons below are well-defined.
        step = 1 if (index.step is None) else index.step
        if step > 0:
            start = 0 if (index.start is None) else index.start
            stop = maxsize if (index.stop is None) else index.stop
        elif step < 0:
            start = -1 if (index.start is None) else index.start
            stop = (-maxsize - 1) if (index.stop is None) else index.stop
        else:
            raise ValueError('slice step cannot be zero')

        # If either the start or stop index is negative, we'll need to cache
        # the rest of the iterable in order to slice from the right side.
        if (start < 0) or (stop < 0):
            self._cache.extend(self._it)
        # Otherwise we'll need to find the rightmost index and cache to that
        # point.
        else:
            n = min(max(start, stop) + 1, maxsize)
            cache_len = len(self._cache)
            if n >= cache_len:
                self._cache.extend(islice(self._it, n - cache_len))

        # Delegate the actual slicing to list, which handles all the
        # start/stop/step corner cases.
        return list(self._cache)[index]

    def __getitem__(self, index):
        if isinstance(index, slice):
            return self._get_slice(index)

        cache_len = len(self._cache)
        if index < 0:
            # Negative index: cache everything to count from the end.
            self._cache.extend(self._it)
        elif index >= cache_len:
            # Cache just enough items to reach the requested index.
            self._cache.extend(islice(self._it, index + 1 - cache_len))

        return self._cache[index]
def consumer(func):
    """Decorator that automatically advances a PEP-342-style "reverse iterator"
    to its first yield point so you don't have to call ``next()`` on it
    manually.

        >>> @consumer
        ... def tally():
        ...     i = 0
        ...     while True:
        ...         print('Thing number %s is %s.' % (i, (yield)))
        ...         i += 1
        ...
        >>> t = tally()
        >>> t.send('red')
        Thing number 0 is red.
        >>> t.send('fish')
        Thing number 1 is fish.

    Without the decorator, you would have to call ``next(t)`` before
    ``t.send()`` could be used.

    """

    @wraps(func)
    def primed(*args, **kwargs):
        # Create the generator and advance it to the first yield so the
        # caller can .send() immediately.
        generator = func(*args, **kwargs)
        next(generator)
        return generator

    return primed
def ilen(iterable):
    """Return the number of items in *iterable*.

        >>> ilen(x for x in range(1000) if x % 3 == 0)
        334

    Equivalent to, but faster than::

        def ilen(iterable):
            count = 0
            for _ in iterable:
                count += 1
            return count

    This fully consumes the iterable, so handle with care.

    """
    # Pair every element with a tick of count(); draining the pairing into
    # a zero-length deque runs at C speed, and the counter's next value is
    # exactly the number of elements consumed.
    counter = count()
    deque(zip(iterable, counter), maxlen=0)
    return next(counter)
def iterate(func, start):
    """Return ``start``, ``func(start)``, ``func(func(start))``, ...

    Produces an infinite iterator. To add a stopping condition,
    use :func:`take`, ``takewhile``, or :func:`takewhile_inclusive`:.

        >>> take(10, iterate(lambda x: 2*x, 1))
        [1, 2, 4, 8, 16, 32, 64, 128, 256, 512]

        >>> collatz = lambda x: 3*x + 1 if x%2==1 else x // 2
        >>> list(takewhile_inclusive(lambda x: x!=1, iterate(collatz, 10)))
        [10, 5, 16, 8, 4, 2, 1]

    """
    try:
        while True:
            yield start
            start = func(start)
    except StopIteration:
        # A StopIteration raised by func quietly ends the sequence
        # (equivalent to wrapping the loop in suppress(StopIteration)).
        return
def with_iter(context_manager):
    """Wrap an iterable in a ``with`` statement, so it closes once exhausted.

    For example, this will close the file when the iterator is exhausted::

        upper_lines = (line.upper() for line in with_iter(open('foo')))

    Note that you have to actually exhaust the iterator for opened files
    to be closed.

    Any context manager which returns an iterable is a candidate for
    ``with_iter``.

    """
    # The context manager stays open for as long as iteration continues;
    # __exit__ runs when the source is exhausted (or the generator closes).
    with context_manager as iterable:
        for item in iterable:
            yield item
class sized_iterator:
    """Wrapper for *iterable* that implements ``__len__``.

        >>> it = map(str, range(5))
        >>> sized_it = sized_iterator(it, 5)
        >>> len(sized_it)
        5
        >>> list(sized_it)
        ['0', '1', '2', '3', '4']

    This is useful for tools that use :func:`len`, like
    `tqdm <https://pypi.org/project/tqdm/>`__ .

    The wrapper doesn't validate the provided *length*, so be sure to choose
    a value that reflects reality.
    """

    def __init__(self, iterable, length):
        # The claimed length is reported verbatim by __len__; it is never
        # checked against the actual number of items.
        self._iterator = iter(iterable)
        self._length = length

    def __iter__(self):
        return self

    def __len__(self):
        return self._length

    def __next__(self):
        # Delegate straight to the wrapped iterator.
        return next(self._iterator)
def one(iterable, too_short=None, too_long=None):
    """Return the first item from *iterable*, which is expected to contain only
    that item. Raise an exception if *iterable* is empty or has more than one
    item.

    :func:`one` is useful for ensuring that an iterable contains only one item.
    For example, it can be used to retrieve the result of a database query
    that is expected to return a single row.

    If *iterable* is empty, ``ValueError`` will be raised. You may specify a
    different exception with the *too_short* keyword:

        >>> it = []
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        ValueError: too few items in iterable (expected 1)'
        >>> too_short = IndexError('too few items')
        >>> one(it, too_short=too_short)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        IndexError: too few items

    Similarly, if *iterable* contains more than one item, ``ValueError`` will
    be raised. You may specify a different exception with the *too_long*
    keyword:

        >>> it = ['too', 'many']
        >>> one(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        ValueError: Expected exactly one item in iterable, but got 'too',
        'many', and perhaps more.
        >>> too_long = RuntimeError
        >>> one(it, too_long=too_long)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        RuntimeError

    Note that :func:`one` attempts to advance *iterable* twice to ensure there
    is only one item. See :func:`spy` or :func:`peekable` to check iterable
    contents less destructively.

    """
    iterator = iter(iterable)
    sentinel = object()

    first_item = next(iterator, sentinel)
    if first_item is sentinel:
        # Empty input: raise the supplied exception, or the default.
        raise too_short or ValueError('too few items in iterable (expected 1)')

    second_item = next(iterator, sentinel)
    if second_item is not sentinel:
        # More than one item: report the first two offenders.
        msg = (
            f'Expected exactly one item in iterable, but got {first_item!r}, '
            f'{second_item!r}, and perhaps more.'
        )
        raise too_long or ValueError(msg)

    return first_item
def raise_(exception, *args):
    """Raise *exception*, instantiated with *args*.

    Exists so a raise can appear where only an expression is allowed
    (e.g. inside a lambda).
    """
    raise exception(*args)
def strictly_n(iterable, n, too_short=None, too_long=None):
    """Validate that *iterable* has exactly *n* items and return them if
    it does. If it has fewer than *n* items, call function *too_short*
    with the actual number of items. If it has more than *n* items, call
    function *too_long* with the number ``n + 1``.

        >>> iterable = ['a', 'b', 'c', 'd']
        >>> n = 4
        >>> list(strictly_n(iterable, n))
        ['a', 'b', 'c', 'd']

    Note that the returned iterable must be consumed in order for the check to
    be made.

    By default, *too_short* and *too_long* are functions that raise
    ``ValueError``.

        >>> list(strictly_n('ab', 3))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        ValueError: too few items in iterable (got 2)

        >>> list(strictly_n('abc', 2))  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        ValueError: too many items in iterable (got at least 3)

    You can instead supply functions that do something else.
    *too_short* will be called with the number of items in *iterable*.
    *too_long* will be called with `n + 1`.

        >>> def too_short(item_count):
        ...     raise RuntimeError
        >>> it = strictly_n('abcd', 6, too_short=too_short)
        >>> list(it)  # doctest: +IGNORE_EXCEPTION_DETAIL
        Traceback (most recent call last):
          ...
        RuntimeError

        >>> def too_long(item_count):
        ...     print('The boss is going to hear about this')
        >>> it = strictly_n('abcdef', 4, too_long=too_long)
        >>> list(it)
        The boss is going to hear about this
        ['a', 'b', 'c', 'd']

    """

    def _default_too_short(item_count):
        raise_(
            ValueError,
            f'Too few items in iterable (got {item_count})',
        )

    def _default_too_long(item_count):
        raise_(
            ValueError,
            f'Too many items in iterable (got at least {item_count})',
        )

    if too_short is None:
        too_short = _default_too_short
    if too_long is None:
        too_long = _default_too_long

    iterator = iter(iterable)

    # Pass through at most n items, counting as we go.
    yielded = 0
    for item in islice(iterator, n):
        yield item
        yielded += 1

    if yielded < n:
        too_short(yielded)
        return

    # One more item means the input was too long; consume only that one.
    for _ in iterator:
        too_long(n + 1)
        return
def distinct_permutations(iterable, r=None):
    """Yield successive distinct permutations of the elements in *iterable*.

        >>> sorted(distinct_permutations([1, 0, 1]))
        [(0, 1, 1), (1, 0, 1), (1, 1, 0)]

    Equivalent to yielding from ``set(permutations(iterable))``, except
    duplicates are not generated and thrown away. For larger input sequences
    this is much more efficient.

    Duplicate permutations arise when there are duplicated elements in the
    input iterable. The number of items returned is
    `n! / (x_1! * x_2! * ... * x_n!)`, where `n` is the total number of
    items input, and each `x_i` is the count of a distinct item in the input
    sequence. The function :func:`multinomial` computes this directly.

    If *r* is given, only the *r*-length permutations are yielded.

        >>> sorted(distinct_permutations([1, 0, 1], r=2))
        [(0, 1), (1, 0), (1, 1)]
        >>> sorted(distinct_permutations(range(3), r=2))
        [(0, 1), (0, 2), (1, 0), (1, 2), (2, 0), (2, 1)]

    *iterable* need not be sortable, but note that using equal (``x == y``)
    but non-identical (``id(x) != id(y)``) elements may produce surprising
    behavior. For example, ``1`` and ``True`` are equal but non-identical:

        >>> list(distinct_permutations([1, True, '3']))  # doctest: +SKIP
        [
            (1, True, '3'),
            (1, '3', True),
            ('3', 1, True)
        ]
        >>> list(distinct_permutations([1, 2, '3']))  # doctest: +SKIP
        [
            (1, 2, '3'),
            (1, '3', 2),
            (2, 1, '3'),
            (2, '3', 1),
            ('3', 1, 2),
            ('3', 2, 1)
        ]
    """

    # Algorithm: https://w.wiki/Qai
    # Lexicographic next-permutation on a sorted list; equal elements are
    # never swapped past each other, so duplicates are skipped for free.
    def _full(A):
        while True:
            # Yield the permutation we have
            yield tuple(A)

            # Find the largest index i such that A[i] < A[i + 1]
            for i in range(size - 2, -1, -1):
                if A[i] < A[i + 1]:
                    break
            # If no such index exists, this permutation is the last one
            else:
                return

            # Find the largest index j greater than i such that A[i] < A[j]
            for j in range(size - 1, i, -1):
                if A[i] < A[j]:
                    break

            # Swap the value of A[i] with that of A[j], then reverse the
            # sequence from A[i + 1] to form the new permutation
            A[i], A[j] = A[j], A[i]
            A[i + 1 :] = A[: i - size : -1]  # A[i + 1:][::-1]

    # Algorithm: modified from the above
    def _partial(A, r):
        # Split A into the first r items (the window that gets yielded)
        # and the remaining items
        head, tail = A[:r], A[r:]
        right_head_indexes = range(r - 1, -1, -1)
        left_tail_indexes = range(len(tail))

        while True:
            # Yield the permutation we have
            yield tuple(head)

            # Starting from the right, find the first index of the head with
            # value smaller than the maximum value of the tail - call it i.
            pivot = tail[-1]
            for i in right_head_indexes:
                if head[i] < pivot:
                    break
                pivot = head[i]
            # No such index: every r-permutation has been produced
            else:
                return

            # Starting from the left, find the first value of the tail
            # with a value greater than head[i] and swap.
            for j in left_tail_indexes:
                if tail[j] > head[i]:
                    head[i], tail[j] = tail[j], head[i]
                    break
            # If we didn't find one, start from the right and find the first
            # index of the head with a value greater than head[i] and swap.
            else:
                for j in right_head_indexes:
                    if head[j] > head[i]:
                        head[i], head[j] = head[j], head[i]
                        break

            # Reverse head[i + 1:] and swap it with tail[:r - (i + 1)]
            tail += head[: i - r : -1]  # head[i + 1:][::-1]
            i += 1
            head[i:], tail[:] = tail[: r - i], tail[r - i :]

    items = list(iterable)

    try:
        items.sort()
        sortable = True
    except TypeError:
        sortable = False

        # Unsortable input: permute *indices* instead. Equal items share
        # the index of their first occurrence, so duplicate permutations
        # still collapse.
        indices_dict = defaultdict(list)

        for item in items:
            indices_dict[items.index(item)].append(item)

        indices = [items.index(item) for item in items]
        indices.sort()

        # Map each canonical index back to its (cycled) equal items so the
        # original objects appear in the output.
        equivalent_items = {k: cycle(v) for k, v in indices_dict.items()}

        def permuted_items(permuted_indices):
            return tuple(
                next(equivalent_items[index]) for index in permuted_indices
            )

    size = len(items)
    if r is None:
        r = size

    # functools.partial(_partial, ... )
    algorithm = _full if (r == size) else partial(_partial, r=r)

    if 0 < r <= size:
        if sortable:
            return algorithm(items)
        else:
            return (
                permuted_items(permuted_indices)
                for permuted_indices in algorithm(indices)
            )

    # r == 0 yields the single empty permutation; r out of range yields
    # nothing (matching itertools.permutations).
    return iter(() if r else ((),))
def derangements(iterable, r=None):
    """Yield successive derangements of the elements in *iterable*.

    A derangement is a permutation in which no element appears at its
    original index. In other words, a derangement is a permutation that has
    no fixed points.

        >>> sorted(derangements(range(3)))
        [(1, 2, 0), (2, 0, 1)]

    If *r* is given, only the *r*-length derangements are yielded.

        >>> sorted(derangements(range(3), 2))
        [(1, 0), (1, 2), (2, 0)]
        >>> sorted(derangements([0, 2, 3], 2))
        [(2, 0), (2, 3), (3, 0)]

    Elements are treated as unique based on their position, not on their
    value, so an input with duplicate values may appear to yield results
    with fixed points. De-duplicate the input (e.g. by tagging each item
    with its index) to avoid the ambiguity.

    The number of derangements of a set of size *n* is known as the
    "subfactorial of n". For n > 0, the subfactorial is:
    ``round(math.factorial(n) / math.e)``.

    Sizes: https://oeis.org/A000166
    """
    pool = tuple(iterable)
    positions = tuple(range(len(pool)))

    # A permutation of positions is a derangement when no position lands
    # on itself. Identity comparison (is_not) is safe because permutations
    # re-emits the very same int objects from `positions`.
    selectors = (
        all(map(is_not, positions, candidate))
        for candidate in permutations(positions, r=r)
    )
    return compress(permutations(pool, r=r), selectors)
def intersperse(e, iterable, n=1):
    """Intersperse filler element *e* among the items in *iterable*, leaving
    *n* items between each filler element.

        >>> list(intersperse('!', [1, 2, 3, 4, 5]))
        [1, '!', 2, '!', 3, '!', 4, '!', 5]

        >>> list(intersperse(None, [1, 2, 3, 4, 5], n=2))
        [1, 2, None, 3, 4, None, 5]

    """
    if n == 0:
        raise ValueError('n must be > 0')

    if n == 1:
        # interleave(repeat(e), iterable) -> e, x_0, e, x_1, e, x_2...
        # Dropping the leading filler leaves x_0, e, x_1, e, x_2...
        return islice(interleave(repeat(e), iterable), 1, None)

    # For n > 1: alternate single-filler lists with n-item chunks,
    # drop the leading filler, then flatten.
    # interleave(fillers, groups) -> [e], [x_0, x_1], [e], [x_2, x_3]...
    fillers = repeat([e])
    groups = chunked(iterable, n)
    return flatten(islice(interleave(fillers, groups), 1, None))
def unique_to_each(*iterables):
    """Return the elements from each of the input iterables that aren't in the
    other input iterables.

    For example, suppose you have a set of packages, each with a set of
    dependencies::

        {'pkg_1': {'A', 'B'}, 'pkg_2': {'B', 'C'}, 'pkg_3': {'B', 'D'}}

    If you remove one package, which dependencies can also be removed?

    If ``pkg_1`` is removed, then ``A`` is no longer necessary - it is not
    associated with ``pkg_2`` or ``pkg_3``. Similarly, ``C`` is only needed
    for ``pkg_2``, and ``D`` is only needed for ``pkg_3``::

        >>> unique_to_each({'A', 'B'}, {'B', 'C'}, {'B', 'D'})
        [['A'], ['C'], ['D']]

    If there are duplicates in one input iterable that aren't in the others
    they will be duplicated in the output. Input order is preserved::

        >>> unique_to_each("mississippi", "missouri")
        [['p', 'p'], ['o', 'u', 'r']]

    It is assumed that the elements of each iterable are hashable.

    """
    # Materialize each input so it can be scanned twice.
    groups = [list(iterable) for iterable in iterables]

    # Count, for each element, how many *distinct* inputs contain it.
    membership = Counter(chain.from_iterable(set(group) for group in groups))

    # Elements appearing in exactly one input are the "unique" ones.
    singletons = {element for element, total in membership.items() if total == 1}

    return [
        [element for element in group if element in singletons]
        for group in groups
    ]
def windowed(seq, n, fillvalue=None, step=1):
    """Return a sliding window of width *n* over the given iterable.

        >>> all_windows = windowed([1, 2, 3, 4, 5], 3)
        >>> list(all_windows)
        [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

    When the window is larger than the iterable, *fillvalue* is used in place
    of missing values:

        >>> list(windowed([1, 2, 3], 4))
        [(1, 2, 3, None)]

    Each window will advance in increments of *step*:

        >>> list(windowed([1, 2, 3, 4, 5, 6], 3, fillvalue='!', step=2))
        [(1, 2, 3), (3, 4, 5), (5, 6, '!')]

    To slide into the iterable's items, use :func:`chain` to add filler items
    to the left:

        >>> iterable = [1, 2, 3, 4]
        >>> n = 3
        >>> padding = [None] * (n - 1)
        >>> list(windowed(chain(padding, iterable), 3))
        [(None, None, 1), (None, 1, 2), (1, 2, 3), (2, 3, 4)]

    Raises ``ValueError`` if *n* is negative or *step* is less than 1.
    """
    if n < 0:
        raise ValueError('n must be >= 0')
    if n == 0:
        # A zero-width window: exactly one empty tuple.
        yield ()
        return
    if step < 1:
        raise ValueError('step must be >= 1')

    iterator = iter(seq)

    # Generate first window
    window = deque(islice(iterator, n), maxlen=n)

    # Deal with the first window not being full
    if not window:
        return
    if len(window) < n:
        yield tuple(window) + ((fillvalue,) * (n - len(window)))
        return
    yield tuple(window)

    # Create the filler for the next windows. The padding ensures
    # we have just enough elements to fill the last window.
    padding = (fillvalue,) * (n - 1 if step >= n else step - 1)
    filler = map(window.append, chain(iterator, padding))

    # Generate the rest of the windows. Consuming `filler` appends to
    # `window` as a side effect (the bounded deque evicts from the left),
    # so each islice tick advances the window by `step` items.
    for _ in islice(filler, step - 1, None, step):
        yield tuple(window)
def substrings(iterable):
    """Yield all of the substrings of *iterable*.

        >>> [''.join(s) for s in substrings('more')]
        ['m', 'o', 'r', 'e', 'mo', 'or', 're', 'mor', 'ore', 'more']

    Note that non-string iterables can also be subdivided.

        >>> list(substrings([0, 1, 2]))
        [(0,), (1,), (2,), (0, 1), (1, 2), (0, 1, 2)]

    Like subslices() but returns tuples instead of lists
    and returns the shortest substrings first.

    """
    sequence = tuple(iterable)
    total = len(sequence)
    # Emit all windows of each length, shortest lengths first.
    for length in range(1, total + 1):
        for start in range(total - length + 1):
            yield sequence[start : start + length]
def substrings_indexes(seq, reverse=False):
    """Yield all substrings and their positions in *seq*

    The items yielded will be a tuple of the form ``(substr, i, j)``, where
    ``substr == seq[i:j]``.

    This function only works for iterables that support slicing, such as
    ``str`` objects.

        >>> for item in substrings_indexes('more'):
        ...     print(item)
        ('m', 0, 1)
        ('o', 1, 2)
        ('r', 2, 3)
        ('e', 3, 4)
        ('mo', 0, 2)
        ('or', 1, 3)
        ('re', 2, 4)
        ('mor', 0, 3)
        ('ore', 1, 4)
        ('more', 0, 4)

    Set *reverse* to ``True`` to yield the same items in the opposite order.

    """
    # Shortest substrings first; reversed() flips to longest-first.
    lengths = range(1, len(seq) + 1)
    if reverse:
        lengths = reversed(lengths)
    return (
        (seq[start : start + length], start, start + length)
        for length in lengths
        for start in range(len(seq) - length + 1)
    )
class bucket:
    """Wrap *iterable* and return an object that buckets the iterable into
    child iterables based on a *key* function.

        >>> iterable = ['a1', 'b1', 'c1', 'a2', 'b2', 'c2', 'b3']
        >>> s = bucket(iterable, key=lambda x: x[0])  # Bucket by 1st character
        >>> sorted(list(s))  # Get the keys
        ['a', 'b', 'c']
        >>> a_iterable = s['a']
        >>> next(a_iterable)
        'a1'
        >>> next(a_iterable)
        'a2'
        >>> list(s['b'])
        ['b1', 'b2', 'b3']

    The original iterable will be advanced and its items will be cached until
    they are used by the child iterables. This may require significant storage.

    By default, attempting to select a bucket to which no items belong will
    exhaust the iterable and cache all values.
    If you specify a *validator* function, selected buckets will instead be
    checked against it.

        >>> from itertools import count
        >>> it = count(1, 2)  # Infinite sequence of odd numbers
        >>> key = lambda x: x % 10  # Bucket by last digit
        >>> validator = lambda x: x in {1, 3, 5, 7, 9}  # Odd digits only
        >>> s = bucket(it, key=key, validator=validator)
        >>> 2 in s
        False
        >>> list(s[2])
        []

    .. seealso:: :func:`map_reduce`, :func:`groupby_transform`

        If storage is not a concern, :func:`map_reduce` returns a Python
        dictionary, which is generally easier to work with. If the elements
        with the same key are already adjacent, :func:`groupby_transform`
        or :func:`itertools.groupby` can be used without any caching overhead.

    """

    def __init__(self, iterable, key, validator=None):
        self._it = iter(iterable)
        self._key = key
        # Per-key FIFO queues of items seen but not yet consumed.
        self._cache = defaultdict(deque)
        # With no validator, every key is considered valid.
        self._validator = validator if validator is not None else (lambda x: True)

    def __contains__(self, value):
        if not self._validator(value):
            return False

        try:
            item = next(self[value])
        except StopIteration:
            return False

        # Membership testing should not consume the item: put it back at
        # the front of its queue.
        self._cache[value].appendleft(item)
        return True

    def _get_values(self, value):
        """
        Helper to yield items from the parent iterator that match *value*.
        Items that don't match are stored in the local cache as they
        are encountered.
        """
        while True:
            queue = self._cache[value]

            # Serve anything already cached for this key first.
            if queue:
                yield queue.popleft()
                continue

            # Otherwise scan forward in the parent iterator, caching
            # non-matching (but valid) items for their own buckets.
            for item in self._it:
                item_value = self._key(item)
                if item_value == value:
                    yield item
                    break
                if self._validator(item_value):
                    self._cache[item_value].append(item)
            else:
                # Parent iterator exhausted: no more matches.
                return

    def __iter__(self):
        # Exhaust the source, caching every item that passes validation,
        # then iterate over the keys discovered.
        for item in self._it:
            item_value = self._key(item)
            if self._validator(item_value):
                self._cache[item_value].append(item)

        return iter(self._cache)

    def __getitem__(self, value):
        if not self._validator(value):
            return iter(())

        return self._get_values(value)
def spy(iterable, n=1):
    """Return a 2-tuple with a list containing the first *n* elements of
    *iterable*, and an iterator with the same items as *iterable*.
    This allows you to "look ahead" at the items in the iterable without
    advancing it.

    There is one item in the list by default:

    >>> iterable = 'abcdefg'
    >>> head, iterable = spy(iterable)
    >>> head
    ['a']
    >>> list(iterable)
    ['a', 'b', 'c', 'd', 'e', 'f', 'g']

    If the iterable has fewer than *n* items, the list holds whatever is
    available:

    >>> head, iterable = spy([1, 2, 3], 10)
    >>> head
    [1, 2, 3]
    >>> list(iterable)
    [1, 2, 3]
    """
    # tee gives two independent cursors; one is consumed for the preview,
    # the other is handed back untouched.
    untouched, preview = tee(iterable)
    return list(islice(preview, n)), untouched
def interleave(*iterables):
    """Return a new iterable yielding from each iterable in turn,
    until the shortest is exhausted.

    >>> list(interleave([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7]

    For a version that doesn't terminate after the shortest iterable is
    exhausted, see :func:`interleave_longest`.
    """
    # zip stops at the shortest input, which gives exactly the truncation
    # behavior documented above.
    for round_of_items in zip(*iterables):
        yield from round_of_items
def interleave_longest(*iterables):
    """Return a new iterable yielding from each iterable in turn,
    skipping any that are exhausted.

    >>> list(interleave_longest([1, 2, 3], [4, 5], [6, 7, 8]))
    [1, 4, 6, 2, 5, 7, 3, 8]

    This function produces the same output as :func:`roundrobin`, but may
    perform better for some inputs (in particular when the number of iterables
    is large).
    """
    # A unique sentinel marks the padding added by zip_longest so that it
    # can be filtered out (None would collide with legitimate items).
    missing = object()
    for round_of_items in zip_longest(*iterables, fillvalue=missing):
        for item in round_of_items:
            if item is not missing:
                yield item
1311def interleave_evenly(iterables, lengths=None):
1312 """
1313 Interleave multiple iterables so that their elements are evenly distributed
1314 throughout the output sequence.
1316 >>> iterables = [1, 2, 3, 4, 5], ['a', 'b']
1317 >>> list(interleave_evenly(iterables))
1318 [1, 2, 'a', 3, 4, 'b', 5]
1320 >>> iterables = [[1, 2, 3], [4, 5], [6, 7, 8]]
1321 >>> list(interleave_evenly(iterables))
1322 [1, 6, 4, 2, 7, 3, 8, 5]
1324 This function requires iterables of known length. Iterables without
1325 ``__len__()`` can be used by manually specifying lengths with *lengths*:
1327 >>> from itertools import combinations, repeat
1328 >>> iterables = [combinations(range(4), 2), ['a', 'b', 'c']]
1329 >>> lengths = [4 * (4 - 1) // 2, 3]
1330 >>> list(interleave_evenly(iterables, lengths=lengths))
1331 [(0, 1), (0, 2), 'a', (0, 3), (1, 2), 'b', (1, 3), (2, 3), 'c']
1333 Based on Bresenham's algorithm.
1334 """
1335 if lengths is None:
1336 try:
1337 lengths = [len(it) for it in iterables]
1338 except TypeError:
1339 raise ValueError(
1340 'Iterable lengths could not be determined automatically. '
1341 'Specify them with the lengths keyword.'
1342 )
1343 elif len(iterables) != len(lengths):
1344 raise ValueError('Mismatching number of iterables and lengths.')
1346 dims = len(lengths)
1348 # sort iterables by length, descending
1349 lengths_permute = sorted(
1350 range(dims), key=lambda i: lengths[i], reverse=True
1351 )
1352 lengths_desc = [lengths[i] for i in lengths_permute]
1353 iters_desc = [iter(iterables[i]) for i in lengths_permute]
1355 # the longest iterable is the primary one (Bresenham: the longest
1356 # distance along an axis)
1357 delta_primary, deltas_secondary = lengths_desc[0], lengths_desc[1:]
1358 iter_primary, iters_secondary = iters_desc[0], iters_desc[1:]
1359 errors = [delta_primary // dims] * len(deltas_secondary)
1361 to_yield = sum(lengths)
1362 while to_yield:
1363 yield next(iter_primary)
1364 to_yield -= 1
1365 # update errors for each secondary iterable
1366 errors = [e - delta for e, delta in zip(errors, deltas_secondary)]
1368 # those iterables for which the error is negative are yielded
1369 # ("diagonal step" in Bresenham)
1370 for i, e_ in enumerate(errors):
1371 if e_ < 0:
1372 yield next(iters_secondary[i])
1373 to_yield -= 1
1374 errors[i] += delta_primary
def interleave_randomly(*iterables):
    """Repeatedly select one of the input *iterables* at random and yield the next
    item from it.

    >>> iterables = [1, 2, 3], 'abc', (True, False, None)
    >>> list(interleave_randomly(*iterables))  # doctest: +SKIP
    ['a', 'b', 1, 'c', True, False, None, 2, 3]

    The relative order of the items within each input iterable is preserved,
    but note that the possible output sequences are not equally likely.
    """
    pending = [iter(source) for source in iterables]
    while pending:
        chosen = randrange(len(pending))
        try:
            yield next(pending[chosen])
        except StopIteration:
            # Drop the exhausted iterator in O(1): overwrite it with the
            # last one, then shrink the list (order doesn't matter here).
            pending[chosen] = pending[-1]
            del pending[-1]
def collapse(iterable, base_type=None, levels=None):
    """Flatten an iterable with multiple levels of nesting (e.g., a list of
    lists of tuples) into non-iterable types.

    >>> iterable = [(1, 2), ([3, 4], [[5], [6]])]
    >>> list(collapse(iterable))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and
    will not be collapsed.

    To avoid collapsing other types, specify *base_type*:

    >>> iterable = ['ab', ('cd', 'ef'), ['gh', 'ij']]
    >>> list(collapse(iterable, base_type=tuple))
    ['ab', ('cd', 'ef'), 'gh', 'ij']

    Specify *levels* to stop flattening after a certain level:

    >>> iterable = [('a', ['b']), ('c', ['d'])]
    >>> list(collapse(iterable, levels=1))
    ['a', ['b'], 'c', ['d']]
    """
    # Explicit LIFO stack of (depth, iterator-of-nodes) pairs; the outer
    # iterable is treated as the single node at depth 0.
    stack = [(0, iter((iterable,)))]

    while stack:
        group = stack.pop()
        depth, nodes = group

        # Past the requested depth: pass nodes through unflattened.
        if levels is not None and depth > levels:
            yield from nodes
            continue

        for node in nodes:
            # Strings/bytes (and any base_type) are treated as atoms.
            if isinstance(node, (str, bytes)) or (
                base_type is not None and isinstance(node, base_type)
            ):
                yield node
            else:
                try:
                    children = iter(node)
                except TypeError:
                    # Not iterable: it's a leaf.
                    yield node
                else:
                    # Save our place in the current group, then descend
                    # into the child before resuming (depth-first).
                    stack.append(group)
                    stack.append((depth + 1, children))
                    break
def side_effect(func, iterable, chunk_size=None, before=None, after=None):
    """Invoke *func* on each item in *iterable* (or on each *chunk_size* group
    of items) before yielding the item.

    `func` must be a function that takes a single argument. Its return value
    will be discarded.

    *before* and *after* are optional zero-argument functions run before
    iteration starts and after it ends (even on error), respectively.

    `side_effect` can be used for logging, updating progress bars, or anything
    that is not functionally "pure," e.g. emitting a status message:

    >>> from more_itertools import consume
    >>> func = lambda item: print('Received {}'.format(item))
    >>> consume(side_effect(func, range(2)))
    Received 0
    Received 1

    Operating on chunks of items:

    >>> pair_sums = []
    >>> func = lambda chunk: pair_sums.append(sum(chunk))
    >>> list(side_effect(func, [0, 1, 2, 3, 4, 5], 2))
    [0, 1, 2, 3, 4, 5]
    >>> list(pair_sums)
    [1, 5, 9]
    """
    try:
        if before is not None:
            before()

        if chunk_size is None:
            # Item mode: call func once per item.
            for element in iterable:
                func(element)
                yield element
        else:
            # Chunk mode: call func once per chunk of chunk_size items.
            for group in chunked(iterable, chunk_size):
                func(group)
                yield from group
    finally:
        # Run the teardown hook even if the consumer abandons the generator.
        if after is not None:
            after()
def sliced(seq, n, strict=False):
    """Yield slices of length *n* from the sequence *seq*.

    >>> list(sliced((1, 2, 3, 4, 5, 6), 3))
    [(1, 2, 3), (4, 5, 6)]

    By default, the last yielded slice will have fewer than *n* elements
    if the length of *seq* is not divisible by *n*:

    >>> list(sliced((1, 2, 3, 4, 5, 6, 7, 8), 3))
    [(1, 2, 3), (4, 5, 6), (7, 8)]

    If *strict* is ``True`` and the length of *seq* is not divisible by *n*,
    ``ValueError`` is raised before the short final slice is yielded.

    This function will only work for iterables that support slicing.
    For non-sliceable iterables, see :func:`chunked`.
    """
    # Slicing past the end produces empty slices; takewhile(len, ...) stops
    # at the first empty one.
    slices = takewhile(len, (seq[start : start + n] for start in count(0, n)))

    if not strict:
        return slices

    def checked():
        for piece in slices:
            if len(piece) != n:
                raise ValueError("seq is not divisible by n.")
            yield piece

    return checked()
def split_at(iterable, pred, maxsplit=-1, keep_separator=False):
    """Yield lists of items from *iterable*, where each list is delimited by
    an item where callable *pred* returns ``True``.

    >>> list(split_at('abcdcba', lambda x: x == 'b'))
    [['a'], ['c', 'd', 'c'], ['a']]

    At most *maxsplit* splits are done (``-1``, the default, means no limit):

    >>> list(split_at(range(10), lambda n: n % 2 == 1, maxsplit=2))
    [[0], [2], [4, 5, 6, 7, 8, 9]]

    Delimiting items are dropped unless *keep_separator* is ``True``, in
    which case each separator is emitted as its own one-item list:

    >>> list(split_at('abcdcba', lambda x: x == 'b', keep_separator=True))
    [['a'], ['b'], ['c', 'd', 'c'], ['b'], ['a']]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    it = iter(iterable)
    for item in it:
        if not pred(item):
            group.append(item)
            continue
        # Separator found: flush the current group.
        yield group
        if keep_separator:
            yield [item]
        if maxsplit == 1:
            # Last allowed split: emit everything else as one group.
            yield list(it)
            return
        group = []
        maxsplit -= 1
    yield group
def split_before(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends just before
    an item for which callable *pred* returns ``True``:

    >>> list(split_before('OneTwo', lambda s: s.isupper()))
    [['O', 'n', 'e'], ['T', 'w', 'o']]

    At most *maxsplit* splits are done (``-1``, the default, means no limit):

    >>> list(split_before(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    it = iter(iterable)
    for item in it:
        # A match starts a new group, unless we're at the very beginning
        # (an empty leading group is never emitted).
        if pred(item) and group:
            yield group
            if maxsplit == 1:
                # Last allowed split: the match and everything after it
                # form the final group.
                yield [item, *it]
                return
            group = []
            maxsplit -= 1
        group.append(item)
    if group:
        yield group
def split_after(iterable, pred, maxsplit=-1):
    """Yield lists of items from *iterable*, where each list ends with an
    item where callable *pred* returns ``True``:

    >>> list(split_after('one1two2', lambda s: s.isdigit()))
    [['o', 'n', 'e', '1'], ['t', 'w', 'o', '2']]

    At most *maxsplit* splits are done (``-1``, the default, means no limit):

    >>> list(split_after(range(10), lambda n: n % 3 == 0, maxsplit=2))
    [[0], [1, 2, 3], [4, 5, 6, 7, 8, 9]]
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    group = []
    it = iter(iterable)
    for item in it:
        group.append(item)
        if pred(item):
            yield group
            if maxsplit == 1:
                # Last allowed split: drain the rest into one final group,
                # but don't emit it if there's nothing left.
                remainder = list(it)
                if remainder:
                    yield remainder
                return
            group = []
            maxsplit -= 1
    if group:
        yield group
def split_when(iterable, pred, maxsplit=-1):
    """Split *iterable* into pieces based on the output of *pred*.
    *pred* should be a function that takes successive pairs of items and
    returns ``True`` if the iterable should be split in between them.

    For example, to find runs of increasing numbers, split the iterable when
    element ``i`` is larger than element ``i + 1``:

    >>> list(split_when([1, 2, 3, 3, 2, 5, 2, 4, 2], lambda x, y: x > y))
    [[1, 2, 3, 3], [2, 5], [2, 4], [2]]

    At most *maxsplit* splits are done (``-1``, the default, means no limit).
    """
    if maxsplit == 0:
        yield list(iterable)
        return

    it = iter(iterable)
    try:
        previous = next(it)
    except StopIteration:
        # Empty input: yield nothing at all.
        return

    group = [previous]
    for current in it:
        # pred sees each adjacent (previous, current) pair exactly once.
        if pred(previous, current):
            yield group
            if maxsplit == 1:
                yield [current, *it]
                return
            group = []
            maxsplit -= 1

        group.append(current)
        previous = current

    yield group
def split_into(iterable, sizes):
    """Yield a list of sequential items from *iterable* of length 'n' for each
    integer 'n' in *sizes*.

    >>> list(split_into([1,2,3,4,5,6], [1,2,3]))
    [[1], [2, 3], [4, 5, 6]]

    If the sum of *sizes* is smaller than the length of *iterable*, the
    leftover items are not returned; if it is larger, the overrunning list
    is short and any further lists are empty:

    >>> list(split_into([1,2,3,4], [1,2,3,4]))
    [[1], [2, 3], [4], []]

    A ``None`` in *sizes* means "everything that remains", the same way
    :func:`itertools.islice` treats a ``None`` stop:

    >>> list(split_into([1,2,3,4,5,6,7,8,9,0], [2,3,None]))
    [[1, 2], [3, 4, 5], [6, 7, 8, 9, 0]]

    Useful for grouping a series of items where the group sizes are not
    uniform, e.g. columns of a table that encode multi-part fields.
    """
    # A single shared iterator lets each islice pick up where the previous
    # one stopped (and works when *iterable* is itself a generator).
    it = iter(iterable)

    for size in sizes:
        if size is None:
            yield list(it)
            return
        yield list(islice(it, size))
def padded(iterable, fillvalue=None, n=None, next_multiple=False):
    """Yield the elements from *iterable*, followed by *fillvalue*, such that
    at least *n* items are emitted.

    >>> list(padded([1, 2, 3], '?', 5))
    [1, 2, 3, '?', '?']

    If *next_multiple* is ``True``, *fillvalue* will be emitted until the
    number of items emitted is a multiple of *n*:

    >>> list(padded([1, 2, 3, 4], n=3, next_multiple=True))
    [1, 2, 3, 4, None, None]

    If *n* is ``None``, *fillvalue* will be emitted indefinitely; truncate
    with :func:`islice` to get exactly *n* items:

    >>> list(islice(padded([1, 2, 3], '?'), 5))
    [1, 2, 3, '?', '?']
    """
    source = iter(iterable)
    # The source followed by an endless supply of padding.
    padded_source = chain(source, repeat(fillvalue))

    if n is None:
        return padded_source
    if n < 1:
        raise ValueError('n must be at least 1')

    if next_multiple:

        def batches():
            # Each real item opens a batch that is then topped up to exactly
            # n items — real items while they last, fillvalue after that.
            for head in source:
                yield (head,)
                yield islice(padded_source, n - 1)

        return chain.from_iterable(batches())

    # Pad the front to at least n items, then pass the remainder through.
    return chain(islice(padded_source, n), source)
def repeat_each(iterable, n=2):
    """Repeat each element in *iterable* *n* times.

    >>> list(repeat_each('ABC', 3))
    ['A', 'A', 'A', 'B', 'B', 'B', 'C', 'C', 'C']
    """
    # Lazily expand each item into a run of n copies.
    return chain.from_iterable(repeat(item, n) for item in iterable)
def repeat_last(iterable, default=None):
    """After the *iterable* is exhausted, keep yielding its last element.

    >>> list(islice(repeat_last(range(3)), 5))
    [0, 1, 2, 2, 2]

    If the iterable is empty, yield *default* forever::

    >>> list(islice(repeat_last(range(0), 42), 5))
    [42, 42, 42, 42, 42]
    """
    # A unique sentinel distinguishes "no items seen" from a last item
    # that happens to equal default or None.
    sentinel = object()
    last = sentinel
    for last in iterable:
        yield last
    yield from repeat(default if last is sentinel else last)
def distribute(n, iterable):
    """Distribute the items from *iterable* among *n* smaller iterables,
    dealing them out round-robin style.

    >>> group_1, group_2 = distribute(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 3, 5]
    >>> list(group_2)
    [2, 4, 6]

    If the length of *iterable* is not evenly divisible by *n*, the returned
    iterables will differ in length; if it is smaller than *n*, the trailing
    iterables will be empty.

    This function uses :func:`itertools.tee` and may require significant
    storage. If the order of items must match the original iterable, see
    :func:`divide`.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Each copy starts at a different offset and strides by n, so copy k
    # sees items k, k+n, k+2n, ...
    return [
        islice(copy, offset, None, n)
        for offset, copy in enumerate(tee(iterable, n))
    ]
def stagger(iterable, offsets=(-1, 0, 1), longest=False, fillvalue=None):
    """Yield tuples whose elements are offset from *iterable*.
    The amount by which the `i`-th item in each tuple is offset is given by
    the `i`-th item in *offsets*.

    >>> list(stagger([0, 1, 2, 3]))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3)]

    By default, the sequence ends when the final element of a tuple is the
    last item in the iterable. Set *longest* to ``True`` to continue until
    the first element of a tuple is the last item, padding with *fillvalue*
    (``None`` by default):

    >>> list(stagger([0, 1, 2, 3], longest=True))
    [(None, 0, 1), (0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
    """
    # One independent copy of the iterable per offset; zip_offset does the
    # actual shifting and zipping.
    copies = tee(iterable, len(offsets))
    return zip_offset(
        *copies, offsets=offsets, longest=longest, fillvalue=fillvalue
    )
def zip_offset(*iterables, offsets, longest=False, fillvalue=None):
    """``zip`` the input *iterables* together, but offset the `i`-th iterable
    by the `i`-th item in *offsets*.

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1)))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e')]

    This can be used as a lightweight alternative to SciPy or pandas to
    analyze data sets in which some series have a lead or lag relationship.

    By default, the sequence ends when the shortest iterable is exhausted.
    Set *longest* to ``True`` to continue until the longest one is, padding
    missing positions with *fillvalue* (``None`` by default).

    >>> list(zip_offset('0123', 'abcdef', offsets=(0, 1), longest=True))
    [('0', 'b'), ('1', 'c'), ('2', 'd'), ('3', 'e'), (None, 'f')]
    """
    if len(iterables) != len(offsets):
        raise ValueError("Number of iterables and offsets didn't match")

    shifted = []
    for source, offset in zip(iterables, offsets):
        if offset < 0:
            # Negative offset: delay the series by prepending fill values.
            shifted.append(chain(repeat(fillvalue, -offset), source))
        elif offset > 0:
            # Positive offset: advance the series by skipping items.
            shifted.append(islice(source, offset, None))
        else:
            shifted.append(source)

    if longest:
        return zip_longest(*shifted, fillvalue=fillvalue)
    return zip(*shifted)
1939def sort_together(
1940 iterables, key_list=(0,), key=None, reverse=False, strict=False
1941):
1942 """Return the input iterables sorted together, with *key_list* as the
1943 priority for sorting. All iterables are trimmed to the length of the
1944 shortest one.
1946 This can be used like the sorting function in a spreadsheet. If each
1947 iterable represents a column of data, the key list determines which
1948 columns are used for sorting.
1950 By default, all iterables are sorted using the ``0``-th iterable::
1952 >>> iterables = [(4, 3, 2, 1), ('a', 'b', 'c', 'd')]
1953 >>> sort_together(iterables)
1954 [(1, 2, 3, 4), ('d', 'c', 'b', 'a')]
1956 Set a different key list to sort according to another iterable.
1957 Specifying multiple keys dictates how ties are broken::
1959 >>> iterables = [(3, 1, 2), (0, 1, 0), ('c', 'b', 'a')]
1960 >>> sort_together(iterables, key_list=(1, 2))
1961 [(2, 3, 1), (0, 0, 1), ('a', 'c', 'b')]
1963 To sort by a function of the elements of the iterable, pass a *key*
1964 function. Its arguments are the elements of the iterables corresponding to
1965 the key list::
1967 >>> names = ('a', 'b', 'c')
1968 >>> lengths = (1, 2, 3)
1969 >>> widths = (5, 2, 1)
1970 >>> def area(length, width):
1971 ... return length * width
1972 >>> sort_together([names, lengths, widths], key_list=(1, 2), key=area)
1973 [('c', 'b', 'a'), (3, 2, 1), (1, 2, 5)]
1975 Set *reverse* to ``True`` to sort in descending order.
1977 >>> sort_together([(1, 2, 3), ('c', 'b', 'a')], reverse=True)
1978 [(3, 2, 1), ('a', 'b', 'c')]
1980 If the *strict* keyword argument is ``True``, then
1981 ``ValueError`` will be raised if any of the iterables have
1982 different lengths.
1984 """
1985 if key is None:
1986 # if there is no key function, the key argument to sorted is an
1987 # itemgetter
1988 key_argument = itemgetter(*key_list)
1989 else:
1990 # if there is a key function, call it with the items at the offsets
1991 # specified by the key function as arguments
1992 key_list = list(key_list)
1993 if len(key_list) == 1:
1994 # if key_list contains a single item, pass the item at that offset
1995 # as the only argument to the key function
1996 key_offset = key_list[0]
1997 key_argument = lambda zipped_items: key(zipped_items[key_offset])
1998 else:
1999 # if key_list contains multiple items, use itemgetter to return a
2000 # tuple of items, which we pass as *args to the key function
2001 get_key_items = itemgetter(*key_list)
2002 key_argument = lambda zipped_items: key(
2003 *get_key_items(zipped_items)
2004 )
2006 transposed = zip(*iterables, strict=strict)
2007 reordered = sorted(transposed, key=key_argument, reverse=reverse)
2008 untransposed = zip(*reordered, strict=strict)
2009 return list(untransposed)
def unzip(iterable):
    """The inverse of :func:`zip`, this function disaggregates the elements
    of the zipped *iterable*.

    The ``i``-th iterable contains the ``i``-th element from each element
    of the zipped iterable. The first element is used to determine the
    length of the remaining elements.

    >>> letters, numbers = unzip([('a', 1), ('b', 2), ('c', 3)])
    >>> list(letters)
    ['a', 'b', 'c']
    >>> list(numbers)
    [1, 2, 3]

    This is similar to using ``zip(*iterable)``, but it avoids reading
    *iterable* into memory. Note, however, that this function uses
    :func:`itertools.tee` and thus may require significant storage.
    """
    # Peek at the first element to learn how many output iterables we need.
    head, iterable = spy(iterable)
    if not head:
        # Empty input, e.g. zip([], [], []) — nothing to disaggregate.
        return ()
    first = head[0]
    copies = tee(iterable, len(first))

    # Suppressing IndexError lets "improperly zipped" inputs (elements of
    # differing lengths, e.g. [(1, 2, 3), (4, 5), (6,)]) simply stop each
    # output iterable at the first too-short element instead of raising.
    return tuple(
        iter_suppress(map(itemgetter(i), copy), IndexError)
        for i, copy in enumerate(copies)
    )
def divide(n, iterable):
    """Divide the elements from *iterable* into *n* parts, maintaining
    order.

    >>> group_1, group_2 = divide(2, [1, 2, 3, 4, 5, 6])
    >>> list(group_1)
    [1, 2, 3]
    >>> list(group_2)
    [4, 5, 6]

    If the length of *iterable* is not evenly divisible by *n*, the parts
    differ in length; if it is smaller than *n*, the trailing parts are
    empty.

    This function exhausts the iterable before returning.
    If order is not important, see :func:`distribute`, which does not first
    pull the iterable into memory.
    """
    if n < 1:
        raise ValueError('n must be at least 1')

    # Use the input directly if it supports slicing; otherwise snapshot it.
    try:
        iterable[:0]
    except TypeError:
        seq = tuple(iterable)
    else:
        seq = iterable

    # The first `extra` parts get one more element than the rest.
    size, extra = divmod(len(seq), n)

    parts = []
    start = 0
    for index in range(n):
        stop = start + size + (1 if index < extra else 0)
        parts.append(iter(seq[start:stop]))
        start = stop

    return parts
def always_iterable(obj, base_type=(str, bytes)):
    """If *obj* is iterable, return an iterator over its items;
    if it is not, return a one-item iterable containing *obj*;
    if it is ``None``, return an empty iterable.

    >>> list(always_iterable((1, 2, 3)))
    [1, 2, 3]
    >>> list(always_iterable(1))
    [1]
    >>> list(always_iterable(None))
    []

    By default, binary and text strings are treated as single units rather
    than iterated:

    >>> list(always_iterable('foo'))
    ['foo']

    If *base_type* is set, objects for which ``isinstance(obj, base_type)``
    returns ``True`` won't be considered iterable; set it to ``None`` to
    treat every iterable (including strings) as iterable:

    >>> list(always_iterable({'a': 1}, base_type=dict))
    [{'a': 1}]
    >>> list(always_iterable('foo', base_type=None))
    ['f', 'o', 'o']
    """
    if obj is None:
        return iter(())

    treat_as_atom = base_type is not None and isinstance(obj, base_type)
    if not treat_as_atom:
        try:
            return iter(obj)
        except TypeError:
            # Not actually iterable; fall through and wrap it.
            pass

    return iter((obj,))
def adjacent(predicate, iterable, distance=1):
    """Return an iterable over `(bool, item)` tuples where the `item` is
    drawn from *iterable* and the `bool` indicates whether
    that item satisfies the *predicate* or is adjacent to an item that does.

    >>> list(adjacent(lambda x: x == 3, range(6)))
    [(False, 0), (False, 1), (True, 2), (True, 3), (True, 4), (False, 5)]

    Set *distance* to change what counts as adjacent:

    >>> list(adjacent(lambda x: x == 3, range(6), distance=2))
    [(False, 0), (True, 1), (True, 2), (True, 3), (True, 4), (True, 5)]

    Useful for contextualizing search results, e.g. showing surrounding
    lines in a diff. The predicate is called exactly once per item.

    See also :func:`groupby_transform`, which can be used with this function
    to group ranges of items with the same `bool` value.
    """
    # distance=0 is allowed mainly to reproduce plain map() behavior.
    if distance < 0:
        raise ValueError('distance must be at least 0')

    items, originals = tee(iterable)
    # Pad the match flags on both ends so the sliding window is full at
    # the boundaries; an item is "adjacent" if any flag in its window of
    # width 2*distance + 1 is set.
    pad = [False] * distance
    flags = chain(pad, map(predicate, items), pad)
    near_match = map(any, windowed(flags, 2 * distance + 1))
    return zip(near_match, originals)
def groupby_transform(iterable, keyfunc=None, valuefunc=None, reducefunc=None):
    """An extension of :func:`itertools.groupby` that can apply transformations
    to the grouped data.

    * *keyfunc* computes a key value for each item in *iterable*
    * *valuefunc* transforms the individual items after grouping
    * *reducefunc* transforms each group of items

    >>> iterable = 'aAAbBBcCC'
    >>> keyfunc = lambda k: k.upper()
    >>> valuefunc = lambda v: v.lower()
    >>> reducefunc = lambda g: ''.join(g)
    >>> list(groupby_transform(iterable, keyfunc, valuefunc, reducefunc))
    [('A', 'aaa'), ('B', 'bbb'), ('C', 'ccc')]

    Each optional argument defaults to an identity function if not specified.

    As with :func:`itertools.groupby`, only adjacent items are grouped
    together — sort the iterable by the key function first, or consider
    :func:`bucket` or :func:`map_reduce` for unsorted data.

    .. seealso:: :func:`bucket`, :func:`map_reduce`
    """
    grouped = groupby(iterable, keyfunc)
    if valuefunc:
        # Transform each member of each group, lazily.
        grouped = ((k, map(valuefunc, group)) for k, group in grouped)
    if reducefunc:
        # Collapse each group to a single value.
        grouped = ((k, reducefunc(group)) for k, group in grouped)

    return grouped
class numeric_range(Sequence):
    """An extension of the built-in ``range()`` function whose arguments can
    be any orderable numeric type.

    With only *stop* specified, *start* defaults to ``0`` and *step*
    defaults to ``1``. The output items will match the type of *stop*:

        >>> list(numeric_range(3.5))
        [0.0, 1.0, 2.0, 3.0]

    With only *start* and *stop* specified, *step* defaults to ``1``. The
    output items will match the type of *start*:

        >>> from decimal import Decimal
        >>> start = Decimal('2.1')
        >>> stop = Decimal('5.1')
        >>> list(numeric_range(start, stop))
        [Decimal('2.1'), Decimal('3.1'), Decimal('4.1')]

    With *start*, *stop*, and *step* specified the output items will match
    the type of ``start + step``:

        >>> from fractions import Fraction
        >>> start = Fraction(1, 2)  # Start at 1/2
        >>> stop = Fraction(5, 2)  # End at 5/2
        >>> step = Fraction(1, 2)  # Count by 1/2
        >>> list(numeric_range(start, stop, step))
        [Fraction(1, 2), Fraction(1, 1), Fraction(3, 2), Fraction(2, 1)]

    If *step* is zero, ``ValueError`` is raised. Negative steps are supported:

        >>> list(numeric_range(3, -1, -1.0))
        [3.0, 2.0, 1.0, 0.0]

    Be aware of the limitations of floating-point numbers; the representation
    of the yielded numbers may be surprising.

    ``datetime.datetime`` objects can be used for *start* and *stop*, if *step*
    is a ``datetime.timedelta`` object:

        >>> import datetime
        >>> start = datetime.datetime(2019, 1, 1)
        >>> stop = datetime.datetime(2019, 1, 3)
        >>> step = datetime.timedelta(days=1)
        >>> items = iter(numeric_range(start, stop, step))
        >>> next(items)
        datetime.datetime(2019, 1, 1, 0, 0)
        >>> next(items)
        datetime.datetime(2019, 1, 2, 0, 0)

    """

    # All empty ranges compare equal (see __eq__), so they all share the
    # hash of an empty built-in range.
    _EMPTY_HASH = hash(range(0, 0))

    def __init__(self, *args):
        argc = len(args)
        if argc == 1:
            # numeric_range(stop): derive start (zero) and step (one) from
            # stop's type so the yielded items match it.
            (self._stop,) = args
            self._start = type(self._stop)(0)
            self._step = type(self._stop - self._start)(1)
        elif argc == 2:
            # numeric_range(start, stop): the step's type comes from the
            # difference, which handles e.g. datetime - datetime = timedelta.
            self._start, self._stop = args
            self._step = type(self._stop - self._start)(1)
        elif argc == 3:
            self._start, self._stop, self._step = args
        elif argc == 0:
            raise TypeError(
                f'numeric_range expected at least 1 argument, got {argc}'
            )
        else:
            raise TypeError(
                f'numeric_range expected at most 3 arguments, got {argc}'
            )

        # A zero of the step's type, used for sign tests and modulo checks.
        self._zero = type(self._step)(0)
        if self._step == self._zero:
            raise ValueError('numeric_range() arg 3 must not be zero')
        self._growing = self._step > self._zero

    def __bool__(self):
        # Non-empty iff at least one item lies strictly before stop
        # (in the direction of travel).
        if self._growing:
            return self._start < self._stop
        else:
            return self._start > self._stop

    def __contains__(self, elem):
        # Membership requires the element to lie in the half-open interval
        # AND to be an exact whole number of steps away from start.
        if self._growing:
            if self._start <= elem < self._stop:
                return (elem - self._start) % self._step == self._zero
        else:
            if self._start >= elem > self._stop:
                return (self._start - elem) % (-self._step) == self._zero

        return False

    def __eq__(self, other):
        if isinstance(other, numeric_range):
            empty_self = not bool(self)
            empty_other = not bool(other)
            if empty_self or empty_other:
                return empty_self and empty_other  # True if both empty
            else:
                # Compare the last reachable item rather than _stop itself:
                # different stop values can describe the same set of items.
                return (
                    self._start == other._start
                    and self._step == other._step
                    and self._get_by_index(-1) == other._get_by_index(-1)
                )
        else:
            return False

    def __getitem__(self, key):
        if isinstance(key, int):
            return self._get_by_index(key)
        elif isinstance(key, slice):
            # Resolve the slice against our length, then express the result
            # as a new numeric_range in item space.
            start_idx, stop_idx, step_idx = key.indices(self._len)
            return numeric_range(
                self._start + start_idx * self._step,
                self._start + stop_idx * self._step,
                self._step * step_idx,
            )
        else:
            raise TypeError(
                'numeric range indices must be '
                f'integers or slices, not {type(key).__name__}'
            )

    def __hash__(self):
        if self:
            # Hash on (first, last, step), consistent with __eq__ above.
            return hash((self._start, self._get_by_index(-1), self._step))
        else:
            return self._EMPTY_HASH

    def __iter__(self):
        # Generate start + n * step and cut off at stop; gt/lt are
        # partially applied with stop as the left operand.
        values = (self._start + (n * self._step) for n in count())
        if self._growing:
            return takewhile(partial(gt, self._stop), values)
        else:
            return takewhile(partial(lt, self._stop), values)

    def __len__(self):
        return self._len

    @cached_property
    def _len(self):
        # Normalize to a growing range so distance and step are positive.
        if self._growing:
            start = self._start
            stop = self._stop
            step = self._step
        else:
            start = self._stop
            stop = self._start
            step = -self._step
        distance = stop - start
        if distance <= self._zero:
            return 0
        else:  # distance > 0 and step > 0: regular euclidean division
            q, r = divmod(distance, step)
            return int(q) + int(r != self._zero)

    def __reduce__(self):
        # Support pickling by reconstructing from the original arguments.
        return numeric_range, (self._start, self._stop, self._step)

    def __repr__(self):
        if self._step == 1:
            return f"numeric_range({self._start!r}, {self._stop!r})"
        return (
            f"numeric_range({self._start!r}, {self._stop!r}, {self._step!r})"
        )

    def __reversed__(self):
        # Walk from the last item back to just past start, negating the step.
        return iter(
            numeric_range(
                self._get_by_index(-1), self._start - self._step, -self._step
            )
        )

    def count(self, value):
        # Items are unique, so the count is 0 or 1.
        return int(value in self)

    def index(self, value):
        # Mirrors __contains__: the index is the whole number of steps from
        # start, valid only when the division is exact.
        if self._growing:
            if self._start <= value < self._stop:
                q, r = divmod(value - self._start, self._step)
                if r == self._zero:
                    return int(q)
        else:
            if self._start >= value > self._stop:
                q, r = divmod(self._start - value, -self._step)
                if r == self._zero:
                    return int(q)

        raise ValueError(f"{value} is not in numeric range")

    def _get_by_index(self, i):
        # Support negative indices like the built-in sequence types.
        if i < 0:
            i += self._len
        if i < 0 or i >= self._len:
            raise IndexError("numeric range object index out of range")
        return self._start + i * self._step
def count_cycle(iterable, n=None):
    """Cycle through the items from *iterable* up to *n* times, yielding
    the number of completed cycles along with each item. If *n* is omitted
    the process repeats indefinitely.

        >>> list(count_cycle('AB', 3))
        [(0, 'A'), (0, 'B'), (1, 'A'), (1, 'B'), (2, 'A'), (2, 'B')]

    """
    if n is not None:
        # A bounded cycle is exactly the cartesian product of the cycle
        # counter with the (re-iterated) items.
        return product(range(n), iterable)

    seq = tuple(iterable)
    if not seq:
        return iter(())

    # n is always None here (the finite case returned above), so the
    # counter is unconditionally infinite. The previous code re-tested n
    # with `count() if n is None else range(n)`; the range(n) branch was
    # unreachable dead code and has been removed.
    return zip(repeat_each(count(), len(seq)), cycle(seq))
def mark_ends(iterable):
    """Yield 3-tuples of the form ``(is_first, is_last, item)``.

        >>> list(mark_ends('ABC'))
        [(True, False, 'A'), (False, False, 'B'), (False, True, 'C')]

    Use this when looping over an iterable to take special action on its
    first and/or last items:

        >>> iterable = ['Header', 100, 200, 'Footer']
        >>> total = 0
        >>> for is_first, is_last, item in mark_ends(iterable):
        ...     if is_first:
        ...         continue  # Skip the header
        ...     if is_last:
        ...         continue  # Skip the footer
        ...     total += item
        >>> print(total)
        300
    """
    iterator = iter(iterable)
    try:
        current = next(iterator)
    except StopIteration:
        return

    # Hold one item back so we can tell whether the held item is the last.
    is_first = True
    for upcoming in iterator:
        yield is_first, False, current
        current = upcoming
        is_first = False
    yield is_first, True, current
def locate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``.

    *pred* defaults to :func:`bool`, which selects truthy items:

        >>> list(locate([0, 1, 1, 0, 1, 0, 0]))
        [1, 2, 4]

    Set *pred* to a custom function to find particular items:

        >>> list(locate(['a', 'b', 'c', 'b'], lambda x: x == 'b'))
        [1, 3]

    If *window_size* is given, *pred* is called with that many consecutive
    items at a time (possibly fewer at the end of the iterable), which
    enables searching for sub-sequences:

        >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3]
        >>> pred = lambda *args: args == (1, 2, 3)
        >>> list(locate(iterable, pred=pred, window_size=3))
        [1, 5]

    Pair with :func:`seekable` to find indexes and then jump back to
    retrieve the associated items.

    """
    if window_size is None:
        return compress(count(), map(pred, iterable))

    if window_size < 1:
        raise ValueError('window size must be at least 1')

    windows = windowed(iterable, window_size, fillvalue=_marker)
    # Strip the _marker padding from the final window(s) before calling pred.
    results = (
        pred(*(item for item in window if item is not _marker))
        for window in windows
    )
    return compress(count(), results)
def longest_common_prefix(iterables):
    """Yield elements of the longest common prefix among given *iterables*.

        >>> ''.join(longest_common_prefix(['abcd', 'abc', 'abf']))
        'ab'

    """
    # zip stops at the shortest input; a column with any mismatch ends
    # the common prefix.
    for column in zip(*iterables):
        if any(item != column[0] for item in column):
            return
        yield column[0]
def lstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the beginning
    for which *pred* returns ``True``.

    For example, to remove a set of items from the start of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(lstrip(iterable, pred))
        [1, 2, None, 3, False, None]

    This function is analogous to :func:`str.lstrip`, and is essentially
    a wrapper for :func:`itertools.dropwhile`.

    """
    iterator = iter(iterable)
    # Skip the leading run of matching items...
    for item in iterator:
        if not pred(item):
            yield item
            break
    # ...then pass everything else through untouched.
    yield from iterator
def rstrip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the end
    for which *pred* returns ``True``.

    For example, to remove a set of items from the end of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(rstrip(iterable, pred))
        [None, False, None, 1, 2, None, 3]

    This function is analogous to :func:`str.rstrip`.

    """
    # Hold back each run of matching items; emit it only when a
    # non-matching item follows. A trailing run is silently dropped.
    pending = []
    for item in iterable:
        if pred(item):
            pending.append(item)
        else:
            if pending:
                yield from pending
                pending = []
            yield item
def strip(iterable, pred):
    """Yield the items from *iterable*, but strip any from the
    beginning and end for which *pred* returns ``True``.

    For example, to remove a set of items from both ends of an iterable:

        >>> iterable = (None, False, None, 1, 2, None, 3, False, None)
        >>> pred = lambda x: x in {None, False, ''}
        >>> list(strip(iterable, pred))
        [1, 2, None, 3]

    This function is analogous to :func:`str.strip`.

    """
    # Strip the front first, then the back of what remains.
    front_stripped = lstrip(iterable, pred)
    return rstrip(front_stripped, pred)
class islice_extended:
    """An extension of :func:`itertools.islice` that supports negative
    values for *stop*, *start*, and *step*.

        >>> iterator = iter('abcdefgh')
        >>> list(islice_extended(iterator, -4, -1))
        ['e', 'f', 'g']

    Slices with negative values require some caching of *iterable*, but
    only as much as necessary; a negative step even works with an
    infinite iterator. Slice notation is also supported:

        >>> from itertools import count
        >>> it = islice_extended(map(str, count()))[10:20:2]
        >>> list(it)
        ['10', '12', '14', '16', '18']

    """

    def __init__(self, iterable, *args):
        source = iter(iterable)
        # With slice arguments, delegate to the helper; otherwise pass
        # items straight through.
        self._iterator = (
            _islice_helper(source, slice(*args)) if args else source
        )

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    def __getitem__(self, key):
        if not isinstance(key, slice):
            raise TypeError(
                'islice_extended.__getitem__ argument must be a slice'
            )
        return islice_extended(_islice_helper(self._iterator, key))
def _islice_helper(it, s):
    """Yield items from iterator *it* selected by slice *s*, supporting
    negative start/stop/step (the engine behind islice_extended).

    Negative positions are relative to the end of the iterator, which is
    unknown up front, so those cases buffer just enough items in a deque
    to resolve them once the iterator is exhausted.
    """
    start = s.start
    stop = s.stop
    if s.step == 0:
        raise ValueError('step argument must be a non-zero integer or None.')
    step = s.step or 1

    if step > 0:
        start = 0 if (start is None) else start

        if start < 0:
            # Consume all but the last -start items
            # (enumerate from 1 so the last cached pair carries the total
            # number of items consumed).
            cache = deque(enumerate(it, 1), maxlen=-start)
            len_iter = cache[-1][0] if cache else 0

            # Adjust start to be positive
            i = max(len_iter + start, 0)

            # Adjust stop to be positive
            if stop is None:
                j = len_iter
            elif stop >= 0:
                j = min(stop, len_iter)
            else:
                j = max(len_iter + stop, 0)

            # Slice the cache
            n = j - i
            if n <= 0:
                return

            for index in range(n):
                if index % step == 0:
                    # pop and yield the item.
                    # We don't want to use an intermediate variable
                    # it would extend the lifetime of the current item
                    yield cache.popleft()[1]
                else:
                    # just pop and discard the item
                    cache.popleft()
        elif (stop is not None) and (stop < 0):
            # Advance to the start position
            next(islice(it, start, start), None)

            # When stop is negative, we have to carry -stop items while
            # iterating
            cache = deque(islice(it, -stop), maxlen=-stop)

            for index, item in enumerate(it):
                if index % step == 0:
                    # pop and yield the item.
                    # We don't want to use an intermediate variable
                    # it would extend the lifetime of the current item
                    yield cache.popleft()
                else:
                    # just pop and discard the item
                    cache.popleft()
                cache.append(item)
        else:
            # When both start and stop are positive we have the normal case
            yield from islice(it, start, stop, step)
    else:
        # Negative step: items are yielded in reverse, so some portion of
        # the iterator must be materialized before anything can be emitted.
        start = -1 if (start is None) else start

        if (stop is not None) and (stop < 0):
            # Consume all but the last items
            n = -stop - 1
            cache = deque(enumerate(it, 1), maxlen=n)
            len_iter = cache[-1][0] if cache else 0

            # If start and stop are both negative they are comparable and
            # we can just slice. Otherwise we can adjust start to be negative
            # and then slice.
            if start < 0:
                i, j = start, stop
            else:
                i, j = min(start - len_iter, -1), None

            for index, item in list(cache)[i:j:step]:
                yield item
        else:
            # Advance to the stop position
            if stop is not None:
                m = stop + 1
                next(islice(it, m, m), None)

            # stop is positive, so if start is negative they are not comparable
            # and we need the rest of the items.
            if start < 0:
                i = start
                n = None
            # stop is None and start is positive, so we just need items up to
            # the start index.
            elif stop is None:
                i = None
                n = start + 1
            # Both stop and start are positive, so they are comparable.
            else:
                i = None
                n = start - stop
                if n <= 0:
                    return

            cache = list(islice(it, n))

            yield from cache[i::step]
def always_reversible(iterable):
    """An extension of :func:`reversed` that supports all iterables, not
    just those which implement the ``Reversible`` or ``Sequence`` protocols.

        >>> print(*always_reversible(x for x in range(3)))
        2 1 0

    If the iterable is already reversible, the result of :func:`reversed`
    is returned directly. Otherwise the remaining items are cached and
    yielded in reverse order, which may require significant storage.
    """
    try:
        return reversed(iterable)
    except TypeError:
        pass
    # Not directly reversible; materialize a copy and reverse that.
    return reversed(list(iterable))
def consecutive_groups(iterable, ordering=None):
    """Yield groups of consecutive items using :func:`itertools.groupby`.

    Two items are considered consecutive when their positions — as given by
    the *ordering* function, which defaults to the identity — differ by
    exactly one. This is suitable for finding runs of numbers:

        >>> iterable = [1, 10, 11, 12, 20, 30, 31, 32, 33, 40]
        >>> for group in consecutive_groups(iterable):
        ...     print(list(group))
        [1]
        [10, 11, 12]
        [20]
        [30, 31, 32, 33]
        [40]

    To find runs of adjacent letters, use :func:`ord` as the ordering:

        >>> [list(g) for g in consecutive_groups('abcdfgilmnop', ord)]
        [['a', 'b', 'c', 'd'], ['f', 'g'], ['i'], ['l', 'm', 'n', 'o', 'p']]

    Each group is an iterator that shares its source with *iterable*;
    copy a group's elements (e.g., into a ``list``) before advancing to
    the next group if you need to keep them.

    """
    # Items in a consecutive run all share the same value of
    # (enumeration index - position), which makes a suitable groupby key.
    if ordering is None:
        def key(indexed):
            index, item = indexed
            return index - item
    else:
        def key(indexed):
            index, item = indexed
            return index - ordering(item)

    for _, group in groupby(enumerate(iterable), key=key):
        yield map(itemgetter(1), group)
def difference(iterable, func=sub, *, initial=None):
    """This function is the inverse of :func:`itertools.accumulate`. By
    default it computes the first difference of *iterable* using
    :func:`operator.sub`:

        >>> from itertools import accumulate
        >>> iterable = accumulate([0, 1, 2, 3, 4])  # produces 0, 1, 3, 6, 10
        >>> list(difference(iterable))
        [0, 1, 2, 3, 4]

    Other *func* values are applied as::

        A, B, C, D, ... --> A, func(B, A), func(C, B), func(D, C), ...

    For example, to do progressive division:

        >>> list(difference([1, 2, 6, 24, 120], lambda x, y: x // y))
        [1, 2, 3, 4, 5]

    If the *initial* keyword is set, the first element is skipped when
    computing successive differences:

        >>> it = [10, 11, 13, 16]  # from accumulate([1, 2, 3], initial=10)
        >>> list(difference(it, initial=10))
        [1, 2, 3]

    """
    trailing, leading = tee(iterable)
    # Advance the leading copy by one; an empty head means an empty input.
    head = list(islice(leading, 1))
    if not head:
        return iter([])

    # Pair item[i + 1] with item[i] for i = 0, 1, 2, ...
    diffs = map(func, leading, trailing)
    if initial is not None:
        return diffs

    return chain(head, diffs)
class SequenceView(Sequence):
    """Return a read-only view of the sequence object *target*.

    :class:`SequenceView` objects are analogous to Python's built-in
    "dictionary view" types: they provide a dynamic window onto a
    sequence's items, so updates to the sequence are visible through
    the view.

        >>> seq = ['0', '1', '2']
        >>> view = SequenceView(seq)
        >>> view
        SequenceView(['0', '1', '2'])
        >>> seq.append('3')
        >>> view
        SequenceView(['0', '1', '2', '3'])

    Indexing, slicing, and length queries all behave like the underlying
    sequence, but assignment is not allowed. Views are useful as an
    alternative to copying, as they require (almost) no extra storage.

    """

    def __init__(self, target):
        # Reject non-sequences up front; iterators would silently break
        # the indexing contract.
        if not isinstance(target, Sequence):
            raise TypeError
        self._target = target

    def __getitem__(self, index):
        return self._target[index]

    def __len__(self):
        return len(self._target)

    def __repr__(self):
        return f'{type(self).__name__}({self._target!r})'
class seekable:
    """Wrap an iterator to allow seeking backward and forward.

    Items are cached as they are pulled from the source iterable, so any
    previously-seen position can be revisited. Call :meth:`seek` with an
    index to move to that position; seek to ``0`` to "reset":

        >>> from itertools import count
        >>> it = seekable((str(n) for n in count()))
        >>> next(it), next(it), next(it)
        ('0', '1', '2')
        >>> it.seek(0)
        >>> next(it), next(it), next(it)
        ('0', '1', '2')

    Seeking forward past the end of the source is not an error — the
    source is simply exhausted. :meth:`relative_seek` moves relative to
    the source iterator's current position.

    :meth:`peek` returns the next item without advancing (accepting an
    optional *default*), and :func:`bool` reports whether more items
    remain:

        >>> it = seekable('1234')
        >>> it.peek()
        '1'
        >>> list(it)
        ['1', '2', '3', '4']
        >>> it.peek(default='empty')
        'empty'
        >>> bool(it)
        False

    The cached items may be inspected with :meth:`elements`, which
    returns a :class:`SequenceView` that updates automatically as the
    cache grows. By default the cache grows without bound, so beware of
    wrapping very large or infinite iterables; supply *maxlen* to bound
    the cache (which also bounds how far back you can seek).

    """

    def __init__(self, iterable, maxlen=None):
        self._source = iter(iterable)
        # Cache of every item pulled from the source (or the most recent
        # maxlen of them).
        self._cache = [] if maxlen is None else deque([], maxlen)
        # Cursor into the cache; None means items come from the source.
        self._index = None

    def __iter__(self):
        return self

    def __next__(self):
        if self._index is not None:
            try:
                result = self._cache[self._index]
            except IndexError:
                # Ran off the end of the cache; resume the source below.
                self._index = None
            else:
                self._index += 1
                return result

        result = next(self._source)
        self._cache.append(result)
        return result

    def __bool__(self):
        try:
            self.peek()
        except StopIteration:
            return False
        return True

    def peek(self, default=_marker):
        try:
            peeked = next(self)
        except StopIteration:
            if default is _marker:
                raise
            return default
        # Step the cursor back one so the peeked item is served again.
        if self._index is None:
            self._index = len(self._cache)
        self._index -= 1
        return peeked

    def elements(self):
        return SequenceView(self._cache)

    def seek(self, index):
        self._index = index
        # If the target lies beyond the cache, pull items from the source
        # until we reach it (or the source runs out).
        remainder = index - len(self._cache)
        if remainder > 0:
            consume(self, remainder)

    def relative_seek(self, count):
        if self._index is None:
            self._index = len(self._cache)

        self.seek(max(self._index + count, 0))
class run_length:
    """Run-length encoding and decoding.

    :func:`run_length.encode` compresses an iterable, yielding an
    ``(item, count)`` pair for each run of repeated items:

        >>> list(run_length.encode('abbcccdddd'))
        [('a', 1), ('b', 2), ('c', 3), ('d', 4)]

    :func:`run_length.decode` reverses the process, yielding the items of
    the decompressed iterable:

        >>> compressed = [('a', 1), ('b', 2), ('c', 3), ('d', 4)]
        >>> list(run_length.decode(compressed))
        ['a', 'b', 'b', 'c', 'c', 'c', 'd', 'd', 'd', 'd']

    """

    @staticmethod
    def encode(iterable):
        # groupby collects each run; counting the group gives the length.
        return (
            (value, sum(1 for _ in group))
            for value, group in groupby(iterable)
        )

    @staticmethod
    def decode(iterable):
        # Expand each (value, count) pair back into a run of items.
        return chain.from_iterable(
            repeat(value, times) for value, times in iterable
        )
def exactly_n(iterable, n, predicate=bool):
    """Return ``True`` if exactly ``n`` items in the iterable are ``True``
    according to the *predicate* function.

        >>> exactly_n([True, True, False], 2)
        True
        >>> exactly_n([True, True, False], 1)
        False
        >>> exactly_n([0, 1, 2, 3, 4, 5], 3, lambda x: x < 3)
        True

    The iterable is advanced until ``n + 1`` truthy items are encountered,
    so avoid calling this on infinite iterables.

    """
    if n < 0:
        return False

    matches = filter(predicate, iterable)
    sentinel = object()

    if n > 0:
        # Discard the first n - 1 matches; an nth match must then exist.
        if next(islice(matches, n - 1, None), sentinel) is sentinel:
            return False

    # There must be no further match (and none at all when n == 0).
    return next(matches, sentinel) is sentinel
def circular_shifts(iterable, steps=1):
    """Yield the circular shifts of *iterable*.

        >>> list(circular_shifts(range(4)))
        [(0, 1, 2, 3), (1, 2, 3, 0), (2, 3, 0, 1), (3, 0, 1, 2)]

    Set *steps* to the number of places to rotate to the left
    (or to the right if negative). Defaults to 1.

        >>> list(circular_shifts(range(4), 2))
        [(0, 1, 2, 3), (2, 3, 0, 1)]

        >>> list(circular_shifts(range(4), -1))
        [(0, 1, 2, 3), (3, 0, 1, 2), (2, 3, 0, 1), (1, 2, 3, 0)]

    """
    buffer = deque(iterable)
    if steps == 0:
        raise ValueError('Steps should be a non-zero integer')

    # Rotate backward once so the first emitted shift is the original order.
    buffer.rotate(steps)
    shift = -steps

    # The shifts are periodic: after len / gcd(len, shift) rotations the
    # buffer returns to its starting arrangement.
    period = len(buffer) // math.gcd(len(buffer), shift)
    for _ in range(period):
        buffer.rotate(shift)
        yield tuple(buffer)
def make_decorator(wrapping_func, result_index=0):
    """Return a decorator version of *wrapping_func*, which is a function
    that modifies an iterable. *result_index* is the position in that
    function's signature where the iterable goes.

    This lets you use itertools on the "production end," i.e. at function
    definition, augmenting what the function returns without changing its
    code. For example, to produce a decorator version of :func:`chunked`:

        >>> from more_itertools import chunked
        >>> chunker = make_decorator(chunked, result_index=0)
        >>> @chunker(3)
        ... def iter_range(n):
        ...     return iter(range(n))
        ...
        >>> list(iter_range(9))
        [[0, 1, 2], [3, 4, 5], [6, 7, 8]]

    To only allow truthy items to be returned:

        >>> truth_serum = make_decorator(filter, result_index=1)
        >>> @truth_serum(bool)
        ... def boolean_test():
        ...     return [0, 1, '', ' ', False, True]
        ...
        >>> list(boolean_test())
        [1, ' ', True]

    The :func:`peekable` and :func:`seekable` wrappers also make for
    practical decorators.

    """

    # See https://sites.google.com/site/bbayles/index/decorator_factory for
    # notes on how this works.
    def decorator(*wrapping_args, **wrapping_kwargs):
        def outer_wrapper(f):
            def inner_wrapper(*args, **kwargs):
                result = f(*args, **kwargs)
                # Splice the wrapped function's result into the argument
                # list at result_index.
                spliced_args = (
                    *wrapping_args[:result_index],
                    result,
                    *wrapping_args[result_index:],
                )
                return wrapping_func(*spliced_args, **wrapping_kwargs)

            return inner_wrapper

        return outer_wrapper

    return decorator
def map_reduce(iterable, keyfunc, valuefunc=None, reducefunc=None):
    """Return a dictionary that maps the items in *iterable* to categories
    defined by *keyfunc*, transforms them with *valuefunc*, and
    then summarizes them by category with *reducefunc*.

    *valuefunc* defaults to the identity function if it is unspecified.
    If *reducefunc* is unspecified, no summarization takes place:

        >>> result = map_reduce('abbccc', lambda x: x.upper())
        >>> sorted(result.items())
        [('A', ['a']), ('B', ['b', 'b']), ('C', ['c', 'c', 'c'])]

    Specifying *valuefunc* transforms the categorized items:

        >>> result = map_reduce('abbccc', lambda x: x.upper(), lambda x: 1)
        >>> sorted(result.items())
        [('A', [1]), ('B', [1, 1]), ('C', [1, 1, 1])]

    Specifying *reducefunc* summarizes the categorized items:

        >>> result = map_reduce(
        ...     'abbccc', lambda x: x.upper(), lambda x: 1, sum
        ... )
        >>> sorted(result.items())
        [('A', 1), ('B', 2), ('C', 3)]

    Note that all items in the iterable are gathered into a list before
    the summarization step, which may require significant storage.

    The returned object is a :obj:`collections.defaultdict` with its
    ``default_factory`` set to ``None``, so it behaves like a normal
    dictionary.

    .. seealso:: :func:`bucket`, :func:`groupby_transform`

    If storage is a concern, :func:`bucket` can be used without consuming
    the entire iterable right away. If elements with the same key are
    already adjacent, :func:`groupby_transform` or
    :func:`itertools.groupby` avoid the caching overhead entirely.

    """
    ret = defaultdict(list)

    for item in iterable:
        value = item if valuefunc is None else valuefunc(item)
        ret[keyfunc(item)].append(value)

    if reducefunc is not None:
        for key in ret:
            ret[key] = reducefunc(ret[key])

    # Behave like a plain dict from here on: missing keys raise KeyError.
    ret.default_factory = None
    return ret
def rlocate(iterable, pred=bool, window_size=None):
    """Yield the index of each item in *iterable* for which *pred* returns
    ``True``, searching from the right and moving left.

    *pred* defaults to :func:`bool`, which will select truthy items:

    >>> list(rlocate([0, 1, 1, 0, 1, 0, 0]))  # Truthy at 1, 2, and 4
    [4, 2, 1]

    Set *pred* to a custom function to, e.g., find the indexes for a
    particular item:

    >>> iterator = iter('abcb')
    >>> pred = lambda x: x == 'b'
    >>> list(rlocate(iterator, pred))
    [3, 1]

    If *window_size* is given, then the *pred* function will be called with
    that many items. This enables searching for sub-sequences:

    >>> iterable = [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]
    >>> pred = lambda *args: args == (1, 2, 3)
    >>> list(rlocate(iterable, pred=pred, window_size=3))
    [9, 5, 1]

    Beware, this function won't return anything for infinite iterables.
    If *iterable* is reversible, ``rlocate`` will reverse it and search from
    the right. Otherwise, it will search from the left and return the results
    in reverse order.

    See :func:`locate` for other example applications.

    """
    if window_size is None:
        # A sized, reversible input can be scanned from the right directly.
        # ``reversed()`` may raise TypeError (e.g. for sets, which have a
        # length but no order), so it must be evaluated inside the try.
        try:
            length = len(iterable)
            return (length - 1 - i for i in locate(reversed(iterable), pred))
        except TypeError:
            pass

    # Fall back: search left-to-right, then reverse the collected results.
    return reversed(list(locate(iterable, pred, window_size)))
def replace(iterable, pred, substitutes, count=None, window_size=1):
    """Yield the items from *iterable*, replacing the items for which *pred*
    returns ``True`` with the items from the iterable *substitutes*.

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1]
    >>> pred = lambda x: x == 0
    >>> substitutes = (2, 3)
    >>> list(replace(iterable, pred, substitutes))
    [1, 1, 2, 3, 1, 1, 2, 3, 1, 1]

    If *count* is given, the number of replacements will be limited:

    >>> iterable = [1, 1, 0, 1, 1, 0, 1, 1, 0]
    >>> pred = lambda x: x == 0
    >>> substitutes = [None]
    >>> list(replace(iterable, pred, substitutes, count=2))
    [1, 1, None, 1, 1, None, 1, 1, 0]

    Use *window_size* to control the number of items passed as arguments to
    *pred*. This allows for locating and replacing subsequences.

    >>> iterable = [0, 1, 2, 5, 0, 1, 2, 5]
    >>> window_size = 3
    >>> pred = lambda *args: args == (0, 1, 2)  # 3 items passed to pred
    >>> substitutes = [3, 4] # Splice in these items
    >>> list(replace(iterable, pred, substitutes, window_size=window_size))
    [3, 4, 5, 3, 4, 5]

    *pred* may receive fewer than *window_size* arguments at the end of
    the iterable and should be able to handle this.

    """
    if window_size < 1:
        raise ValueError('window_size must be at least 1')

    # Save the substitutes iterable, since it's used more than once
    substitutes = tuple(substitutes)

    # Add padding such that the number of windows matches the length of the
    # iterable
    it = chain(iterable, repeat(_marker, window_size - 1))
    windows = windowed(it, window_size)

    # n counts the replacements made so far (compared against *count*).
    n = 0
    for w in windows:
        # Strip any _marker padding so pred never sees internal sentinels.
        # Near the end of the iterable, pred will receive fewer arguments.
        args = tuple(x for x in w if x is not _marker)

        # If the current window matches our predicate (and we haven't hit
        # our maximum number of replacements), splice in the substitutes
        # and then consume the following windows that overlap with this one.
        # For example, if the iterable is (0, 1, 2, 3, 4...)
        # and the window size is 2, we have (0, 1), (1, 2), (2, 3)...
        # If the predicate matches on (0, 1), we need to zap (0, 1) and (1, 2)
        if args and pred(*args):
            if (count is None) or (n < count):
                n += 1
                yield from substitutes
                consume(windows, window_size - 1)
                continue

        # If there was no match (or we've reached the replacement limit),
        # yield the first item from the window.
        if args:
            yield args[0]
def partitions(iterable):
    """Yield all possible order-preserving partitions of *iterable*.

    >>> iterable = 'abc'
    >>> for part in partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['a', 'b', 'c']

    This is unrelated to :func:`partition`.

    """
    sequence = list(iterable)
    n = len(sequence)
    # Every subset of the interior positions 1..n-1 is a set of cut points.
    for cuts in powerset(range(1, n)):
        bounds = (0, *cuts, n)
        yield [
            sequence[start:stop] for start, stop in zip(bounds, bounds[1:])
        ]
def set_partitions(iterable, k=None, min_size=None, max_size=None):
    """
    Yield the set partitions of *iterable* into *k* parts. Set partitions are
    not order-preserving.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, 2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']

    If *k* is not given, every set partition is generated.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable):
    ...     print([''.join(p) for p in part])
    ['abc']
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    if *min_size* and/or *max_size* are given, the minimum and/or maximum size
    per block in partition is set.

    >>> iterable = 'abc'
    >>> for part in set_partitions(iterable, min_size=2):
    ...     print([''.join(p) for p in part])
    ['abc']
    >>> for part in set_partitions(iterable, max_size=2):
    ...     print([''.join(p) for p in part])
    ['a', 'bc']
    ['ab', 'c']
    ['b', 'ac']
    ['a', 'b', 'c']

    """
    elements = list(iterable)
    n = len(elements)
    if k is not None:
        if k < 1:
            raise ValueError(
                "Can't partition in a negative or zero number of groups"
            )
        if k > n:
            return

    # Normalize the block-size bounds; an impossible range yields nothing.
    min_size = 0 if min_size is None else min_size
    max_size = n if max_size is None else max_size
    if min_size > max_size:
        return

    def partitions_into(pool, blocks):
        # Recursively yield partitions of *pool* into exactly *blocks*
        # non-empty blocks.
        if blocks == 1:
            yield [pool]
        elif len(pool) == blocks:
            yield [[item] for item in pool]
        else:
            head, *rest = pool
            # Case 1: *head* forms a singleton block.
            for partition in partitions_into(rest, blocks - 1):
                yield [[head], *partition]
            # Case 2: *head* joins each block of a smaller partition in turn.
            for partition in partitions_into(rest, blocks):
                for i, block in enumerate(partition):
                    yield (
                        partition[:i] + [[head] + block] + partition[i + 1 :]
                    )

    def sizes_ok(partition):
        return all(min_size <= len(block) <= max_size for block in partition)

    block_counts = range(1, n + 1) if k is None else (k,)
    for blocks in block_counts:
        yield from filter(sizes_ok, partitions_into(elements, blocks))
class time_limited:
    """
    Yield items from *iterable* until *limit_seconds* have passed.
    If the time limit expires before all items have been yielded, the
    ``timed_out`` parameter will be set to ``True``.

    >>> from time import sleep
    >>> def generator():
    ...     yield 1
    ...     yield 2
    ...     sleep(0.2)
    ...     yield 3
    >>> iterable = time_limited(0.1, generator())
    >>> list(iterable)
    [1, 2]
    >>> iterable.timed_out
    True

    Note that the time is checked before each item is yielded, and iteration
    stops if the time elapsed is greater than *limit_seconds*. If your time
    limit is 1 second, but it takes 2 seconds to generate the first item from
    the iterable, the function will run for 2 seconds and not yield anything.
    As a special case, when *limit_seconds* is zero, the iterator never
    returns anything.

    """

    def __init__(self, limit_seconds, iterable):
        """Wrap *iterable* with a time budget of *limit_seconds*.

        Raises ``ValueError`` if *limit_seconds* is negative. Zero is
        allowed and means "never yield anything" (see class docstring).
        """
        if limit_seconds < 0:
            # Zero is explicitly supported, so the requirement is
            # non-negativity (the old message said "positive").
            raise ValueError('limit_seconds must be non-negative')
        self.limit_seconds = limit_seconds
        self._iterator = iter(iterable)
        # The clock starts at construction, not at first iteration.
        self._start_time = monotonic()
        self.timed_out = False

    def __iter__(self):
        return self

    def __next__(self):
        # Special case: a zero limit never yields anything.
        if self.limit_seconds == 0:
            self.timed_out = True
            raise StopIteration
        # This may block arbitrarily long; the clock is checked only after
        # the underlying iterator produces the item.
        item = next(self._iterator)
        if monotonic() - self._start_time <= self.limit_seconds:
            return item
        self.timed_out = True
        raise StopIteration
def only(iterable, default=None, too_long=None):
    """If *iterable* has only one item, return it.
    If it has zero items, return *default*.
    If it has more than one item, raise the exception given by *too_long*,
    which is ``ValueError`` by default.

    >>> only([], default='missing')
    'missing'
    >>> only([1])
    1
    >>> only([1, 2])  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    ValueError: Expected exactly one item in iterable, but got 1, 2,
    and perhaps more.'
    >>> only([1, 2], too_long=TypeError)  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
    ...
    TypeError

    Note that :func:`only` attempts to advance *iterable* twice to ensure
    there is only one item. See :func:`spy` or :func:`peekable` to check
    iterable contents less destructively.

    """
    marker = object()
    iterator = iter(iterable)

    first = next(iterator, marker)
    if first is marker:
        # Zero items.
        return default

    second = next(iterator, marker)
    if second is marker:
        # Exactly one item.
        return first

    msg = (
        f'Expected exactly one item in iterable, but got {first!r}, '
        f'{second!r}, and perhaps more.'
    )
    raise too_long or ValueError(msg)
def ichunked(iterable, n):
    """Break *iterable* into sub-iterables with *n* elements each.
    :func:`ichunked` is like :func:`chunked`, but it yields iterables
    instead of lists.

    If the sub-iterables are read in order, the elements of *iterable*
    won't be stored in memory.
    If they are read out of order, :func:`itertools.tee` is used to cache
    elements as necessary.

    >>> from itertools import count
    >>> all_chunks = ichunked(count(), 4)
    >>> c_1, c_2, c_3 = next(all_chunks), next(all_chunks), next(all_chunks)
    >>> list(c_2)  # c_1's elements have been cached; c_3's haven't been
    [4, 5, 6, 7]
    >>> list(c_1)
    [0, 1, 2, 3]
    >>> list(c_3)
    [8, 9, 10, 11]

    """
    iterator = iter(iterable)
    for first in iterator:
        # The chunk is *first* plus up to n - 1 further items.
        rest = islice(iterator, n - 1)
        # Tee *rest* so its items can be replayed from a cache if the caller
        # reads this chunk after later chunks have been requested.
        cache, cacher = tee(rest)
        # If the chunk is read in order, it drains *rest* directly and finds
        # *cache* empty. If consume(cacher) below ran first, *rest* is
        # already exhausted and the items come from *cache* instead.
        yield chain([first], rest, cache)
        # Advance the underlying iterator past this chunk; any items the
        # caller has not read yet are buffered into *cache* by the tee.
        consume(cacher)
def iequals(*iterables):
    """Return ``True`` if all given *iterables* are equal to each other,
    which means that they contain the same elements in the same order.

    The function is useful for comparing iterables of different data types
    or iterables that do not support equality checks.

    >>> iequals("abc", ['a', 'b', 'c'], ('a', 'b', 'c'), iter("abc"))
    True

    >>> iequals("abc", "acb")
    False

    Not to be confused with :func:`all_equal`, which checks whether all
    elements of iterable are equal to each other.

    """
    try:
        # strict=True makes zip raise ValueError (mid-iteration, inside
        # all()) when the iterables have different lengths.
        return all(all_equal(group) for group in zip(*iterables, strict=True))
    except ValueError:
        return False
def distinct_combinations(iterable, r):
    """Yield the distinct combinations of *r* items taken from *iterable*.

    >>> list(distinct_combinations([0, 0, 1], 2))
    [(0, 0), (0, 1)]

    Equivalent to ``set(combinations(iterable))``, except duplicates are not
    generated and thrown away. For larger input sequences this is much more
    efficient.

    """
    if r < 0:
        raise ValueError('r must be non-negative')
    elif r == 0:
        # There is exactly one empty combination.
        yield ()
        return
    pool = tuple(iterable)
    # Depth-first search over a stack of generators, one per position of the
    # combination being built. unique_everseen (keyed on the value, not the
    # index) skips values already tried at a level, which is what prevents
    # duplicate combinations from being generated.
    generators = [unique_everseen(enumerate(pool), key=itemgetter(1))]
    current_combo = [None] * r
    level = 0
    while generators:
        try:
            cur_idx, p = next(generators[-1])
        except StopIteration:
            # This level is exhausted; backtrack to the previous position.
            generators.pop()
            level -= 1
            continue
        current_combo[level] = p
        if level + 1 == r:
            yield tuple(current_combo)
        else:
            # Descend: candidates for the next position come strictly after
            # cur_idx, keeping each combination's indices increasing.
            generators.append(
                unique_everseen(
                    enumerate(pool[cur_idx + 1 :], cur_idx + 1),
                    key=itemgetter(1),
                )
            )
            level += 1
def filter_except(validator, iterable, *exceptions):
    """Yield the items from *iterable* for which the *validator* function
    does not raise one of the specified *exceptions*.

    *validator* is called for each item in *iterable*.
    It should be a function that accepts one argument and raises an exception
    if that item is not valid.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(filter_except(int, iterable, ValueError, TypeError))
    ['1', '2', '4']

    If an exception other than one given by *exceptions* is raised by
    *validator*, it is raised like normal.
    """
    for item in iterable:
        try:
            validator(item)
        except exceptions:
            # Invalid item; skip it.
            continue
        yield item
def map_except(function, iterable, *exceptions):
    """Transform each item from *iterable* with *function* and yield the
    result, unless *function* raises one of the specified *exceptions*.

    *function* is called to transform each item in *iterable*.
    It should accept one argument.

    >>> iterable = ['1', '2', 'three', '4', None]
    >>> list(map_except(int, iterable, ValueError, TypeError))
    [1, 2, 4]

    If an exception other than one given by *exceptions* is raised by
    *function*, it is raised like normal.
    """
    for item in iterable:
        try:
            result = function(item)
        except exceptions:
            # The transformation failed in an expected way; skip the item.
            continue
        yield result
def map_if(iterable, pred, func, func_else=None):
    """Evaluate each item from *iterable* using *pred*. If the result is
    equivalent to ``True``, transform the item with *func* and yield it.
    Otherwise, transform the item with *func_else* and yield it.

    *pred*, *func*, and *func_else* should each be functions that accept
    one argument. By default, *func_else* is the identity function.

    >>> from math import sqrt
    >>> iterable = list(range(-5, 5))
    >>> iterable
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 4]
    >>> list(map_if(iterable, lambda x: x > 3, lambda x: 'toobig'))
    [-5, -4, -3, -2, -1, 0, 1, 2, 3, 'toobig']
    >>> list(map_if(iterable, lambda x: x >= 0,
    ... lambda x: f'{sqrt(x):.2f}', lambda x: None))
    [None, None, None, None, None, '0.00', '1.00', '1.41', '1.73', '2.00']
    """
    # By default, non-matching items pass through unchanged.
    if func_else is None:
        transform_else = lambda item: item  # noqa: E731
    else:
        transform_else = func_else
    for item in iterable:
        yield func(item) if pred(item) else transform_else(item)
def _sample_unweighted(iterator, k, strict):
    # Algorithm L in the 1994 paper by Kim-Hung Li:
    # "Reservoir-Sampling Algorithms of Time Complexity O(n(1+log(N/n)))".

    # The first k items fill the reservoir directly.
    reservoir = list(islice(iterator, k))
    if strict and len(reservoir) < k:
        raise ValueError('Sample larger than population')
    W = 1.0

    with suppress(StopIteration):
        while True:
            # Rather than testing every item, compute how many items to
            # skip before the next replacement. next() raises StopIteration
            # when the input is exhausted, exiting the loop via suppress().
            W *= random() ** (1 / k)
            skip = floor(log(random()) / log1p(-W))
            element = next(islice(iterator, skip, None))
            reservoir[randrange(k)] = element

    shuffle(reservoir)
    return reservoir
def _sample_weighted(iterator, k, weights, strict):
    # Implementation of "A-ExpJ" from the 2006 paper by Efraimidis et al. :
    # "Weighted random sampling with a reservoir".

    # Log-transform for numerical stability for weights that are small/large
    weight_keys = (log(random()) / weight for weight in weights)

    # Fill up the reservoir (collection of samples) with the first `k`
    # weight-keys and elements, then heapify the list.
    reservoir = take(k, zip(weight_keys, iterator))
    if strict and len(reservoir) < k:
        raise ValueError('Sample larger than population')

    # Min-heap ordered by weight-key: reservoir[0] is the candidate for
    # eviction.
    heapify(reservoir)

    # The number of jumps before changing the reservoir is a random variable
    # with an exponential distribution. Sample it using random() and logs.
    smallest_weight_key, _ = reservoir[0]
    weights_to_skip = log(random()) / smallest_weight_key

    for weight, element in zip(weights, iterator):
        if weight >= weights_to_skip:
            # The notation here is consistent with the paper, but we store
            # the weight-keys in log-space for better numerical stability.
            smallest_weight_key, _ = reservoir[0]
            t_w = exp(weight * smallest_weight_key)
            r_2 = uniform(t_w, 1)  # generate U(t_w, 1)
            weight_key = log(r_2) / weight
            # Replace the smallest-keyed entry with the new element.
            heapreplace(reservoir, (weight_key, element))
            smallest_weight_key, _ = reservoir[0]
            weights_to_skip = log(random()) / smallest_weight_key
        else:
            weights_to_skip -= weight

    # Equivalent to [element for weight_key, element in sorted(reservoir)]
    # except that the order of the elements is not deterministic.
    ret = [element for weight_key, element in reservoir]
    shuffle(ret)
    return ret
def _sample_counted(population, k, counts, strict):
    # Unweighted reservoir sampling (same W/skip scheme as
    # _sample_unweighted) over a population given in run-length form:
    # each element of *population* repeats next(counts) times.
    element = None
    remaining = 0

    def feed(i):
        # Advance *i* steps ahead and consume an element
        nonlocal element, remaining

        while i + 1 > remaining:
            i = i - remaining
            element = next(population)
            remaining = next(counts)
        remaining -= i + 1
        return element

    # Fill the reservoir with the first k (virtual) items; a short input
    # simply leaves the reservoir short (StopIteration is suppressed).
    with suppress(StopIteration):
        reservoir = []
        for _ in range(k):
            reservoir.append(feed(0))

    if strict and len(reservoir) < k:
        raise ValueError('Sample larger than population')

    with suppress(StopIteration):
        W = 1.0
        while True:
            # Skip-ahead replacement, exiting via suppress() when the
            # population is exhausted.
            W *= random() ** (1 / k)
            skip = floor(log(random()) / log1p(-W))
            element = feed(skip)
            reservoir[randrange(k)] = element

    shuffle(reservoir)
    return reservoir
def sample(iterable, k, weights=None, *, counts=None, strict=False):
    """Return a *k*-length list of elements chosen (without replacement)
    from the *iterable*.

    Similar to :func:`random.sample`, but works on inputs that aren't
    indexable (such as sets and dictionaries) and on inputs where the
    size isn't known in advance (such as generators).

    >>> iterable = range(100)
    >>> sample(iterable, 5)  # doctest: +SKIP
    [81, 60, 96, 16, 4]

    For iterables with repeated elements, you may supply *counts* to
    indicate the repeats.

    >>> iterable = ['a', 'b']
    >>> counts = [3, 4]  # Equivalent to 'a', 'a', 'a', 'b', 'b', 'b', 'b'
    >>> sample(iterable, k=3, counts=counts)  # doctest: +SKIP
    ['a', 'a', 'b']

    An iterable with *weights* may be given:

    >>> iterable = range(100)
    >>> weights = (i * i + 1 for i in range(100))
    >>> sample(iterable, 5, weights=weights)  # doctest: +SKIP
    [79, 67, 74, 66, 78]

    Weighted selections are made without replacement: after an element is
    selected, it is removed from the pool and the relative weights of the
    other elements increase (this does not match the behavior of
    :func:`random.sample`'s *counts* parameter). Note that *weights* may
    not be used with *counts*.

    If the length of *iterable* is less than *k*,
    ``ValueError`` is raised if *strict* is ``True`` and
    all elements are returned (in shuffled order) if *strict* is ``False``.

    By default, the `Algorithm L <https://w.wiki/ANrM>`__ reservoir sampling
    technique is used. When *weights* are provided,
    `Algorithm A-ExpJ <https://w.wiki/ANrS>`__ is used instead.

    Notes on reproducibility:

    * The algorithms rely on inexact floating-point functions provided
      by the underlying math library (e.g. ``log``, ``log1p``, and ``pow``),
      which can produce slightly different results on different builds.
      Accordingly, selections can vary across builds even for the same seed.

    * The algorithms loop over the input and make selections based on
      ordinal position, so selections from unordered collections (such as
      sets) won't reproduce across sessions on the same platform using the
      same seed.

    """
    iterator = iter(iterable)

    if k < 0:
        raise ValueError('k must be non-negative')
    if k == 0:
        return []

    if weights is not None and counts is not None:
        raise TypeError('weights and counts are mutually exclusive')

    # Dispatch to the appropriate reservoir-sampling implementation.
    if weights is not None:
        return _sample_weighted(iterator, k, iter(weights), strict)
    if counts is not None:
        return _sample_counted(iterator, k, iter(counts), strict)
    return _sample_unweighted(iterator, k, strict)
def is_sorted(iterable, key=None, reverse=False, strict=False):
    """Returns ``True`` if the items of iterable are in sorted order, and
    ``False`` otherwise. *key* and *reverse* have the same meaning that they
    do in the built-in :func:`sorted` function.

    >>> is_sorted(['1', '2', '3', '4', '5'], key=int)
    True
    >>> is_sorted([5, 4, 3, 1, 2], reverse=True)
    False

    If *strict*, tests for strict sorting, that is, returns ``False`` if
    equal elements are found:

    >>> is_sorted([1, 2, 2])
    True
    >>> is_sorted([1, 2, 2], strict=True)
    False

    The function returns ``False`` after encountering the first out-of-order
    item, which means it may produce results that differ from the built-in
    :func:`sorted` function for objects with unusual comparison dynamics
    (like ``math.nan``). If there are no out-of-order items, the iterable is
    exhausted.
    """
    values = iterable if key is None else map(key, iterable)
    # Two cursors over the same stream; *following* is advanced one step
    # so that the pairs compared are (x[i], x[i + 1]).
    preceding, following = tee(values)
    next(following, None)
    if reverse:
        preceding, following = following, preceding
    if strict:
        # Every adjacent pair must be strictly increasing.
        return all(map(lt, preceding, following))
    # No adjacent pair may be decreasing.
    return not any(map(lt, following, preceding))
class AbortThread(BaseException):
    """Raised in the background thread by :class:`callback_iter` when the
    user exits the ``with`` block before the wrapped function has finished.

    Subclasses ``BaseException`` rather than ``Exception``, so ``except
    Exception`` handlers inside the wrapped function won't swallow it.
    """

    pass
class callback_iter:
    """Convert a function that uses callbacks to an iterator.

    Let *func* be a function that takes a `callback` keyword argument.
    For example:

    >>> def func(callback=None):
    ...     for i, c in [(1, 'a'), (2, 'b'), (3, 'c')]:
    ...         if callback:
    ...             callback(i, c)
    ...     return 4

    Use ``with callback_iter(func)`` to get an iterator over the parameters
    that are delivered to the callback.

    >>> with callback_iter(func) as it:
    ...     for args, kwargs in it:
    ...         print(args)
    (1, 'a')
    (2, 'b')
    (3, 'c')

    The function will be called in a background thread. The ``done`` property
    indicates whether it has completed execution.

    >>> it.done
    True

    If it completes successfully, its return value will be available
    in the ``result`` property.

    >>> it.result
    4

    Notes:

    * If the function uses some keyword argument besides ``callback``, supply
      *callback_kwd*.
    * If it finished executing, but raised an exception, accessing the
      ``result`` property will raise the same exception.
    * If it hasn't finished executing, accessing the ``result``
      property from within the ``with`` block will raise ``RuntimeError``.
    * If it hasn't finished executing, accessing the ``result`` property from
      outside the ``with`` block will raise a
      ``more_itertools.AbortThread`` exception.
    * Provide *wait_seconds* to adjust how frequently the iterator is polled
      for output.

    """

    def __init__(self, func, callback_kwd='callback', wait_seconds=0.1):
        self._func = func
        self._callback_kwd = callback_kwd
        self._aborted = False
        self._future = None
        self._wait_seconds = wait_seconds
        # Lazily import concurrent.future
        self._module = __import__('concurrent.futures').futures
        # A single worker thread runs the wrapped function.
        self._executor = self._module.ThreadPoolExecutor(max_workers=1)
        self._iterator = self._reader()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Make the callback raise AbortThread on its next invocation, then
        # wait for the worker thread to wind down.
        self._aborted = True
        self._executor.shutdown()

    def __iter__(self):
        return self

    def __next__(self):
        return next(self._iterator)

    @property
    def done(self):
        # True once the background function has finished executing.
        if self._future is None:
            return False
        return self._future.done()

    @property
    def result(self):
        # timeout=0 means "don't wait": if the function hasn't finished,
        # TimeoutError is raised and we report RuntimeError instead.
        if self._future:
            try:
                return self._future.result(timeout=0)
            except self._module.TimeoutError:
                pass

        raise RuntimeError('Function has not yet completed')

    def _reader(self):
        # Generator that shuttles callback invocations from the worker
        # thread to the consuming thread through a queue.
        q = Queue()

        def callback(*args, **kwargs):
            if self._aborted:
                raise AbortThread('canceled by user')

            q.put((args, kwargs))

        self._future = self._executor.submit(
            self._func, **{self._callback_kwd: callback}
        )

        # Poll the queue, yielding each (args, kwargs) pair as it arrives,
        # until the background function completes.
        while True:
            try:
                item = q.get(timeout=self._wait_seconds)
            except Empty:
                pass
            else:
                q.task_done()
                yield item

            if self._future.done():
                break

        # The function may have finished between polls; drain anything
        # still queued before finishing.
        remaining = []
        while True:
            try:
                item = q.get_nowait()
            except Empty:
                break
            else:
                q.task_done()
                remaining.append(item)
        q.join()
        yield from remaining
def windowed_complete(iterable, n):
    """
    Yield ``(beginning, middle, end)`` tuples, where:

    * Each ``middle`` has *n* items from *iterable*
    * Each ``beginning`` has the items before the ones in ``middle``
    * Each ``end`` has the items after the ones in ``middle``

    >>> iterable = range(7)
    >>> n = 3
    >>> for beginning, middle, end in windowed_complete(iterable, n):
    ...     print(beginning, middle, end)
    () (0, 1, 2) (3, 4, 5, 6)
    (0,) (1, 2, 3) (4, 5, 6)
    (0, 1) (2, 3, 4) (5, 6)
    (0, 1, 2) (3, 4, 5) (6,)
    (0, 1, 2, 3) (4, 5, 6) ()

    Note that *n* must be at least 0 and at most equal to the length of
    *iterable*.

    This function will exhaust the iterable and may require significant
    storage.
    """
    if n < 0:
        raise ValueError('n must be >= 0')

    seq = tuple(iterable)
    size = len(seq)
    if n > size:
        raise ValueError('n must be <= len(seq)')

    # Slide the middle window across the sequence one position at a time.
    for start in range(size - n + 1):
        stop = start + n
        yield seq[:start], seq[start:stop], seq[stop:]
def all_unique(iterable, key=None):
    """
    Returns ``True`` if all the elements of *iterable* are unique (no two
    elements are equal).

    >>> all_unique('ABCB')
    False

    If a *key* function is specified, it will be used to make comparisons.

    >>> all_unique('ABCb')
    True
    >>> all_unique('ABCb', str.lower)
    False

    The function returns as soon as the first non-unique element is
    encountered. Iterables with a mix of hashable and unhashable items can
    be used, but the function will be slower for unhashable items.
    """
    # Hashable values go in a set (fast); unhashable values fall back to a
    # list with linear-time membership checks.
    seen_hashable = set()
    seen_unhashable = []
    for value in map(key, iterable) if key else iterable:
        try:
            if value in seen_hashable:
                return False
            seen_hashable.add(value)
        except TypeError:
            if value in seen_unhashable:
                return False
            seen_unhashable.append(value)
    return True
def nth_product(index, *iterables, repeat=1):
    """Equivalent to ``list(product(*iterables, repeat=repeat))[index]``.

    The products of *iterables* can be ordered lexicographically.
    :func:`nth_product` computes the product at sort position *index*
    without computing the previous products.

    >>> nth_product(8, range(2), range(2), range(2), range(2))
    (1, 0, 0, 0)

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. The above example is equivalent to::

        >>> nth_product(8, range(2), repeat=4)
        (1, 0, 0, 0)

    ``IndexError`` will be raised if the given *index* is invalid.
    """
    # Work from the last pool to the first, so *index* can be decomposed
    # least-significant digit first.
    pools = tuple(map(tuple, reversed(iterables))) * repeat
    ns = tuple(map(len, pools))
    total = prod(ns)

    # Negative indices count from the end, as with sequence indexing.
    if index < 0:
        index += total
    if not 0 <= index < total:
        raise IndexError

    # Treat *index* as a mixed-radix number: each pool's size is one radix,
    # and each digit selects an item from the corresponding pool.
    selections = []
    for pool, radix in zip(pools, ns):
        index, digit = divmod(index, radix)
        selections.append(pool[digit])

    return tuple(reversed(selections))
def nth_permutation(iterable, r, index):
    """Equivalent to ``list(permutations(iterable, r))[index]``

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`nth_permutation`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences.

    >>> nth_permutation('ghijk', 2, 5)
    ('h', 'i')

    ``ValueError`` will be raised If *r* is negative.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = list(iterable)
    n = len(pool)
    if r is None:
        r = n
    # perm() raises ValueError when r is negative.
    c = perm(n, r)

    # Negative indices count from the end, as with sequence indexing.
    if index < 0:
        index += c
    if not 0 <= index < c:
        raise IndexError

    result = [0] * r
    # Scale a partial-permutation index up to a full-length (r == n)
    # permutation index, then decompose it in the factorial number system.
    q = index * factorial(n) // c if r < n else index
    for d in range(1, n + 1):
        q, i = divmod(q, d)
        if 0 <= n - d < r:
            result[n - d] = i
        if q == 0:
            break

    # map() evaluates left to right and each pop() shrinks the pool, so
    # each stored digit indexes into the items remaining at that step.
    return tuple(map(pool.pop, result))
def nth_combination_with_replacement(iterable, r, index):
    """Equivalent to
    ``list(combinations_with_replacement(iterable, r))[index]``.

    The subsequences with repetition of *iterable* that are of length *r* can
    be ordered lexicographically. :func:`nth_combination_with_replacement`
    computes the subsequence at sort position *index* directly, without
    computing the previous subsequences with replacement.

    >>> nth_combination_with_replacement(range(5), 3, 5)
    (0, 1, 1)

    ``ValueError`` will be raised If *r* is negative.
    ``IndexError`` will be raised if the given *index* is invalid.
    """
    pool = tuple(iterable)
    n = len(pool)
    if r < 0:
        raise ValueError
    # Total number of combinations with replacement: comb(n + r - 1, r),
    # with the degenerate empty-pool cases handled explicitly.
    c = comb(n + r - 1, r) if n else (0 if r else 1)

    # Negative indices count from the end, as with sequence indexing.
    if index < 0:
        index += c

    if not 0 <= index < c:
        raise IndexError

    result = []
    i = 0
    while r:
        r -= 1
        # Find the pool position i whose block of combinations contains
        # *index*, subtracting the sizes of the blocks that are skipped.
        while n >= 0:
            num_combs = comb(n + r - 1, r)
            if index < num_combs:
                break
            n -= 1
            i += 1
            index -= num_combs
        result.append(pool[i])

    return tuple(result)
def value_chain(*args):
    """Yield all arguments passed to the function in the same order in which
    they were passed. If an argument itself is iterable then iterate over its
    values.

    >>> list(value_chain(1, 2, 3, [4, 5, 6]))
    [1, 2, 3, 4, 5, 6]

    Binary and text strings are not considered iterable and are emitted
    as-is:

    >>> list(value_chain('12', '34', ['56', '78']))
    ['12', '34', '56', '78']

    Pre- or postpend a single element to an iterable:

    >>> list(value_chain(1, [2, 3, 4, 5, 6]))
    [1, 2, 3, 4, 5, 6]
    >>> list(value_chain([1, 2, 3, 4, 5], 6))
    [1, 2, 3, 4, 5, 6]

    Multiple levels of nesting are not flattened.

    """
    for value in args:
        # str and bytes are iterable, but are treated as scalar values here.
        if isinstance(value, (str, bytes)):
            yield value
        else:
            try:
                yield from value
            except TypeError:
                # Not iterable: emit the value itself.
                yield value
def product_index(element, *iterables, repeat=1):
    """Equivalent to ``list(product(*iterables, repeat=repeat)).index(tuple(element))``

    The products of *iterables* can be ordered lexicographically.
    :func:`product_index` computes the first index of *element* without
    computing the previous products.

    >>> product_index([8, 2], range(10), range(5))
    42

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables::

    >>> product_index([8, 0, 7], range(10), repeat=3)
    807

    ``ValueError`` will be raised if the given *element* isn't in the product
    of *args*.
    """
    elements = tuple(element)
    pools = tuple(map(tuple, iterables)) * repeat
    if len(elements) != len(pools):
        raise ValueError('element is not a product of args')

    # Horner's scheme in mixed radix: each pool contributes one "digit"
    # whose radix is that pool's size. pool.index() raises ValueError when
    # the value isn't present in its pool.
    index = 0
    for value, pool in zip(elements, pools):
        index = index * len(pool) + pool.index(value)
    return index
def combination_index(element, iterable):
    """Equivalent to ``list(combinations(iterable, r)).index(element)``

    The subsequences of *iterable* that are of length *r* can be ordered
    lexicographically. :func:`combination_index` computes the index of the
    first *element*, without computing the previous combinations.

    >>> combination_index('adf', 'abcdefg')
    10

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations of *iterable*.
    """
    # Pair each item of *element* with its position; consumed lazily so we
    # also learn r (as final value of k, zero-based) along the way.
    element = enumerate(element)
    k, y = next(element, (None, None))
    if k is None:
        # The empty combination is always first.
        return 0

    # Scan the pool, recording the pool position of each matched item of
    # *element* in order.
    indexes = []
    pool = enumerate(iterable)
    for n, x in pool:
        if x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                # All items of *element* were matched.
                break
            else:
                k = tmp
    else:
        # Pool ran out before every item of *element* was found.
        raise ValueError('element is not a combination of iterable')

    # Exhaust the rest of the pool so n ends up as the last pool index.
    n, _ = last(pool, default=(n, None))

    # NOTE(review): this accumulates counts of combinations from the
    # matched positions (working from the last match backwards) and
    # subtracts from the total comb(n + 1, k + 1); verified against the
    # doctest above rather than derived here.
    index = 1
    for i, j in enumerate(reversed(indexes), start=1):
        j = n - j
        if i <= j:
            index += comb(j, i)

    return comb(n + 1, k + 1) - index
def combination_with_replacement_index(element, iterable):
    """Equivalent to
    ``list(combinations_with_replacement(iterable, r)).index(element)``

    The subsequences with repetition of *iterable* that are of length *r* can
    be ordered lexicographically. :func:`combination_with_replacement_index`
    computes the index of the first *element*, without computing the previous
    combinations with replacement.

    >>> combination_with_replacement_index('adf', 'abcdefg')
    20

    ``ValueError`` will be raised if the given *element* isn't one of the
    combinations with replacement of *iterable*.
    """
    element = tuple(element)
    l = len(element)  # r, the combination length
    element = enumerate(element)

    k, y = next(element, (None, None))
    if k is None:
        # The empty combination is always first.
        return 0

    indexes = []
    pool = tuple(iterable)
    for n, x in enumerate(pool):
        # The inner while consumes *repeated* occurrences of the same pool
        # item, so e.g. 'aad' matches pool item 'a' twice at position 0.
        while x == y:
            indexes.append(n)
            tmp, y = next(element, (None, None))
            if tmp is None:
                break
            else:
                k = tmp
        if y is None:
            # Every item of *element* has been matched.
            break
    else:
        raise ValueError(
            'element is not a combination with replacement of iterable'
        )

    # Tally how many times each pool position was used by *element*.
    n = len(pool)
    occupations = [0] * n
    for p in indexes:
        occupations[p] += 1

    # NOTE(review): stars-and-bars style count of the multisets that
    # precede *element* lexicographically; verified against the doctest
    # above rather than derived here.
    index = 0
    cumulative_sum = 0
    for k in range(1, n):
        cumulative_sum += occupations[k - 1]
        j = l + n - 1 - k - cumulative_sum
        i = n - k
        if i <= j:
            index += comb(j, i)

    return index
def permutation_index(element, iterable):
    """Equivalent to ``list(permutations(iterable, r)).index(element)```

    The subsequences of *iterable* that are of length *r* where order is
    important can be ordered lexicographically. :func:`permutation_index`
    computes the index of the first *element* directly, without computing
    the previous permutations.

    >>> permutation_index([1, 3, 2], range(5))
    19

    ``ValueError`` will be raised if the given *element* isn't one of the
    permutations of *iterable*.
    """
    # Factorial number system: each item contributes its position among
    # the remaining candidates, weighted by the shrinking pool size.
    remaining = list(iterable)
    radix = len(remaining)
    result = 0
    for item in element:
        pos = remaining.index(item)
        result = result * radix + pos
        del remaining[pos]
        radix -= 1
    return result
class countable:
    """Wrap *iterable* and keep a count of how many items have been consumed.

    The ``items_seen`` attribute starts at ``0`` and increments as the iterable
    is consumed:

    >>> iterable = map(str, range(10))
    >>> it = countable(iterable)
    >>> it.items_seen
    0
    >>> next(it), next(it)
    ('0', '1')
    >>> list(it)
    ['2', '3', '4', '5', '6', '7', '8', '9']
    >>> it.items_seen
    10
    """

    def __init__(self, iterable):
        # Count before the iterator so a failing iter() leaves nothing set.
        self.items_seen = 0
        self._iterator = iter(iterable)

    def __iter__(self):
        return self

    def __next__(self):
        value = next(self._iterator)
        self.items_seen += 1
        return value
def chunked_even(iterable, n):
    """Break *iterable* into lists of approximately length *n*.
    Items are distributed such the lengths of the lists differ by at most
    1 item.

    >>> iterable = [1, 2, 3, 4, 5, 6, 7]
    >>> n = 3
    >>> list(chunked_even(iterable, n))  # List lengths: 3, 2, 2
    [[1, 2, 3], [4, 5], [6, 7]]
    >>> list(chunked(iterable, n))  # List lengths: 3, 3, 1
    [[1, 2, 3], [4, 5, 6], [7]]

    """
    stream = iter(iterable)

    # Hold back enough items so that any underfilled trailing chunks
    # can be evened out at the end.
    reserve = (n - 1) * (n - 2)
    held = list(islice(stream, reserve))

    # Keep appending; every n-th append means a full chunk is ready
    # (beyond the reserve), so emit it.
    append = held.append
    for _ in islice(map(append, stream), n, None, n):
        yield held[:n]
        del held[:n]

    # Nothing left over: all chunks were full.
    if not held:
        return
    remaining = len(held)

    # Leftover chunks are either of size full_size <= n or of size
    # partial_size = full_size - 1.
    q, r = divmod(remaining, n)
    num_lists = q + bool(r)
    q, r = divmod(remaining, num_lists)
    full_size = q + bool(r)
    partial_size = full_size - 1
    num_full = remaining - partial_size * num_lists
    boundary = num_full * full_size

    # Emit the full-sized chunks first, then the partial-sized ones.
    if full_size > 0:
        for start in range(0, boundary, full_size):
            yield held[start : start + full_size]
    if partial_size > 0:
        for start in range(boundary, remaining, partial_size):
            yield held[start : start + partial_size]
4593def zip_broadcast(*objects, scalar_types=(str, bytes), strict=False):
4594 """A version of :func:`zip` that "broadcasts" any scalar
4595 (i.e., non-iterable) items into output tuples.
4597 >>> iterable_1 = [1, 2, 3]
4598 >>> iterable_2 = ['a', 'b', 'c']
4599 >>> scalar = '_'
4600 >>> list(zip_broadcast(iterable_1, iterable_2, scalar))
4601 [(1, 'a', '_'), (2, 'b', '_'), (3, 'c', '_')]
4603 The *scalar_types* keyword argument determines what types are considered
4604 scalar. It is set to ``(str, bytes)`` by default. Set it to ``None`` to
4605 treat strings and byte strings as iterable:
4607 >>> list(zip_broadcast('abc', 0, 'xyz', scalar_types=None))
4608 [('a', 0, 'x'), ('b', 0, 'y'), ('c', 0, 'z')]
4610 If the *strict* keyword argument is ``True``, then
4611 ``ValueError`` will be raised if any of the iterables have
4612 different lengths.
4613 """
4615 def is_scalar(obj):
4616 if scalar_types and isinstance(obj, scalar_types):
4617 return True
4618 try:
4619 iter(obj)
4620 except TypeError:
4621 return True
4622 else:
4623 return False
4625 size = len(objects)
4626 if not size:
4627 return
4629 new_item = [None] * size
4630 iterables, iterable_positions = [], []
4631 for i, obj in enumerate(objects):
4632 if is_scalar(obj):
4633 new_item[i] = obj
4634 else:
4635 iterables.append(iter(obj))
4636 iterable_positions.append(i)
4638 if not iterables:
4639 yield tuple(objects)
4640 return
4642 for item in zip(*iterables, strict=strict):
4643 for i, new_item[i] in zip(iterable_positions, item):
4644 pass
4645 yield tuple(new_item)
def unique_in_window(iterable, n, key=None):
    """Yield the items from *iterable* that haven't been seen recently.
    *n* is the size of the sliding window.

    >>> iterable = [0, 1, 0, 2, 3, 0]
    >>> n = 3
    >>> list(unique_in_window(iterable, n))
    [0, 1, 2, 3, 0]

    The *key* function, if provided, will be used to determine uniqueness:

    >>> list(unique_in_window('abAcda', 3, key=lambda x: x.lower()))
    ['a', 'b', 'c', 'd', 'a']

    When `n == 1`, *unique_in_window* is memoryless:

    >>> list(unique_in_window('aab', n=1))
    ['a', 'a', 'b']

    The items in *iterable* must be hashable.

    """
    if n <= 0:
        raise ValueError('n must be greater than 0')

    # The deque holds the last n keys; the Counter tracks how many times
    # each key currently appears within the window.
    window = deque(maxlen=n)
    counts = Counter()

    for item in iterable:
        # Once the window is full, the oldest key is about to fall out.
        if len(window) == n:
            oldest = window[0]
            counts[oldest] -= 1
            if not counts[oldest]:
                del counts[oldest]

        k = item if key is None else key(item)
        if k not in counts:
            yield item
        counts[k] += 1
        window.append(k)
def duplicates_everseen(iterable, key=None):
    """Yield duplicate elements after their first appearance.

    >>> list(duplicates_everseen('mississippi'))
    ['s', 'i', 's', 's', 'i', 'p', 'i']
    >>> list(duplicates_everseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'A', 'a', 'a']

    This function is analogous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    # Hashable keys go in a set; unhashable ones fall back to a list.
    hashable = set()
    unhashable = []

    for element in iterable:
        k = element if key is None else key(element)
        try:
            first_time = k not in hashable
            if first_time:
                hashable.add(k)
        except TypeError:
            first_time = k not in unhashable
            if first_time:
                unhashable.append(k)
        if not first_time:
            yield element
def duplicates_justseen(iterable, key=None):
    """Yields serially-duplicate elements after their first appearance.

    >>> list(duplicates_justseen('mississippi'))
    ['s', 's', 'p']
    >>> list(duplicates_justseen('AaaBbbCccAaa', str.lower))
    ['a', 'a', 'b', 'b', 'c', 'c', 'a', 'a']

    This function is analogous to :func:`unique_justseen`.

    """
    for _, run in groupby(iterable, key):
        # Discard the first element of each run and emit the rest.
        next(run)
        yield from run
def classify_unique(iterable, key=None):
    """Classify each element in terms of its uniqueness.

    For each element in the input iterable, return a 3-tuple consisting of:

    1. The element itself
    2. ``False`` if the element is equal to the one preceding it in the input,
       ``True`` otherwise (i.e. the equivalent of :func:`unique_justseen`)
    3. ``False`` if this element has been seen anywhere in the input before,
       ``True`` otherwise (i.e. the equivalent of :func:`unique_everseen`)

    >>> list(classify_unique('otto'))    # doctest: +NORMALIZE_WHITESPACE
    [('o', True,  True),
     ('t', True,  True),
     ('t', False, False),
     ('o', True,  False)]

    This function is analogous to :func:`unique_everseen` and is subject to
    the same performance considerations.

    """
    hashable = set()
    unhashable = []
    prev = None
    first = True

    for element in iterable:
        k = element if key is None else key(element)

        # The very first element is always "just seen" unique.
        justseen = first or prev != k
        first = False
        prev = k

        everseen = False
        try:
            if k not in hashable:
                hashable.add(k)
                everseen = True
        except TypeError:
            # Unhashable keys fall back to a linear scan of a list.
            if k not in unhashable:
                unhashable.append(k)
                everseen = True

        yield element, justseen, everseen
def minmax(iterable_or_value, *others, key=None, default=_marker):
    """Returns both the smallest and largest items from an iterable
    or from two or more arguments.

    >>> minmax([3, 1, 5])
    (1, 5)

    >>> minmax(4, 2, 6)
    (2, 6)

    If a *key* function is provided, it will be used to transform the input
    items for comparison.

    >>> minmax([5, 30], key=str)  # '30' sorts before '5'
    (30, 5)

    If a *default* value is provided, it will be returned if there are no
    input items.

    >>> minmax([], default=(0, 0))
    (0, 0)

    Otherwise ``ValueError`` is raised.

    This function makes a single pass over the input elements and takes care to
    minimize the number of comparisons made during processing.

    Note that unlike the builtin ``max`` function, which always returns the first
    item with the maximum value, this function may return another item when there are
    ties.

    This function is based on the
    `recipe <https://code.activestate.com/recipes/577916-fast-minmax-function>`__ by
    Raymond Hettinger.
    """
    if others:
        iterable = (iterable_or_value, *others)
    else:
        iterable = iterable_or_value

    iterator = iter(iterable)
    try:
        lo = hi = next(iterator)
    except StopIteration as exc:
        if default is _marker:
            raise ValueError(
                '`minmax()` argument is an empty iterable. '
                'Provide a `default` value to suppress this error.'
            ) from exc
        return default

    # Consume items two at a time: ordering the pair first means each
    # pair costs three comparisons instead of four. Separate branches
    # with and without *key* keep the common key=None case fast.
    if key is None:
        for a, b in zip_longest(iterator, iterator, fillvalue=lo):
            if b < a:
                a, b = b, a
            if a < lo:
                lo = a
            if hi < b:
                hi = b
    else:
        lo_key = hi_key = key(lo)
        for a, b in zip_longest(iterator, iterator, fillvalue=lo):
            a_key, b_key = key(a), key(b)
            if b_key < a_key:
                a, b, a_key, b_key = b, a, b_key, a_key
            if a_key < lo_key:
                lo, lo_key = a, a_key
            if hi_key < b_key:
                hi, hi_key = b, b_key

    return lo, hi
def constrained_batches(
    iterable, max_size, max_count=None, get_len=len, strict=True
):
    """Yield batches of items from *iterable* with a combined size limited by
    *max_size*.

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10))
    [(b'12345', b'123'), (b'12345678', b'1', b'1'), (b'12', b'1')]

    If a *max_count* is supplied, the number of items per batch is also
    limited:

    >>> iterable = [b'12345', b'123', b'12345678', b'1', b'1', b'12', b'1']
    >>> list(constrained_batches(iterable, 10, max_count = 2))
    [(b'12345', b'123'), (b'12345678', b'1'), (b'1', b'12'), (b'1',)]

    If a *get_len* function is supplied, use that instead of :func:`len` to
    determine item size.

    If *strict* is ``True``, raise ``ValueError`` if any single item is bigger
    than *max_size*. Otherwise, allow single items to exceed *max_size*.
    """
    if max_size <= 0:
        raise ValueError('maximum size must be greater than zero')

    current = []
    current_size = 0

    for item in iterable:
        size = get_len(item)
        if strict and size > max_size:
            raise ValueError('item size exceeds maximum size')

        # Flush the batch before this item if adding it would exceed
        # either the size budget or the item-count limit.
        would_overflow = current_size + size > max_size
        at_capacity = len(current) == max_count
        if current and (would_overflow or at_capacity):
            yield tuple(current)
            current = []
            current_size = 0

        current.append(item)
        current_size += size

    if current:
        yield tuple(current)
def gray_product(*iterables, repeat=1):
    """Like :func:`itertools.product`, but return tuples in an order such
    that only one element in the generated tuple changes from one iteration
    to the next.

    >>> list(gray_product('AB','CD'))
    [('A', 'C'), ('B', 'C'), ('B', 'D'), ('A', 'D')]

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. For example, ``gray_product('AB', repeat=3)`` is
    equivalent to ``gray_product('AB', 'AB', 'AB')``.

    This function consumes all of the input iterables before producing output.
    If any of the input iterables have fewer than two items, ``ValueError``
    is raised.

    For information on the algorithm, see
    `this section <https://www-cs-faculty.stanford.edu/~knuth/fasc2a.ps.gz>`__
    of Donald Knuth's *The Art of Computer Programming*.
    """
    all_iterables = tuple(map(tuple, iterables)) * repeat
    iterable_count = len(all_iterables)
    # The algorithm's direction flipping requires at least two items per axis.
    for iterable in all_iterables:
        if len(iterable) < 2:
            raise ValueError("each iterable must have two or more items")

    # This is based on "Algorithm H" from section 7.2.1.1, page 20.
    # a holds the indexes of the source iterables for the n-tuple to be yielded
    # f is the array of "focus pointers"
    # o is the array of "directions"
    a = [0] * iterable_count
    f = list(range(iterable_count + 1))
    o = [1] * iterable_count
    while True:
        yield tuple(all_iterables[i][a[i]] for i in range(iterable_count))
        # j is the axis whose index changes this step; when the focus
        # pointer runs off the end, every axis is exhausted.
        j = f[0]
        f[0] = 0
        if j == iterable_count:
            break
        # Step axis j in its current direction; reverse direction when the
        # index reaches either end of that axis.
        a[j] = a[j] + o[j]
        if a[j] == 0 or a[j] == len(all_iterables[j]) - 1:
            o[j] = -o[j]
            f[j] = f[j + 1]
            f[j + 1] = j + 1
def partial_product(*iterables, repeat=1):
    """Yields tuples containing one item from each iterator, with subsequent
    tuples changing a single item at a time by advancing each iterator until it
    is exhausted. This sequence guarantees every value in each iterable is
    output at least once without generating all possible combinations.

    This may be useful, for example, when testing an expensive function.

    >>> list(partial_product('AB', 'C', 'DEF'))
    [('A', 'C', 'D'), ('B', 'C', 'D'), ('B', 'C', 'E'), ('B', 'C', 'F')]

    The *repeat* keyword argument specifies the number of repetitions
    of the iterables. For example, ``partial_product('AB', repeat=3)`` is
    equivalent to ``partial_product('AB', 'AB', 'AB')``.
    """
    pools = tuple(tuple(pool) for pool in iterables) * repeat
    iterators = [iter(pool) for pool in pools]

    # Start with the first item of every pool; if any pool is empty,
    # there is nothing to yield at all.
    try:
        current = [next(it) for it in iterators]
    except StopIteration:
        return
    yield tuple(current)

    # Exhaust one iterator at a time, left to right, changing a single
    # position per output tuple.
    for position, it in enumerate(iterators):
        for value in it:
            current[position] = value
            yield tuple(current)
def takewhile_inclusive(predicate, iterable):
    """A variant of :func:`takewhile` that yields one additional element.

    >>> list(takewhile_inclusive(lambda x: x < 5, [1, 4, 6, 4, 1]))
    [1, 4, 6]

    :func:`takewhile` would return ``[1, 4]``.
    """
    for item in iterable:
        # Emit first, then test: the failing element is included.
        yield item
        if not predicate(item):
            return
def outer_product(func, xs, ys, *args, **kwargs):
    """A generalized outer product that applies a binary function to all
    pairs of items. Returns a 2D matrix with ``len(xs)`` rows and ``len(ys)``
    columns.

    Also accepts ``*args`` and ``**kwargs`` that are passed to ``func``.

    Multiplication table:

    >>> from operator import mul
    >>> list(outer_product(mul, range(1, 4), range(1, 6)))
    [(1, 2, 3, 4, 5), (2, 4, 6, 8, 10), (3, 6, 9, 12, 15)]

    Usage with ``*args`` and ``**kwargs``:

    >>> animals = ['cat', 'wolf', 'mouse']
    >>> list(outer_product(min, animals, animals, key=len))
    [('cat', 'cat', 'cat'), ('cat', 'wolf', 'wolf'), ('cat', 'wolf', 'mouse')]
    """
    ys = tuple(ys)

    def cell(x, y):
        # One matrix entry, with the extra arguments threaded through.
        return func(x, y, *args, **kwargs)

    # Each row of the matrix is one batch of len(ys) results.
    return batched(starmap(cell, product(xs, ys)), n=len(ys))
def iter_suppress(iterable, *exceptions):
    """Yield each of the items from *iterable*. If the iteration raises one of
    the specified *exceptions*, that exception will be suppressed and iteration
    will stop.

    >>> from itertools import chain
    >>> def breaks_at_five(x):
    ...     while True:
    ...         if x >= 5:
    ...             raise RuntimeError
    ...         yield x
    ...         x += 1
    >>> it_1 = iter_suppress(breaks_at_five(1), RuntimeError)
    >>> it_2 = iter_suppress(breaks_at_five(2), RuntimeError)
    >>> list(chain(it_1, it_2))
    [1, 2, 3, 4, 2, 3, 4]
    """
    # suppress() swallows the listed exceptions, ending this generator.
    with suppress(*exceptions):
        yield from iterable
def filter_map(func, iterable):
    """Apply *func* to every element of *iterable*, yielding only those which
    are not ``None``.

    >>> elems = ['1', 'a', '2', 'b', '3']
    >>> list(filter_map(lambda s: int(s) if s.isnumeric() else None, elems))
    [1, 2, 3]
    """
    # Note: only None is dropped; other falsy results (0, '', etc.) pass.
    return (result for result in map(func, iterable) if result is not None)
def powerset_of_sets(iterable, *, baseset=set):
    """Yields all possible subsets of the iterable.

    >>> list(powerset_of_sets([1, 2, 3]))  # doctest: +SKIP
    [set(), {1}, {2}, {3}, {1, 2}, {1, 3}, {2, 3}, {1, 2, 3}]
    >>> list(powerset_of_sets([1, 1, 0]))  # doctest: +SKIP
    [set(), {1}, {0}, {0, 1}]

    :func:`powerset_of_sets` takes care to minimize the number
    of hash operations performed.

    The *baseset* parameter determines what kind of sets are
    constructed, either *set* or *frozenset*.
    """
    # Wrap each distinct item in a one-element frozenset (hashing each
    # item only once) and deduplicate while preserving first-seen order.
    singletons = tuple(dict.fromkeys(frozenset((item,)) for item in iterable))

    # The union of r singletons is an r-element subset; r = 0 gives the
    # empty set.
    empty_union = baseset().union
    return chain.from_iterable(
        starmap(empty_union, combinations(singletons, r))
        for r in range(len(singletons) + 1)
    )
def join_mappings(**field_to_map):
    """
    Joins multiple mappings together using their common keys.

    >>> user_scores = {'elliot': 50, 'claris': 60}
    >>> user_times = {'elliot': 30, 'claris': 40}
    >>> join_mappings(score=user_scores, time=user_times)
    {'elliot': {'score': 50, 'time': 30}, 'claris': {'score': 60, 'time': 40}}
    """
    joined = {}
    for field, mapping in field_to_map.items():
        for key, value in mapping.items():
            # Each key maps to a dict of {field name: value}.
            joined.setdefault(key, {})[field] = value
    return joined
def _complex_sumprod(v1, v2):
    """High precision sumprod() for complex numbers.
    Used by :func:`dft` and :func:`idft`.
    """
    # Re(sum v1*v2) = sum(re1*re2 - im1*im2)
    # Im(sum v1*v2) = sum(re1*im2 + im1*re2)
    # Each component is computed as one long real sumprod for precision.
    get_real = attrgetter('real')
    get_imag = attrgetter('imag')
    real_left = chain(map(get_real, v1), map(neg, map(get_imag, v1)))
    real_right = chain(map(get_real, v2), map(get_imag, v2))
    imag_left = chain(map(get_real, v1), map(get_imag, v1))
    imag_right = chain(map(get_imag, v2), map(get_real, v2))
    return complex(
        _fsumprod(real_left, real_right),
        _fsumprod(imag_left, imag_right),
    )
def dft(xarr):
    """Discrete Fourier Transform. *xarr* is a sequence of complex numbers.
    Yields the components of the corresponding transformed output vector.

    >>> import cmath
    >>> xarr = [1, 2-1j, -1j, -1+2j]            # time domain
    >>> Xarr = [2, -2-2j, -2j, 4+4j]            # frequency domain
    >>> all(map(cmath.isclose, dft(xarr), Xarr))
    True

    Inputs are restricted to numeric types that can add and multiply
    with a complex number. This includes int, float, complex, and
    Fraction, but excludes Decimal.

    See :func:`idft` for the inverse Discrete Fourier Transform.
    """
    N = len(xarr)
    # Only N distinct roots of unity exist; exponents repeat mod N.
    twiddles = [e ** (m / N * tau * -1j) for m in range(N)]
    for k in range(N):
        row = [twiddles[k * m % N] for m in range(N)]
        yield _complex_sumprod(xarr, row)
def idft(Xarr):
    """Inverse Discrete Fourier Transform. *Xarr* is a sequence of
    complex numbers. Yields the components of the corresponding
    inverse-transformed output vector.

    >>> import cmath
    >>> xarr = [1, 2-1j, -1j, -1+2j]            # time domain
    >>> Xarr = [2, -2-2j, -2j, 4+4j]            # frequency domain
    >>> all(map(cmath.isclose, idft(Xarr), xarr))
    True

    Inputs are restricted to numeric types that can add and multiply
    with a complex number. This includes int, float, complex, and
    Fraction, but excludes Decimal.

    See :func:`dft` for the Discrete Fourier Transform.
    """
    N = len(Xarr)
    # Same roots as dft() but with the opposite sign in the exponent.
    twiddles = [e ** (m / N * tau * 1j) for m in range(N)]
    for k in range(N):
        row = [twiddles[k * m % N] for m in range(N)]
        # The inverse transform carries the 1/N normalization factor.
        yield _complex_sumprod(Xarr, row) / N
def doublestarmap(func, iterable):
    """Apply *func* to every item of *iterable* by dictionary unpacking
    the item into *func*.

    The difference between :func:`itertools.starmap` and :func:`doublestarmap`
    parallels the distinction between ``func(*a)`` and ``func(**a)``.

    >>> iterable = [{'a': 1, 'b': 2}, {'a': 40, 'b': 60}]
    >>> list(doublestarmap(lambda a, b: a + b, iterable))
    [3, 100]

    ``TypeError`` will be raised if *func*'s signature doesn't match the
    mapping contained in *iterable* or if *iterable* does not contain mappings.
    """
    return (func(**mapping) for mapping in iterable)
5188def _nth_prime_bounds(n):
5189 """Bounds for the nth prime (counting from 1): lb < p_n < ub."""
5190 # At and above 688,383, the lb/ub spread is under 0.003 * p_n.
5192 if n < 1:
5193 raise ValueError
5195 if n < 6:
5196 return (n, 2.25 * n)
5198 # https://en.wikipedia.org/wiki/Prime-counting_function#Inequalities
5199 upper_bound = n * log(n * log(n))
5200 lower_bound = upper_bound - n
5201 if n >= 688_383:
5202 upper_bound -= n * (1.0 - (log(log(n)) - 2.0) / log(n))
5204 return lower_bound, upper_bound
def nth_prime(n, *, approximate=False):
    """Return the nth prime (counting from 0).

    >>> nth_prime(0)
    2
    >>> nth_prime(100)
    547

    If *approximate* is set to True, will return a prime close
    to the nth prime. The estimation is much faster than computing
    an exact result.

    >>> nth_prime(200_000_000, approximate=True) # Exact result is 4222234763
    4217820427

    """
    lower, upper = _nth_prime_bounds(n + 1)

    # Exact answer: sieve up to the upper bound and take the nth entry.
    if not approximate or n <= 1_000_000:
        return nth(sieve(ceil(upper)), n)

    # Approximate answer: start from the midpoint of the bounds, rounded
    # up to odd, and scan odd numbers for the next prime.
    start = floor((lower + upper) / 2) | 1
    return first_true(count(start, step=2), pred=is_prime)
def argmin(iterable, *, key=None):
    """
    Index of the first occurrence of a minimum value in an iterable.

    >>> argmin('efghabcdijkl')
    4
    >>> argmin([3, 2, 1, 0, 4, 2, 1, 0])
    3

    For example, look up a label corresponding to the position
    of a value that minimizes a cost function::

        >>> def cost(x):
        ...     "Days for a wound to heal given a subject's age."
        ...     return x**2 - 20*x + 150
        ...
        >>> labels = ['homer', 'marge', 'bart', 'lisa', 'maggie']
        >>> ages = [ 35, 30, 10, 9, 1 ]

        >>> labels[argmin(ages, key=cost)]
        'bart'

    """
    values = iterable if key is None else map(key, iterable)
    # min() keeps the first of equal values, so ties resolve to the
    # earliest index.
    best_index, _ = min(enumerate(values), key=itemgetter(1))
    return best_index
def argmax(iterable, *, key=None):
    """
    Index of the first occurrence of a maximum value in an iterable.

    >>> argmax('abcdefghabcd')
    7
    >>> argmax([0, 1, 2, 3, 3, 2, 1, 0])
    3

    For example, identify the best machine learning model::

        >>> models = ['svm', 'random forest', 'knn', 'naïve bayes']
        >>> accuracy = [ 68, 61, 84, 72 ]

        >>> models[argmax(accuracy)]
        'knn'

    """
    values = iterable if key is None else map(key, iterable)
    # max() keeps the first of equal values, so ties resolve to the
    # earliest index.
    best_index, _ = max(enumerate(values), key=itemgetter(1))
    return best_index
5294def _extract_monotonic(iterator, indices):
5295 'Non-decreasing indices, lazily consumed'
5296 num_read = 0
5297 for index in indices:
5298 advance = index - num_read
5299 try:
5300 value = next(islice(iterator, advance, None))
5301 except ValueError:
5302 if advance != -1 or index < 0:
5303 raise ValueError(f'Invalid index: {index}') from None
5304 except StopIteration:
5305 raise IndexError(index) from None
5306 else:
5307 num_read += advance + 1
5308 yield value
5311def _extract_buffered(iterator, index_and_position):
5312 'Arbitrary index order, greedily consumed'
5313 buffer = {}
5314 iterator_position = -1
5315 next_to_emit = 0
5317 for index, order in index_and_position:
5318 advance = index - iterator_position
5319 if advance:
5320 try:
5321 value = next(islice(iterator, advance - 1, None))
5322 except StopIteration:
5323 raise IndexError(index) from None
5324 iterator_position = index
5326 buffer[order] = value
5328 while next_to_emit in buffer:
5329 yield buffer.pop(next_to_emit)
5330 next_to_emit += 1
def extract(iterable, indices, *, monotonic=False):
    """Yield values at the specified indices.

    Example:

    >>> data = 'abcdefghijklmnopqrstuvwxyz'
    >>> list(extract(data, [7, 4, 11, 11, 14]))
    ['h', 'e', 'l', 'l', 'o']

    The *iterable* is consumed lazily and can be infinite.

    When *monotonic* is false, the *indices* are consumed immediately
    and must be finite. When *monotonic* is true, *indices* are consumed
    lazily and can be infinite but must be non-decreasing.

    Raises ``IndexError`` if an index lies beyond the iterable.
    Raises ``ValueError`` for a negative index or for a decreasing
    index when *monotonic* is true.
    """
    iterator = iter(iterable)

    if monotonic:
        return _extract_monotonic(iterator, iter(indices))

    # Sort by source index but remember each index's output position so
    # the source is read in a single forward pass.
    pairs = sorted(zip(indices, count()))
    if pairs and pairs[0][0] < 0:
        raise ValueError('Indices must be non-negative')
    return _extract_buffered(iterator, pairs)
class serialize:
    """Wrap a non-concurrent iterator with a lock to enforce sequential access.

    Applies a non-reentrant lock around calls to ``__next__``, allowing
    iterator and generator instances to be shared by multiple consumer
    threads.
    """

    __slots__ = ('iterator', 'lock')

    def __init__(self, iterable):
        self.lock = Lock()
        self.iterator = iter(iterable)

    def __iter__(self):
        return self

    def __next__(self):
        # Only one thread at a time may advance the underlying iterator.
        with self.lock:
            return next(self.iterator)
def synchronized(func):
    """Wrap an iterator-returning callable to make its iterators thread-safe.

    Existing itertools and more-itertools can be wrapped so that their
    iterator instances are serialized.

    For example, ``itertools.count`` does not make thread-safe instances,
    but that is easily fixed with::

        atomic_counter = synchronized(itertools.count)

    Can also be used as a decorator for generator functions definitions
    so that the generator instances are serialized::

        @synchronized
        def enumerate_and_timestamp(iterable):
            for count, value in enumerate(iterable):
                yield count, time_ns(), value

    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Every iterator produced by *func* comes back lock-protected.
        return serialize(func(*args, **kwargs))

    return wrapper
def concurrent_tee(iterable, n=2):
    """Variant of itertools.tee() but with guaranteed threading semantics.

    Takes a non-threadsafe iterator as an input and creates concurrent
    tee objects for other threads to have reliable independent copies of
    the data stream.

    The new iterators are only thread-safe if consumed within a single thread.
    To share just one of the new iterators across multiple threads, wrap it
    with :func:`serialize`.
    """
    if n < 0:
        raise ValueError
    if n == 0:
        return ()

    # The first tee wraps the source; the rest share its state.
    first = _concurrent_tee(iterable)
    return (first,) + tuple(_concurrent_tee(first) for _ in range(n - 1))
5439class _concurrent_tee:
5440 __slots__ = ('iterator', 'link', 'lock')
5442 def __init__(self, iterable):
5443 if isinstance(iterable, _concurrent_tee):
5444 self.iterator = iterable.iterator
5445 self.link = iterable.link
5446 self.lock = iterable.lock
5447 else:
5448 self.iterator = iter(iterable)
5449 self.link = [None, None]
5450 self.lock = Lock()
5452 def __iter__(self):
5453 return self
5455 def __next__(self):
5456 link = self.link
5457 if link[1] is None:
5458 with self.lock:
5459 if link[1] is None:
5460 link[0] = next(self.iterator)
5461 link[1] = [None, None]
5462 value, self.link = link
5463 return value
5466def _windowed_running_min(iterator, maxlen):
5467 sis = deque() # Strictly increasing subsequence
5468 for index, value in enumerate(iterator):
5469 if sis and sis[0][0] == index - maxlen:
5470 sis.popleft()
5471 while sis and not sis[-1][1] < value: # Remove non-increasing values
5472 sis.pop()
5473 sis.append((index, value)) # Most recent value at position -1
5474 yield sis[0][1] # Window minimum at position 0
def running_min(iterable, *, maxlen=None):
    """Smallest of values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window.  The default of *None* is equivalent to
    an unbounded window.

    For example:

    >>> list(running_min([4, 3, 7, 0, 8, 1, 6, 2, 9, 5]))
    [4, 3, 3, 0, 0, 0, 0, 0, 0, 0]

    >>> list(running_min([4, 3, 7, 0, 8, 1, 6, 2, 9, 5], maxlen=3))
    [4, 3, 3, 0, 0, 0, 1, 1, 2, 2]

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    iterator = iter(iterable)
    if maxlen is not None:
        if maxlen <= 0:
            raise ValueError('Window size should be positive')
        return _windowed_running_min(iterator, maxlen)
    # Unbounded window: the running minimum is a fold with min().
    return accumulate(iterator, func=min)
5507def _windowed_running_max(iterator, maxlen):
5508 sds = deque() # Strictly decreasing subsequence
5509 for index, value in enumerate(iterator):
5510 if sds and sds[0][0] == index - maxlen:
5511 sds.popleft()
5512 while sds and not sds[-1][1] > value: # Remove non-decreasing values
5513 sds.pop()
5514 sds.append((index, value)) # Most recent value at position -1
5515 yield sds[0][1] # Window maximum at position 0
def running_max(iterable, *, maxlen=None):
    """Largest of values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window.  The default of *None* is equivalent to
    an unbounded window.

    For example:

    >>> list(running_max([4, 3, 7, 0, 8, 1, 6, 2, 9, 5]))
    [4, 4, 7, 7, 8, 8, 8, 8, 9, 9]

    >>> list(running_max([4, 3, 7, 0, 8, 1, 6, 2, 9, 5], maxlen=3))
    [4, 4, 7, 7, 8, 8, 8, 6, 9, 9]

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    iterator = iter(iterable)
    if maxlen is not None:
        if maxlen <= 0:
            raise ValueError('Window size should be positive')
        return _windowed_running_max(iterator, maxlen)
    # Unbounded window: the running maximum is a fold with max().
    return accumulate(iterator, func=max)
# Immutable per-step summary record yielded by running_statistics().
@dataclass(frozen=True, slots=True)
class Stats:
    size: int  # number of values in the dataset or current window
    minimum: float  # smallest value seen (window minimum when bounded)
    median: float  # median value (window median when bounded)
    maximum: float  # largest value seen (window maximum when bounded)
    mean: float  # arithmetic mean (window mean when bounded)
def running_statistics(iterable, *, maxlen=None):
    """Statistics for values seen so far or values in a sliding window.

    Set *maxlen* to a positive integer to specify the maximum size
    of the sliding window.  The default of *None* is equivalent to
    an unbounded window.

    Yields instances of a ``Stats`` dataclass with fields for the dataset *size*,
    *minimum* value, *median* value, *maximum* value, and the arithmetic *mean*.

    Supports numeric types such as int, float, Decimal, and Fraction,
    but not complex numbers which are unorderable.
    """
    # Four independent copies of the input feed the four running statistics.
    # fmt: off
    t0, t1, t2, t3 = tee(iterable, 4)
    return map(
        Stats,
        # Dataset size grows 1, 2, ... and, when bounded, saturates at maxlen.
        count(1) if maxlen is None else chain(range(1, maxlen), repeat(maxlen)),
        running_min(t0, maxlen=maxlen),
        running_median(t1, maxlen=maxlen),
        running_max(t2, maxlen=maxlen),
        running_mean(t3, maxlen=maxlen),
    )
    # fmt: on
5581 # fmt: on