Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/toolz/itertoolz.py: 14%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import itertools
2import heapq
3import collections
4import operator
5from functools import partial
6from itertools import filterfalse, zip_longest
7from collections.abc import Sequence
8from toolz.utils import no_default
11__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
12 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
13 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
14 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
15 'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
16 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')
19def remove(predicate, seq):
20 """ Return those items of sequence for which predicate(item) is False
22 >>> def iseven(x):
23 ... return x % 2 == 0
24 >>> list(remove(iseven, [1, 2, 3, 4]))
25 [1, 3]
26 """
27 return filterfalse(predicate, seq)
30def accumulate(binop, seq, initial=no_default):
31 """ Repeatedly apply binary function to a sequence, accumulating results
33 >>> from operator import add, mul
34 >>> list(accumulate(add, [1, 2, 3, 4, 5]))
35 [1, 3, 6, 10, 15]
36 >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
37 [1, 2, 6, 24, 120]
39 Accumulate is similar to ``reduce`` and is good for making functions like
40 cumulative sum:
42 >>> from functools import partial, reduce
43 >>> sum = partial(reduce, add)
44 >>> cumsum = partial(accumulate, add)
46 Accumulate also takes an optional argument that will be used as the first
47 value. This is similar to reduce.
49 >>> list(accumulate(add, [1, 2, 3], -1))
50 [-1, 0, 2, 5]
51 >>> list(accumulate(add, [], 1))
52 [1]
54 See Also:
55 itertools.accumulate : In standard itertools for Python 3.2+
56 """
57 seq = iter(seq)
58 if initial == no_default:
59 try:
60 result = next(seq)
61 except StopIteration:
62 return
63 else:
64 result = initial
65 yield result
66 for elem in seq:
67 result = binop(result, elem)
68 yield result
71def groupby(key, seq):
72 """ Group a collection by a key function
74 >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
75 >>> groupby(len, names) # doctest: +SKIP
76 {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
78 >>> iseven = lambda x: x % 2 == 0
79 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP
80 {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
82 Non-callable keys imply grouping on a member.
84 >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
85 ... {'name': 'Bob', 'gender': 'M'},
86 ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
87 {'F': [{'gender': 'F', 'name': 'Alice'}],
88 'M': [{'gender': 'M', 'name': 'Bob'},
89 {'gender': 'M', 'name': 'Charlie'}]}
91 Not to be confused with ``itertools.groupby``
93 See Also:
94 countby
95 """
96 if not callable(key):
97 key = getter(key)
98 d = collections.defaultdict(lambda: [].append)
99 for item in seq:
100 d[key(item)](item)
101 rv = {}
102 for k, v in d.items():
103 rv[k] = v.__self__
104 return rv
107def merge_sorted(*seqs, **kwargs):
108 """ Merge and sort a collection of sorted collections
110 This works lazily and only keeps one value from each iterable in memory.
112 >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
113 [1, 2, 3, 4, 5, 6]
115 >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
116 'aaabbbccc'
118 The "key" function used to sort the input may be passed as a keyword.
120 >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
121 [2, 1, 3, 3]
122 """
123 if len(seqs) == 0:
124 return iter([])
125 elif len(seqs) == 1:
126 return iter(seqs[0])
128 key = kwargs.get('key', None)
129 if key is None:
130 return _merge_sorted_binary(seqs)
131 else:
132 return _merge_sorted_binary_key(seqs, key)
135def _merge_sorted_binary(seqs):
136 mid = len(seqs) // 2
137 L1 = seqs[:mid]
138 if len(L1) == 1:
139 seq1 = iter(L1[0])
140 else:
141 seq1 = _merge_sorted_binary(L1)
142 L2 = seqs[mid:]
143 if len(L2) == 1:
144 seq2 = iter(L2[0])
145 else:
146 seq2 = _merge_sorted_binary(L2)
148 try:
149 val2 = next(seq2)
150 except StopIteration:
151 yield from seq1
152 return
154 for val1 in seq1:
155 if val2 < val1:
156 yield val2
157 for val2 in seq2:
158 if val2 < val1:
159 yield val2
160 else:
161 yield val1
162 break
163 else:
164 break
165 else:
166 yield val1
167 else:
168 yield val2
169 yield from seq2
170 return
171 yield val1
172 yield from seq1
175def _merge_sorted_binary_key(seqs, key):
176 mid = len(seqs) // 2
177 L1 = seqs[:mid]
178 if len(L1) == 1:
179 seq1 = iter(L1[0])
180 else:
181 seq1 = _merge_sorted_binary_key(L1, key)
182 L2 = seqs[mid:]
183 if len(L2) == 1:
184 seq2 = iter(L2[0])
185 else:
186 seq2 = _merge_sorted_binary_key(L2, key)
188 try:
189 val2 = next(seq2)
190 except StopIteration:
191 yield from seq1
192 return
193 key2 = key(val2)
195 for val1 in seq1:
196 key1 = key(val1)
197 if key2 < key1:
198 yield val2
199 for val2 in seq2:
200 key2 = key(val2)
201 if key2 < key1:
202 yield val2
203 else:
204 yield val1
205 break
206 else:
207 break
208 else:
209 yield val1
210 else:
211 yield val2
212 yield from seq2
213 return
214 yield val1
215 yield from seq1
218def interleave(seqs):
219 """ Interleave a sequence of sequences
221 >>> list(interleave([[1, 2], [3, 4]]))
222 [1, 3, 2, 4]
224 >>> ''.join(interleave(('ABC', 'XY')))
225 'AXBYC'
227 Both the individual sequences and the sequence of sequences may be infinite
229 Returns a lazy iterator
230 """
231 iters = itertools.cycle(map(iter, seqs))
232 while True:
233 try:
234 for itr in iters:
235 yield next(itr)
236 return
237 except StopIteration:
238 predicate = partial(operator.is_not, itr)
239 iters = itertools.cycle(itertools.takewhile(predicate, iters))
242def unique(seq, key=None):
243 """ Return only unique elements of a sequence
245 >>> tuple(unique((1, 2, 3)))
246 (1, 2, 3)
247 >>> tuple(unique((1, 2, 1, 3)))
248 (1, 2, 3)
250 Uniqueness can be defined by key keyword
252 >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
253 ('cat', 'mouse')
254 """
255 seen = set()
256 seen_add = seen.add
257 if key is None:
258 for item in seq:
259 if item not in seen:
260 seen_add(item)
261 yield item
262 else: # calculate key
263 for item in seq:
264 val = key(item)
265 if val not in seen:
266 seen_add(val)
267 yield item
270def isiterable(x):
271 """ Is x iterable?
273 >>> isiterable([1, 2, 3])
274 True
275 >>> isiterable('abc')
276 True
277 >>> isiterable(5)
278 False
279 """
280 try:
281 iter(x)
282 return True
283 except TypeError:
284 return False
287def isdistinct(seq):
288 """ All values in sequence are distinct
290 >>> isdistinct([1, 2, 3])
291 True
292 >>> isdistinct([1, 2, 1])
293 False
295 >>> isdistinct("Hello")
296 False
297 >>> isdistinct("World")
298 True
299 """
300 if iter(seq) is seq:
301 seen = set()
302 seen_add = seen.add
303 for item in seq:
304 if item in seen:
305 return False
306 seen_add(item)
307 return True
308 else:
309 return len(seq) == len(set(seq))
312def take(n, seq):
313 """ The first n elements of a sequence
315 >>> list(take(2, [10, 20, 30, 40, 50]))
316 [10, 20]
318 See Also:
319 drop
320 tail
321 """
322 return itertools.islice(seq, n)
325def tail(n, seq):
326 """ The last n elements of a sequence
328 >>> tail(2, [10, 20, 30, 40, 50])
329 [40, 50]
331 See Also:
332 drop
333 take
334 """
335 try:
336 return seq[-n:]
337 except (TypeError, KeyError):
338 return tuple(collections.deque(seq, n))
341def drop(n, seq):
342 """ The sequence following the first n elements
344 >>> list(drop(2, [10, 20, 30, 40, 50]))
345 [30, 40, 50]
347 See Also:
348 take
349 tail
350 """
351 return itertools.islice(seq, n, None)
354def take_nth(n, seq):
355 """ Every nth item in seq
357 >>> list(take_nth(2, [10, 20, 30, 40, 50]))
358 [10, 30, 50]
359 """
360 return itertools.islice(seq, 0, None, n)
363def first(seq):
364 """ The first element in a sequence
366 >>> first('ABC')
367 'A'
368 """
369 return next(iter(seq))
372def second(seq):
373 """ The second element in a sequence
375 >>> second('ABC')
376 'B'
377 """
378 seq = iter(seq)
379 next(seq)
380 return next(seq)
383def nth(n, seq):
384 """ The nth element in a sequence
386 >>> nth(1, 'ABC')
387 'B'
388 """
389 if isinstance(seq, (tuple, list, Sequence)):
390 return seq[n]
391 else:
392 return next(itertools.islice(seq, n, None))
395def last(seq):
396 """ The last element in a sequence
398 >>> last('ABC')
399 'C'
400 """
401 return tail(1, seq)[0]
404rest = partial(drop, 1)
407def _get(ind, seq, default):
408 try:
409 return seq[ind]
410 except (KeyError, IndexError):
411 return default
414def get(ind, seq, default=no_default):
415 """ Get element in a sequence or dict
417 Provides standard indexing
419 >>> get(1, 'ABC') # Same as 'ABC'[1]
420 'B'
422 Pass a list to get multiple values
424 >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2])
425 ('B', 'C')
427 Works on any value that supports indexing/getitem
428 For example here we see that it works with dictionaries
430 >>> phonebook = {'Alice': '555-1234',
431 ... 'Bob': '555-5678',
432 ... 'Charlie':'555-9999'}
433 >>> get('Alice', phonebook)
434 '555-1234'
436 >>> get(['Alice', 'Bob'], phonebook)
437 ('555-1234', '555-5678')
439 Provide a default for missing values
441 >>> get(['Alice', 'Dennis'], phonebook, None)
442 ('555-1234', None)
444 See Also:
445 pluck
446 """
447 try:
448 return seq[ind]
449 except TypeError: # `ind` may be a list
450 if isinstance(ind, list):
451 if default == no_default:
452 if len(ind) > 1:
453 return operator.itemgetter(*ind)(seq)
454 elif ind:
455 return seq[ind[0]],
456 else:
457 return ()
458 else:
459 return tuple(_get(i, seq, default) for i in ind)
460 elif default != no_default:
461 return default
462 else:
463 raise
464 except (KeyError, IndexError): # we know `ind` is not a list
465 if default == no_default:
466 raise
467 else:
468 return default
471def concat(seqs):
472 """ Concatenate zero or more iterables, any of which may be infinite.
474 An infinite sequence will prevent the rest of the arguments from
475 being included.
477 We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
478 can be a generator.
480 >>> list(concat([[], [1], [2, 3]]))
481 [1, 2, 3]
483 See also:
484 itertools.chain.from_iterable equivalent
485 """
486 return itertools.chain.from_iterable(seqs)
489def concatv(*seqs):
490 """ Variadic version of concat
492 >>> list(concatv([], ["a"], ["b", "c"]))
493 ['a', 'b', 'c']
495 See also:
496 itertools.chain
497 """
498 return concat(seqs)
501def mapcat(func, seqs):
502 """ Apply func to each sequence in seqs, concatenating results.
504 >>> list(mapcat(lambda s: [c.upper() for c in s],
505 ... [["a", "b"], ["c", "d", "e"]]))
506 ['A', 'B', 'C', 'D', 'E']
507 """
508 return concat(map(func, seqs))
511def cons(el, seq):
512 """ Add el to beginning of (possibly infinite) sequence seq.
514 >>> list(cons(1, [2, 3]))
515 [1, 2, 3]
516 """
517 return itertools.chain([el], seq)
520def interpose(el, seq):
521 """ Introduce element between each pair of elements in seq
523 >>> list(interpose("a", [1, 2, 3]))
524 [1, 'a', 2, 'a', 3]
525 """
526 inposed = concat(zip(itertools.repeat(el), seq))
527 next(inposed)
528 return inposed
531def frequencies(seq):
532 """ Find number of occurrences of each value in seq
534 >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP
535 {'cat': 3, 'ox': 1, 'pig': 2}
537 See Also:
538 countby
539 groupby
540 """
541 d = collections.defaultdict(int)
542 for item in seq:
543 d[item] += 1
544 return dict(d)
547def reduceby(key, binop, seq, init=no_default):
548 """ Perform a simultaneous groupby and reduction
550 The computation:
552 >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP
554 is equivalent to the following:
556 >>> def reduction(group): # doctest: +SKIP
557 ... return reduce(binop, group, init) # doctest: +SKIP
559 >>> groups = groupby(key, seq) # doctest: +SKIP
560 >>> result = valmap(reduction, groups) # doctest: +SKIP
562 But the former does not build the intermediate groups, allowing it to
563 operate in much less space. This makes it suitable for larger datasets
564 that do not fit comfortably in memory
566 The ``init`` keyword argument is the default initialization of the
567 reduction. This can be either a constant value like ``0`` or a callable
568 like ``lambda : 0`` as might be used in ``defaultdict``.
570 Simple Examples
571 ---------------
573 >>> from operator import add, mul
574 >>> iseven = lambda x: x % 2 == 0
576 >>> data = [1, 2, 3, 4, 5]
578 >>> reduceby(iseven, add, data) # doctest: +SKIP
579 {False: 9, True: 6}
581 >>> reduceby(iseven, mul, data) # doctest: +SKIP
582 {False: 15, True: 8}
584 Complex Example
585 ---------------
587 >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
588 ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
589 ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
590 ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
592 >>> reduceby('state', # doctest: +SKIP
593 ... lambda acc, x: acc + x['cost'],
594 ... projects, 0)
595 {'CA': 1200000, 'IL': 2100000}
597 Example Using ``init``
598 ----------------------
600 >>> def set_add(s, i):
601 ... s.add(i)
602 ... return s
604 >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP
605 {True: set([2, 4]),
606 False: set([1, 3])}
607 """
608 is_no_default = init == no_default
609 if not is_no_default and not callable(init):
610 _init = init
611 init = lambda: _init
612 if not callable(key):
613 key = getter(key)
614 d = {}
615 for item in seq:
616 k = key(item)
617 if k not in d:
618 if is_no_default:
619 d[k] = item
620 continue
621 else:
622 d[k] = init()
623 d[k] = binop(d[k], item)
624 return d
627def iterate(func, x):
628 """ Repeatedly apply a function func onto an original input
630 Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..
632 >>> def inc(x): return x + 1
633 >>> counter = iterate(inc, 0)
634 >>> next(counter)
635 0
636 >>> next(counter)
637 1
638 >>> next(counter)
639 2
641 >>> double = lambda x: x * 2
642 >>> powers_of_two = iterate(double, 1)
643 >>> next(powers_of_two)
644 1
645 >>> next(powers_of_two)
646 2
647 >>> next(powers_of_two)
648 4
649 >>> next(powers_of_two)
650 8
651 """
652 while True:
653 yield x
654 x = func(x)
657def sliding_window(n, seq):
658 """ A sequence of overlapping subsequences
660 >>> list(sliding_window(2, [1, 2, 3, 4]))
661 [(1, 2), (2, 3), (3, 4)]
663 This function creates a sliding window suitable for transformations like
664 sliding means / smoothing
666 >>> mean = lambda seq: float(sum(seq)) / len(seq)
667 >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
668 [1.5, 2.5, 3.5]
669 """
670 return zip(*(collections.deque(itertools.islice(it, i), 0) or it
671 for i, it in enumerate(itertools.tee(seq, n))))
674no_pad = '__no__pad__'
677def partition(n, seq, pad=no_pad):
678 """ Partition sequence into tuples of length n
680 >>> list(partition(2, [1, 2, 3, 4]))
681 [(1, 2), (3, 4)]
683 If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
684 is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:
686 >>> list(partition(2, [1, 2, 3, 4, 5]))
687 [(1, 2), (3, 4)]
689 >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
690 [(1, 2), (3, 4), (5, None)]
692 See Also:
693 partition_all
694 """
695 args = [iter(seq)] * n
696 if pad is no_pad:
697 return zip(*args)
698 else:
699 return zip_longest(*args, fillvalue=pad)
702def partition_all(n, seq):
703 """ Partition all elements of sequence into tuples of length at most n
705 The final tuple may be shorter to accommodate extra elements.
707 >>> list(partition_all(2, [1, 2, 3, 4]))
708 [(1, 2), (3, 4)]
710 >>> list(partition_all(2, [1, 2, 3, 4, 5]))
711 [(1, 2), (3, 4), (5,)]
713 See Also:
714 partition
715 """
716 args = [iter(seq)] * n
717 it = zip_longest(*args, fillvalue=no_pad)
718 try:
719 prev = next(it)
720 except StopIteration:
721 return
722 for item in it:
723 yield prev
724 prev = item
725 if prev[-1] is no_pad:
726 try:
727 # If seq defines __len__, then
728 # we can quickly calculate where no_pad starts
729 end = len(seq) % n
730 if prev[end - 1] is no_pad or prev[end] is not no_pad:
731 raise LookupError(
732 'The sequence passed to `parition_all` has invalid length'
733 )
734 yield prev[:end]
735 except TypeError:
736 # Get first index of no_pad without using .index()
737 # https://github.com/pytoolz/toolz/issues/387
738 # Binary search from CPython's bisect module,
739 # modified for identity testing.
740 lo, hi = 0, n
741 while lo < hi:
742 mid = (lo + hi) // 2
743 if prev[mid] is no_pad:
744 hi = mid
745 else:
746 lo = mid + 1
747 yield prev[:lo]
748 else:
749 yield prev
752def count(seq):
753 """ Count the number of items in seq
755 Like the builtin ``len`` but works on lazy sequences.
757 Not to be confused with ``itertools.count``
759 See also:
760 len
761 """
762 if hasattr(seq, '__len__'):
763 return len(seq)
764 return sum(1 for i in seq)
767def pluck(ind, seqs, default=no_default):
768 """ plucks an element or several elements from each item in a sequence.
770 ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
771 elements of each item in the sequence.
773 This is equivalent to running `map(curried.get(ind), seqs)`
775 ``ind`` can be either a single string/index or a list of strings/indices.
776 ``seqs`` should be sequence containing sequences or dicts.
778 e.g.
780 >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
781 >>> list(pluck('name', data))
782 ['Cheese', 'Pies']
783 >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
784 [(1, 2), (4, 5)]
786 See Also:
787 get
788 map
789 """
790 if default == no_default:
791 get = getter(ind)
792 return map(get, seqs)
793 elif isinstance(ind, list):
794 return (tuple(_get(item, seq, default) for item in ind)
795 for seq in seqs)
796 return (_get(ind, seq, default) for seq in seqs)
799def getter(index):
800 if isinstance(index, list):
801 if len(index) == 1:
802 index = index[0]
803 return lambda x: (x[index],)
804 elif index:
805 return operator.itemgetter(*index)
806 else:
807 return lambda x: ()
808 else:
809 return operator.itemgetter(index)
812def join(leftkey, leftseq, rightkey, rightseq,
813 left_default=no_default, right_default=no_default):
814 """ Join two sequences on common attributes
816 This is a semi-streaming operation. The LEFT sequence is fully evaluated
817 and placed into memory. The RIGHT sequence is evaluated lazily and so can
818 be arbitrarily large.
820 (Note: If right_default is defined, then unique keys of rightseq
821 will also be stored in memory.)
823 >>> friends = [('Alice', 'Edith'),
824 ... ('Alice', 'Zhao'),
825 ... ('Edith', 'Alice'),
826 ... ('Zhao', 'Alice'),
827 ... ('Zhao', 'Edith')]
829 >>> cities = [('Alice', 'NYC'),
830 ... ('Alice', 'Chicago'),
831 ... ('Dan', 'Sydney'),
832 ... ('Edith', 'Paris'),
833 ... ('Edith', 'Berlin'),
834 ... ('Zhao', 'Shanghai')]
836 >>> # Vacation opportunities
837 >>> # In what cities do people have friends?
838 >>> result = join(second, friends,
839 ... first, cities)
840 >>> for ((a, b), (c, d)) in sorted(unique(result)):
841 ... print((a, d))
842 ('Alice', 'Berlin')
843 ('Alice', 'Paris')
844 ('Alice', 'Shanghai')
845 ('Edith', 'Chicago')
846 ('Edith', 'NYC')
847 ('Zhao', 'Chicago')
848 ('Zhao', 'NYC')
849 ('Zhao', 'Berlin')
850 ('Zhao', 'Paris')
852 Specify outer joins with keyword arguments ``left_default`` and/or
853 ``right_default``. Here is a full outer join in which unmatched elements
854 are paired with None.
856 >>> identity = lambda x: x
857 >>> list(join(identity, [1, 2, 3],
858 ... identity, [2, 3, 4],
859 ... left_default=None, right_default=None))
860 [(2, 2), (3, 3), (None, 4), (1, None)]
862 Usually the key arguments are callables to be applied to the sequences. If
863 the keys are not obviously callable then it is assumed that indexing was
864 intended, e.g. the following is a legal change.
865 The join is implemented as a hash join and the keys of leftseq must be
866 hashable. Additionally, if right_default is defined, then keys of rightseq
867 must also be hashable.
869 >>> # result = join(second, friends, first, cities)
870 >>> result = join(1, friends, 0, cities) # doctest: +SKIP
871 """
872 if not callable(leftkey):
873 leftkey = getter(leftkey)
874 if not callable(rightkey):
875 rightkey = getter(rightkey)
877 d = groupby(leftkey, leftseq)
879 if left_default == no_default and right_default == no_default:
880 # Inner Join
881 for item in rightseq:
882 key = rightkey(item)
883 if key in d:
884 for left_match in d[key]:
885 yield (left_match, item)
886 elif left_default != no_default and right_default == no_default:
887 # Right Join
888 for item in rightseq:
889 key = rightkey(item)
890 if key in d:
891 for left_match in d[key]:
892 yield (left_match, item)
893 else:
894 yield (left_default, item)
895 elif right_default != no_default:
896 seen_keys = set()
897 seen = seen_keys.add
899 if left_default == no_default:
900 # Left Join
901 for item in rightseq:
902 key = rightkey(item)
903 seen(key)
904 if key in d:
905 for left_match in d[key]:
906 yield (left_match, item)
907 else:
908 # Full Join
909 for item in rightseq:
910 key = rightkey(item)
911 seen(key)
912 if key in d:
913 for left_match in d[key]:
914 yield (left_match, item)
915 else:
916 yield (left_default, item)
918 for key, matches in d.items():
919 if key not in seen_keys:
920 for match in matches:
921 yield (match, right_default)
924def diff(*seqs, **kwargs):
925 """ Return those items that differ between sequences
927 >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
928 [(3, 10)]
930 Shorter sequences may be padded with a ``default`` value:
932 >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
933 [(3, 10), (None, 100)]
935 A ``key`` function may also be applied to each item to use during
936 comparisons:
938 >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
939 [('bananas', 'Oranges')]
940 """
941 N = len(seqs)
942 if N == 1 and isinstance(seqs[0], list):
943 seqs = seqs[0]
944 N = len(seqs)
945 if N < 2:
946 raise TypeError('Too few sequences given (min 2 required)')
947 default = kwargs.get('default', no_default)
948 if default == no_default:
949 iters = zip(*seqs)
950 else:
951 iters = zip_longest(*seqs, fillvalue=default)
952 key = kwargs.get('key', None)
953 if key is None:
954 for items in iters:
955 if items.count(items[0]) != N:
956 yield items
957 else:
958 for items in iters:
959 vals = tuple(map(key, items))
960 if vals.count(vals[0]) != N:
961 yield items
964def topk(k, seq, key=None):
965 """ Find the k largest elements of a sequence
967 Operates lazily in ``n*log(k)`` time
969 >>> topk(2, [1, 100, 10, 1000])
970 (1000, 100)
972 Use a key function to change sorted order
974 >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
975 ('Charlie', 'Alice')
977 See also:
978 heapq.nlargest
979 """
980 if key is not None and not callable(key):
981 key = getter(key)
982 return tuple(heapq.nlargest(k, seq, key=key))
985def peek(seq):
986 """ Retrieve the next element of a sequence
988 Returns the first element and an iterable equivalent to the original
989 sequence, still having the element retrieved.
991 >>> seq = [0, 1, 2, 3, 4]
992 >>> first, seq = peek(seq)
993 >>> first
994 0
995 >>> list(seq)
996 [0, 1, 2, 3, 4]
997 """
998 iterator = iter(seq)
999 item = next(iterator)
1000 return item, itertools.chain((item,), iterator)
1003def peekn(n, seq):
1004 """ Retrieve the next n elements of a sequence
1006 Returns a tuple of the first n elements and an iterable equivalent
1007 to the original, still having the elements retrieved.
1009 >>> seq = [0, 1, 2, 3, 4]
1010 >>> first_two, seq = peekn(2, seq)
1011 >>> first_two
1012 (0, 1)
1013 >>> list(seq)
1014 [0, 1, 2, 3, 4]
1015 """
1016 iterator = iter(seq)
1017 peeked = tuple(take(n, iterator))
1018 return peeked, itertools.chain(iter(peeked), iterator)
1021def random_sample(prob, seq, random_state=None):
1022 """ Return elements from a sequence with probability of prob
1024 Returns a lazy iterator of random items from seq.
1026 ``random_sample`` considers each item independently and without
1027 replacement. See below how the first time it returned 13 items and the
1028 next time it returned 6 items.
1030 >>> seq = list(range(100))
1031 >>> list(random_sample(0.1, seq)) # doctest: +SKIP
1032 [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
1033 >>> list(random_sample(0.1, seq)) # doctest: +SKIP
1034 [6, 44, 54, 61, 69, 94]
1036 Providing an integer seed for ``random_state`` will result in
1037 deterministic sampling. Given the same seed it will return the same sample
1038 every time.
1040 >>> list(random_sample(0.1, seq, random_state=2016))
1041 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1042 >>> list(random_sample(0.1, seq, random_state=2016))
1043 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1045 ``random_state`` can also be any object with a method ``random`` that
1046 returns floats between 0.0 and 1.0 (exclusive).
1048 >>> from random import Random
1049 >>> randobj = Random(2016)
1050 >>> list(random_sample(0.1, seq, random_state=randobj))
1051 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1052 """
1053 if not hasattr(random_state, 'random'):
1054 from random import Random
1056 random_state = Random(random_state)
1057 return filter(lambda _: random_state.random() < prob, seq)