Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/toolz/itertoolz.py: 14%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1import itertools
2import heapq
3import collections
4import operator
5from functools import partial
6from itertools import filterfalse, zip_longest
7from collections.abc import Sequence
8from toolz.utils import no_default
11__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
12 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
13 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
14 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
15 'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
16 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')
19def remove(predicate, seq):
20 """ Return those items of sequence for which predicate(item) is False
22 >>> def iseven(x):
23 ... return x % 2 == 0
24 >>> list(remove(iseven, [1, 2, 3, 4]))
25 [1, 3]
26 """
27 return filterfalse(predicate, seq)
30def accumulate(binop, seq, initial=no_default):
31 """ Repeatedly apply binary function to a sequence, accumulating results
33 >>> from operator import add, mul
34 >>> list(accumulate(add, [1, 2, 3, 4, 5]))
35 [1, 3, 6, 10, 15]
36 >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
37 [1, 2, 6, 24, 120]
39 Accumulate is similar to ``reduce`` and is good for making functions like
40 cumulative sum:
42 >>> from functools import partial, reduce
43 >>> sum = partial(reduce, add)
44 >>> cumsum = partial(accumulate, add)
46 Accumulate also takes an optional argument that will be used as the first
47 value. This is similar to reduce.
49 >>> list(accumulate(add, [1, 2, 3], -1))
50 [-1, 0, 2, 5]
51 >>> list(accumulate(add, [], 1))
52 [1]
54 See Also:
55 itertools.accumulate : In standard itertools for Python 3.2+
56 """
57 seq = iter(seq)
58 if initial == no_default:
59 try:
60 result = next(seq)
61 except StopIteration:
62 return
63 else:
64 result = initial
65 yield result
66 for elem in seq:
67 result = binop(result, elem)
68 yield result
71def groupby(key, seq):
72 """ Group a collection by a key function
74 >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
75 >>> groupby(len, names) # doctest: +SKIP
76 {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}
78 >>> iseven = lambda x: x % 2 == 0
79 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP
80 {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}
82 Non-callable keys imply grouping on a member.
84 >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
85 ... {'name': 'Bob', 'gender': 'M'},
86 ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP
87 {'F': [{'gender': 'F', 'name': 'Alice'}],
88 'M': [{'gender': 'M', 'name': 'Bob'},
89 {'gender': 'M', 'name': 'Charlie'}]}
91 Not to be confused with ``itertools.groupby``
93 See Also:
94 countby
95 """
96 if not callable(key):
97 key = getter(key)
98 d = collections.defaultdict(lambda: [].append)
99 for item in seq:
100 d[key(item)](item)
101 rv = {}
102 for k, v in d.items():
103 rv[k] = v.__self__
104 return rv
107def merge_sorted(*seqs, **kwargs):
108 """ Merge and sort a collection of sorted collections
110 This works lazily and only keeps one value from each iterable in memory.
112 >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
113 [1, 2, 3, 4, 5, 6]
115 >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
116 'aaabbbccc'
118 The "key" function used to sort the input may be passed as a keyword.
120 >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
121 [2, 1, 3, 3]
122 """
123 if len(seqs) == 0:
124 return iter([])
125 elif len(seqs) == 1:
126 return iter(seqs[0])
128 key = kwargs.get('key', None)
129 if key is None:
130 return _merge_sorted_binary(seqs)
131 else:
132 return _merge_sorted_binary_key(seqs, key)
135def _merge_sorted_binary(seqs):
136 mid = len(seqs) // 2
137 L1 = seqs[:mid]
138 if len(L1) == 1:
139 seq1 = iter(L1[0])
140 else:
141 seq1 = _merge_sorted_binary(L1)
142 L2 = seqs[mid:]
143 if len(L2) == 1:
144 seq2 = iter(L2[0])
145 else:
146 seq2 = _merge_sorted_binary(L2)
148 try:
149 val2 = next(seq2)
150 except StopIteration:
151 for val1 in seq1:
152 yield val1
153 return
155 for val1 in seq1:
156 if val2 < val1:
157 yield val2
158 for val2 in seq2:
159 if val2 < val1:
160 yield val2
161 else:
162 yield val1
163 break
164 else:
165 break
166 else:
167 yield val1
168 else:
169 yield val2
170 for val2 in seq2:
171 yield val2
172 return
173 yield val1
174 for val1 in seq1:
175 yield val1
178def _merge_sorted_binary_key(seqs, key):
179 mid = len(seqs) // 2
180 L1 = seqs[:mid]
181 if len(L1) == 1:
182 seq1 = iter(L1[0])
183 else:
184 seq1 = _merge_sorted_binary_key(L1, key)
185 L2 = seqs[mid:]
186 if len(L2) == 1:
187 seq2 = iter(L2[0])
188 else:
189 seq2 = _merge_sorted_binary_key(L2, key)
191 try:
192 val2 = next(seq2)
193 except StopIteration:
194 for val1 in seq1:
195 yield val1
196 return
197 key2 = key(val2)
199 for val1 in seq1:
200 key1 = key(val1)
201 if key2 < key1:
202 yield val2
203 for val2 in seq2:
204 key2 = key(val2)
205 if key2 < key1:
206 yield val2
207 else:
208 yield val1
209 break
210 else:
211 break
212 else:
213 yield val1
214 else:
215 yield val2
216 for val2 in seq2:
217 yield val2
218 return
219 yield val1
220 for val1 in seq1:
221 yield val1
224def interleave(seqs):
225 """ Interleave a sequence of sequences
227 >>> list(interleave([[1, 2], [3, 4]]))
228 [1, 3, 2, 4]
230 >>> ''.join(interleave(('ABC', 'XY')))
231 'AXBYC'
233 Both the individual sequences and the sequence of sequences may be infinite
235 Returns a lazy iterator
236 """
237 iters = itertools.cycle(map(iter, seqs))
238 while True:
239 try:
240 for itr in iters:
241 yield next(itr)
242 return
243 except StopIteration:
244 predicate = partial(operator.is_not, itr)
245 iters = itertools.cycle(itertools.takewhile(predicate, iters))
248def unique(seq, key=None):
249 """ Return only unique elements of a sequence
251 >>> tuple(unique((1, 2, 3)))
252 (1, 2, 3)
253 >>> tuple(unique((1, 2, 1, 3)))
254 (1, 2, 3)
256 Uniqueness can be defined by key keyword
258 >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
259 ('cat', 'mouse')
260 """
261 seen = set()
262 seen_add = seen.add
263 if key is None:
264 for item in seq:
265 if item not in seen:
266 seen_add(item)
267 yield item
268 else: # calculate key
269 for item in seq:
270 val = key(item)
271 if val not in seen:
272 seen_add(val)
273 yield item
276def isiterable(x):
277 """ Is x iterable?
279 >>> isiterable([1, 2, 3])
280 True
281 >>> isiterable('abc')
282 True
283 >>> isiterable(5)
284 False
285 """
286 try:
287 iter(x)
288 return True
289 except TypeError:
290 return False
293def isdistinct(seq):
294 """ All values in sequence are distinct
296 >>> isdistinct([1, 2, 3])
297 True
298 >>> isdistinct([1, 2, 1])
299 False
301 >>> isdistinct("Hello")
302 False
303 >>> isdistinct("World")
304 True
305 """
306 if iter(seq) is seq:
307 seen = set()
308 seen_add = seen.add
309 for item in seq:
310 if item in seen:
311 return False
312 seen_add(item)
313 return True
314 else:
315 return len(seq) == len(set(seq))
318def take(n, seq):
319 """ The first n elements of a sequence
321 >>> list(take(2, [10, 20, 30, 40, 50]))
322 [10, 20]
324 See Also:
325 drop
326 tail
327 """
328 return itertools.islice(seq, n)
331def tail(n, seq):
332 """ The last n elements of a sequence
334 >>> tail(2, [10, 20, 30, 40, 50])
335 [40, 50]
337 See Also:
338 drop
339 take
340 """
341 try:
342 return seq[-n:]
343 except (TypeError, KeyError):
344 return tuple(collections.deque(seq, n))
347def drop(n, seq):
348 """ The sequence following the first n elements
350 >>> list(drop(2, [10, 20, 30, 40, 50]))
351 [30, 40, 50]
353 See Also:
354 take
355 tail
356 """
357 return itertools.islice(seq, n, None)
360def take_nth(n, seq):
361 """ Every nth item in seq
363 >>> list(take_nth(2, [10, 20, 30, 40, 50]))
364 [10, 30, 50]
365 """
366 return itertools.islice(seq, 0, None, n)
369def first(seq):
370 """ The first element in a sequence
372 >>> first('ABC')
373 'A'
374 """
375 return next(iter(seq))
378def second(seq):
379 """ The second element in a sequence
381 >>> second('ABC')
382 'B'
383 """
384 seq = iter(seq)
385 next(seq)
386 return next(seq)
389def nth(n, seq):
390 """ The nth element in a sequence
392 >>> nth(1, 'ABC')
393 'B'
394 """
395 if isinstance(seq, (tuple, list, Sequence)):
396 return seq[n]
397 else:
398 return next(itertools.islice(seq, n, None))
401def last(seq):
402 """ The last element in a sequence
404 >>> last('ABC')
405 'C'
406 """
407 return tail(1, seq)[0]
410rest = partial(drop, 1)
413def _get(ind, seq, default):
414 try:
415 return seq[ind]
416 except (KeyError, IndexError):
417 return default
420def get(ind, seq, default=no_default):
421 """ Get element in a sequence or dict
423 Provides standard indexing
425 >>> get(1, 'ABC') # Same as 'ABC'[1]
426 'B'
428 Pass a list to get multiple values
430 >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2])
431 ('B', 'C')
433 Works on any value that supports indexing/getitem
434 For example here we see that it works with dictionaries
436 >>> phonebook = {'Alice': '555-1234',
437 ... 'Bob': '555-5678',
438 ... 'Charlie':'555-9999'}
439 >>> get('Alice', phonebook)
440 '555-1234'
442 >>> get(['Alice', 'Bob'], phonebook)
443 ('555-1234', '555-5678')
445 Provide a default for missing values
447 >>> get(['Alice', 'Dennis'], phonebook, None)
448 ('555-1234', None)
450 See Also:
451 pluck
452 """
453 try:
454 return seq[ind]
455 except TypeError: # `ind` may be a list
456 if isinstance(ind, list):
457 if default == no_default:
458 if len(ind) > 1:
459 return operator.itemgetter(*ind)(seq)
460 elif ind:
461 return seq[ind[0]],
462 else:
463 return ()
464 else:
465 return tuple(_get(i, seq, default) for i in ind)
466 elif default != no_default:
467 return default
468 else:
469 raise
470 except (KeyError, IndexError): # we know `ind` is not a list
471 if default == no_default:
472 raise
473 else:
474 return default
477def concat(seqs):
478 """ Concatenate zero or more iterables, any of which may be infinite.
480 An infinite sequence will prevent the rest of the arguments from
481 being included.
483 We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
484 can be a generator.
486 >>> list(concat([[], [1], [2, 3]]))
487 [1, 2, 3]
489 See also:
490 itertools.chain.from_iterable equivalent
491 """
492 return itertools.chain.from_iterable(seqs)
495def concatv(*seqs):
496 """ Variadic version of concat
498 >>> list(concatv([], ["a"], ["b", "c"]))
499 ['a', 'b', 'c']
501 See also:
502 itertools.chain
503 """
504 return concat(seqs)
507def mapcat(func, seqs):
508 """ Apply func to each sequence in seqs, concatenating results.
510 >>> list(mapcat(lambda s: [c.upper() for c in s],
511 ... [["a", "b"], ["c", "d", "e"]]))
512 ['A', 'B', 'C', 'D', 'E']
513 """
514 return concat(map(func, seqs))
517def cons(el, seq):
518 """ Add el to beginning of (possibly infinite) sequence seq.
520 >>> list(cons(1, [2, 3]))
521 [1, 2, 3]
522 """
523 return itertools.chain([el], seq)
526def interpose(el, seq):
527 """ Introduce element between each pair of elements in seq
529 >>> list(interpose("a", [1, 2, 3]))
530 [1, 'a', 2, 'a', 3]
531 """
532 inposed = concat(zip(itertools.repeat(el), seq))
533 next(inposed)
534 return inposed
537def frequencies(seq):
538 """ Find number of occurrences of each value in seq
540 >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP
541 {'cat': 3, 'ox': 1, 'pig': 2}
543 See Also:
544 countby
545 groupby
546 """
547 d = collections.defaultdict(int)
548 for item in seq:
549 d[item] += 1
550 return dict(d)
553def reduceby(key, binop, seq, init=no_default):
554 """ Perform a simultaneous groupby and reduction
556 The computation:
558 >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP
560 is equivalent to the following:
562 >>> def reduction(group): # doctest: +SKIP
563 ... return reduce(binop, group, init) # doctest: +SKIP
565 >>> groups = groupby(key, seq) # doctest: +SKIP
566 >>> result = valmap(reduction, groups) # doctest: +SKIP
568 But the former does not build the intermediate groups, allowing it to
569 operate in much less space. This makes it suitable for larger datasets
570 that do not fit comfortably in memory
572 The ``init`` keyword argument is the default initialization of the
573 reduction. This can be either a constant value like ``0`` or a callable
574 like ``lambda : 0`` as might be used in ``defaultdict``.
576 Simple Examples
577 ---------------
579 >>> from operator import add, mul
580 >>> iseven = lambda x: x % 2 == 0
582 >>> data = [1, 2, 3, 4, 5]
584 >>> reduceby(iseven, add, data) # doctest: +SKIP
585 {False: 9, True: 6}
587 >>> reduceby(iseven, mul, data) # doctest: +SKIP
588 {False: 15, True: 8}
590 Complex Example
591 ---------------
593 >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
594 ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
595 ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
596 ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]
598 >>> reduceby('state', # doctest: +SKIP
599 ... lambda acc, x: acc + x['cost'],
600 ... projects, 0)
601 {'CA': 1200000, 'IL': 2100000}
603 Example Using ``init``
604 ----------------------
606 >>> def set_add(s, i):
607 ... s.add(i)
608 ... return s
610 >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP
611 {True: set([2, 4]),
612 False: set([1, 3])}
613 """
614 is_no_default = init == no_default
615 if not is_no_default and not callable(init):
616 _init = init
617 init = lambda: _init
618 if not callable(key):
619 key = getter(key)
620 d = {}
621 for item in seq:
622 k = key(item)
623 if k not in d:
624 if is_no_default:
625 d[k] = item
626 continue
627 else:
628 d[k] = init()
629 d[k] = binop(d[k], item)
630 return d
633def iterate(func, x):
634 """ Repeatedly apply a function func onto an original input
636 Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..
638 >>> def inc(x): return x + 1
639 >>> counter = iterate(inc, 0)
640 >>> next(counter)
641 0
642 >>> next(counter)
643 1
644 >>> next(counter)
645 2
647 >>> double = lambda x: x * 2
648 >>> powers_of_two = iterate(double, 1)
649 >>> next(powers_of_two)
650 1
651 >>> next(powers_of_two)
652 2
653 >>> next(powers_of_two)
654 4
655 >>> next(powers_of_two)
656 8
657 """
658 while True:
659 yield x
660 x = func(x)
663def sliding_window(n, seq):
664 """ A sequence of overlapping subsequences
666 >>> list(sliding_window(2, [1, 2, 3, 4]))
667 [(1, 2), (2, 3), (3, 4)]
669 This function creates a sliding window suitable for transformations like
670 sliding means / smoothing
672 >>> mean = lambda seq: float(sum(seq)) / len(seq)
673 >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
674 [1.5, 2.5, 3.5]
675 """
676 return zip(*(collections.deque(itertools.islice(it, i), 0) or it
677 for i, it in enumerate(itertools.tee(seq, n))))
680no_pad = '__no__pad__'
683def partition(n, seq, pad=no_pad):
684 """ Partition sequence into tuples of length n
686 >>> list(partition(2, [1, 2, 3, 4]))
687 [(1, 2), (3, 4)]
689 If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
690 is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:
692 >>> list(partition(2, [1, 2, 3, 4, 5]))
693 [(1, 2), (3, 4)]
695 >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
696 [(1, 2), (3, 4), (5, None)]
698 See Also:
699 partition_all
700 """
701 args = [iter(seq)] * n
702 if pad is no_pad:
703 return zip(*args)
704 else:
705 return zip_longest(*args, fillvalue=pad)
708def partition_all(n, seq):
709 """ Partition all elements of sequence into tuples of length at most n
711 The final tuple may be shorter to accommodate extra elements.
713 >>> list(partition_all(2, [1, 2, 3, 4]))
714 [(1, 2), (3, 4)]
716 >>> list(partition_all(2, [1, 2, 3, 4, 5]))
717 [(1, 2), (3, 4), (5,)]
719 See Also:
720 partition
721 """
722 args = [iter(seq)] * n
723 it = zip_longest(*args, fillvalue=no_pad)
724 try:
725 prev = next(it)
726 except StopIteration:
727 return
728 for item in it:
729 yield prev
730 prev = item
731 if prev[-1] is no_pad:
732 try:
733 # If seq defines __len__, then
734 # we can quickly calculate where no_pad starts
735 yield prev[:len(seq) % n]
736 except TypeError:
737 # Get first index of no_pad without using .index()
738 # https://github.com/pytoolz/toolz/issues/387
739 # Binary search from CPython's bisect module,
740 # modified for identity testing.
741 lo, hi = 0, n
742 while lo < hi:
743 mid = (lo + hi) // 2
744 if prev[mid] is no_pad:
745 hi = mid
746 else:
747 lo = mid + 1
748 yield prev[:lo]
749 else:
750 yield prev
753def count(seq):
754 """ Count the number of items in seq
756 Like the builtin ``len`` but works on lazy sequences.
758 Not to be confused with ``itertools.count``
760 See also:
761 len
762 """
763 if hasattr(seq, '__len__'):
764 return len(seq)
765 return sum(1 for i in seq)
768def pluck(ind, seqs, default=no_default):
769 """ plucks an element or several elements from each item in a sequence.
771 ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
772 elements of each item in the sequence.
774 This is equivalent to running `map(curried.get(ind), seqs)`
776 ``ind`` can be either a single string/index or a list of strings/indices.
777 ``seqs`` should be sequence containing sequences or dicts.
779 e.g.
781 >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
782 >>> list(pluck('name', data))
783 ['Cheese', 'Pies']
784 >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
785 [(1, 2), (4, 5)]
787 See Also:
788 get
789 map
790 """
791 if default == no_default:
792 get = getter(ind)
793 return map(get, seqs)
794 elif isinstance(ind, list):
795 return (tuple(_get(item, seq, default) for item in ind)
796 for seq in seqs)
797 return (_get(ind, seq, default) for seq in seqs)
800def getter(index):
801 if isinstance(index, list):
802 if len(index) == 1:
803 index = index[0]
804 return lambda x: (x[index],)
805 elif index:
806 return operator.itemgetter(*index)
807 else:
808 return lambda x: ()
809 else:
810 return operator.itemgetter(index)
813def join(leftkey, leftseq, rightkey, rightseq,
814 left_default=no_default, right_default=no_default):
815 """ Join two sequences on common attributes
817 This is a semi-streaming operation. The LEFT sequence is fully evaluated
818 and placed into memory. The RIGHT sequence is evaluated lazily and so can
819 be arbitrarily large.
821 (Note: If right_default is defined, then unique keys of rightseq
822 will also be stored in memory.)
824 >>> friends = [('Alice', 'Edith'),
825 ... ('Alice', 'Zhao'),
826 ... ('Edith', 'Alice'),
827 ... ('Zhao', 'Alice'),
828 ... ('Zhao', 'Edith')]
830 >>> cities = [('Alice', 'NYC'),
831 ... ('Alice', 'Chicago'),
832 ... ('Dan', 'Sydney'),
833 ... ('Edith', 'Paris'),
834 ... ('Edith', 'Berlin'),
835 ... ('Zhao', 'Shanghai')]
837 >>> # Vacation opportunities
838 >>> # In what cities do people have friends?
839 >>> result = join(second, friends,
840 ... first, cities)
841 >>> for ((a, b), (c, d)) in sorted(unique(result)):
842 ... print((a, d))
843 ('Alice', 'Berlin')
844 ('Alice', 'Paris')
845 ('Alice', 'Shanghai')
846 ('Edith', 'Chicago')
847 ('Edith', 'NYC')
848 ('Zhao', 'Chicago')
849 ('Zhao', 'NYC')
850 ('Zhao', 'Berlin')
851 ('Zhao', 'Paris')
853 Specify outer joins with keyword arguments ``left_default`` and/or
854 ``right_default``. Here is a full outer join in which unmatched elements
855 are paired with None.
857 >>> identity = lambda x: x
858 >>> list(join(identity, [1, 2, 3],
859 ... identity, [2, 3, 4],
860 ... left_default=None, right_default=None))
861 [(2, 2), (3, 3), (None, 4), (1, None)]
863 Usually the key arguments are callables to be applied to the sequences. If
864 the keys are not obviously callable then it is assumed that indexing was
865 intended, e.g. the following is a legal change.
866 The join is implemented as a hash join and the keys of leftseq must be
867 hashable. Additionally, if right_default is defined, then keys of rightseq
868 must also be hashable.
870 >>> # result = join(second, friends, first, cities)
871 >>> result = join(1, friends, 0, cities) # doctest: +SKIP
872 """
873 if not callable(leftkey):
874 leftkey = getter(leftkey)
875 if not callable(rightkey):
876 rightkey = getter(rightkey)
878 d = groupby(leftkey, leftseq)
880 if left_default == no_default and right_default == no_default:
881 # Inner Join
882 for item in rightseq:
883 key = rightkey(item)
884 if key in d:
885 for left_match in d[key]:
886 yield (left_match, item)
887 elif left_default != no_default and right_default == no_default:
888 # Right Join
889 for item in rightseq:
890 key = rightkey(item)
891 if key in d:
892 for left_match in d[key]:
893 yield (left_match, item)
894 else:
895 yield (left_default, item)
896 elif right_default != no_default:
897 seen_keys = set()
898 seen = seen_keys.add
900 if left_default == no_default:
901 # Left Join
902 for item in rightseq:
903 key = rightkey(item)
904 seen(key)
905 if key in d:
906 for left_match in d[key]:
907 yield (left_match, item)
908 else:
909 # Full Join
910 for item in rightseq:
911 key = rightkey(item)
912 seen(key)
913 if key in d:
914 for left_match in d[key]:
915 yield (left_match, item)
916 else:
917 yield (left_default, item)
919 for key, matches in d.items():
920 if key not in seen_keys:
921 for match in matches:
922 yield (match, right_default)
925def diff(*seqs, **kwargs):
926 """ Return those items that differ between sequences
928 >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
929 [(3, 10)]
931 Shorter sequences may be padded with a ``default`` value:
933 >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
934 [(3, 10), (None, 100)]
936 A ``key`` function may also be applied to each item to use during
937 comparisons:
939 >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
940 [('bananas', 'Oranges')]
941 """
942 N = len(seqs)
943 if N == 1 and isinstance(seqs[0], list):
944 seqs = seqs[0]
945 N = len(seqs)
946 if N < 2:
947 raise TypeError('Too few sequences given (min 2 required)')
948 default = kwargs.get('default', no_default)
949 if default == no_default:
950 iters = zip(*seqs)
951 else:
952 iters = zip_longest(*seqs, fillvalue=default)
953 key = kwargs.get('key', None)
954 if key is None:
955 for items in iters:
956 if items.count(items[0]) != N:
957 yield items
958 else:
959 for items in iters:
960 vals = tuple(map(key, items))
961 if vals.count(vals[0]) != N:
962 yield items
965def topk(k, seq, key=None):
966 """ Find the k largest elements of a sequence
968 Operates lazily in ``n*log(k)`` time
970 >>> topk(2, [1, 100, 10, 1000])
971 (1000, 100)
973 Use a key function to change sorted order
975 >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
976 ('Charlie', 'Alice')
978 See also:
979 heapq.nlargest
980 """
981 if key is not None and not callable(key):
982 key = getter(key)
983 return tuple(heapq.nlargest(k, seq, key=key))
986def peek(seq):
987 """ Retrieve the next element of a sequence
989 Returns the first element and an iterable equivalent to the original
990 sequence, still having the element retrieved.
992 >>> seq = [0, 1, 2, 3, 4]
993 >>> first, seq = peek(seq)
994 >>> first
995 0
996 >>> list(seq)
997 [0, 1, 2, 3, 4]
998 """
999 iterator = iter(seq)
1000 item = next(iterator)
1001 return item, itertools.chain((item,), iterator)
1004def peekn(n, seq):
1005 """ Retrieve the next n elements of a sequence
1007 Returns a tuple of the first n elements and an iterable equivalent
1008 to the original, still having the elements retrieved.
1010 >>> seq = [0, 1, 2, 3, 4]
1011 >>> first_two, seq = peekn(2, seq)
1012 >>> first_two
1013 (0, 1)
1014 >>> list(seq)
1015 [0, 1, 2, 3, 4]
1016 """
1017 iterator = iter(seq)
1018 peeked = tuple(take(n, iterator))
1019 return peeked, itertools.chain(iter(peeked), iterator)
1022def random_sample(prob, seq, random_state=None):
1023 """ Return elements from a sequence with probability of prob
1025 Returns a lazy iterator of random items from seq.
1027 ``random_sample`` considers each item independently and without
1028 replacement. See below how the first time it returned 13 items and the
1029 next time it returned 6 items.
1031 >>> seq = list(range(100))
1032 >>> list(random_sample(0.1, seq)) # doctest: +SKIP
1033 [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
1034 >>> list(random_sample(0.1, seq)) # doctest: +SKIP
1035 [6, 44, 54, 61, 69, 94]
1037 Providing an integer seed for ``random_state`` will result in
1038 deterministic sampling. Given the same seed it will return the same sample
1039 every time.
1041 >>> list(random_sample(0.1, seq, random_state=2016))
1042 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1043 >>> list(random_sample(0.1, seq, random_state=2016))
1044 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1046 ``random_state`` can also be any object with a method ``random`` that
1047 returns floats between 0.0 and 1.0 (exclusive).
1049 >>> from random import Random
1050 >>> randobj = Random(2016)
1051 >>> list(random_sample(0.1, seq, random_state=randobj))
1052 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
1053 """
1054 if not hasattr(random_state, 'random'):
1055 from random import Random
1057 random_state = Random(random_state)
1058 return filter(lambda _: random_state.random() < prob, seq)