Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/toolz/itertoolz.py: 14%


363 statements  

import itertools
import heapq
import collections
import operator
from functools import partial
from itertools import filterfalse, zip_longest
from collections.abc import Sequence
from toolz.utils import no_default


__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
           'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
           'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
           'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
           'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
           'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')


def remove(predicate, seq):
    """ Return those items of sequence for which predicate(item) is False

    >>> def iseven(x):
    ...     return x % 2 == 0
    >>> list(remove(iseven, [1, 2, 3, 4]))
    [1, 3]
    """
    return filterfalse(predicate, seq)


def accumulate(binop, seq, initial=no_default):
    """ Repeatedly apply binary function to a sequence, accumulating results

    >>> from operator import add, mul
    >>> list(accumulate(add, [1, 2, 3, 4, 5]))
    [1, 3, 6, 10, 15]
    >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
    [1, 2, 6, 24, 120]

    Accumulate is similar to ``reduce`` and is good for making functions like
    cumulative sum:

    >>> from functools import partial, reduce
    >>> sum = partial(reduce, add)
    >>> cumsum = partial(accumulate, add)

    Accumulate also takes an optional argument that will be used as the first
    value. This is similar to reduce.

    >>> list(accumulate(add, [1, 2, 3], -1))
    [-1, 0, 2, 5]
    >>> list(accumulate(add, [], 1))
    [1]

    See Also:
        itertools.accumulate : In standard itertools for Python 3.2+
    """
    seq = iter(seq)
    if initial == no_default:
        try:
            result = next(seq)
        except StopIteration:
            return
    else:
        result = initial
    yield result
    for elem in seq:
        result = binop(result, elem)
        yield result


def groupby(key, seq):
    """ Group a collection by a key function

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    Non-callable keys imply grouping on a member.

    >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                    {'name': 'Bob', 'gender': 'M'},
    ...                    {'name': 'Charlie', 'gender': 'M'}])  # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    Not to be confused with ``itertools.groupby``

    See Also:
        countby
    """
    if not callable(key):
        key = getter(key)
    d = collections.defaultdict(lambda: [].append)
    for item in seq:
        d[key(item)](item)
    rv = {}
    for k, v in d.items():
        rv[k] = v.__self__
    return rv

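
# Illustrative sketch (not part of toolz): ``groupby`` above stores a bound
# ``list.append`` as each defaultdict value and recovers the underlying list
# through ``__self__``, which avoids an attribute lookup per element.
# ``_groupby_plain`` is a hypothetical name for the straightforward
# equivalent of that trick (callable keys only).
def _groupby_plain(key, seq):
    d = collections.defaultdict(list)
    for item in seq:
        d[key(item)].append(item)
    return dict(d)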

def merge_sorted(*seqs, **kwargs):
    """ Merge and sort a collection of sorted collections

    This works lazily and only keeps one value from each iterable in memory.

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'

    The "key" function used to sort the input may be passed as a keyword.

    >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
    [2, 1, 3, 3]
    """
    if len(seqs) == 0:
        return iter([])
    elif len(seqs) == 1:
        return iter(seqs[0])

    key = kwargs.get('key', None)
    if key is None:
        return _merge_sorted_binary(seqs)
    else:
        return _merge_sorted_binary_key(seqs, key)


def _merge_sorted_binary(seqs):
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary(L1)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary(L2)

    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return

    for val1 in seq1:
        if val2 < val1:
            yield val2
            for val2 in seq2:
                if val2 < val1:
                    yield val2
                else:
                    yield val1
                    break
            else:
                break
        else:
            yield val1
    else:
        yield val2
        for val2 in seq2:
            yield val2
        return
    yield val1
    for val1 in seq1:
        yield val1


def _merge_sorted_binary_key(seqs, key):
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary_key(L1, key)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary_key(L2, key)

    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return
    key2 = key(val2)

    for val1 in seq1:
        key1 = key(val1)
        if key2 < key1:
            yield val2
            for val2 in seq2:
                key2 = key(val2)
                if key2 < key1:
                    yield val2
                else:
                    yield val1
                    break
            else:
                break
        else:
            yield val1
    else:
        yield val2
        for val2 in seq2:
            yield val2
        return
    yield val1
    for val1 in seq1:
        yield val1

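
# Note: the two binary-merge helpers above recurse on halves of ``seqs`` and
# keep at most one pending value per iterable in memory.  For already-sorted
# inputs the standard library's ``heapq.merge`` is a lazy alternative with
# similar behavior (a minimal comparison, not used by this module):
#
#     list(heapq.merge([1, 3, 5], [2, 4, 6]))    # [1, 2, 3, 4, 5, 6]
#     list(heapq.merge([2, 3], [1, 3], key=lambda x: x // 3))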

def interleave(seqs):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    iters = itertools.cycle(map(iter, seqs))
    while True:
        try:
            for itr in iters:
                yield next(itr)
            return
        except StopIteration:
            predicate = partial(operator.is_not, itr)
            iters = itertools.cycle(itertools.takewhile(predicate, iters))

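
# Note on ``interleave`` above: when ``next(itr)`` raises StopIteration, the
# ``for`` loop leaves ``itr`` bound to the exhausted iterator.  Rebuilding the
# cycle with ``takewhile(partial(operator.is_not, itr), iters)`` walks the old
# cycle up to, but excluding, that iterator, so the exhausted one is dropped
# while the surviving iterators keep their round-robin order.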

def unique(seq, key=None):
    """ Return only unique elements of a sequence

    >>> tuple(unique((1, 2, 3)))
    (1, 2, 3)
    >>> tuple(unique((1, 2, 1, 3)))
    (1, 2, 3)

    Uniqueness can be defined by key keyword

    >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
    ('cat', 'mouse')
    """
    seen = set()
    seen_add = seen.add
    if key is None:
        for item in seq:
            if item not in seen:
                seen_add(item)
                yield item
    else:  # calculate key
        for item in seq:
            val = key(item)
            if val not in seen:
                seen_add(val)
                yield item


def isiterable(x):
    """ Is x iterable?

    >>> isiterable([1, 2, 3])
    True
    >>> isiterable('abc')
    True
    >>> isiterable(5)
    False
    """
    try:
        iter(x)
        return True
    except TypeError:
        return False


def isdistinct(seq):
    """ All values in sequence are distinct

    >>> isdistinct([1, 2, 3])
    True
    >>> isdistinct([1, 2, 1])
    False

    >>> isdistinct("Hello")
    False
    >>> isdistinct("World")
    True
    """
    if iter(seq) is seq:
        seen = set()
        seen_add = seen.add
        for item in seq:
            if item in seen:
                return False
            seen_add(item)
        return True
    else:
        return len(seq) == len(set(seq))

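
# Note on ``isdistinct`` above: ``iter(seq) is seq`` is true exactly when
# ``seq`` is already an iterator (iterators return themselves from ``iter``).
# Such input can only be consumed once, so the streaming ``seen``-set branch
# is used; ordinary containers fall through to the cheaper
# ``len(seq) == len(set(seq))`` comparison.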

def take(n, seq):
    """ The first n elements of a sequence

    >>> list(take(2, [10, 20, 30, 40, 50]))
    [10, 20]

    See Also:
        drop
        tail
    """
    return itertools.islice(seq, n)


def tail(n, seq):
    """ The last n elements of a sequence

    >>> tail(2, [10, 20, 30, 40, 50])
    [40, 50]

    See Also:
        drop
        take
    """
    try:
        return seq[-n:]
    except (TypeError, KeyError):
        return tuple(collections.deque(seq, n))


def drop(n, seq):
    """ The sequence following the first n elements

    >>> list(drop(2, [10, 20, 30, 40, 50]))
    [30, 40, 50]

    See Also:
        take
        tail
    """
    return itertools.islice(seq, n, None)


def take_nth(n, seq):
    """ Every nth item in seq

    >>> list(take_nth(2, [10, 20, 30, 40, 50]))
    [10, 30, 50]
    """
    return itertools.islice(seq, 0, None, n)


def first(seq):
    """ The first element in a sequence

    >>> first('ABC')
    'A'
    """
    return next(iter(seq))


def second(seq):
    """ The second element in a sequence

    >>> second('ABC')
    'B'
    """
    seq = iter(seq)
    next(seq)
    return next(seq)


def nth(n, seq):
    """ The nth element in a sequence

    >>> nth(1, 'ABC')
    'B'
    """
    if isinstance(seq, (tuple, list, Sequence)):
        return seq[n]
    else:
        return next(itertools.islice(seq, n, None))


def last(seq):
    """ The last element in a sequence

    >>> last('ABC')
    'C'
    """
    return tail(1, seq)[0]


rest = partial(drop, 1)


def _get(ind, seq, default):
    try:
        return seq[ind]
    except (KeyError, IndexError):
        return default


def get(ind, seq, default=no_default):
    """ Get element in a sequence or dict

    Provides standard indexing

    >>> get(1, 'ABC')  # Same as 'ABC'[1]
    'B'

    Pass a list to get multiple values

    >>> get([1, 2], 'ABC')  # ('ABC'[1], 'ABC'[2])
    ('B', 'C')

    Works on any value that supports indexing/getitem
    For example here we see that it works with dictionaries

    >>> phonebook = {'Alice': '555-1234',
    ...              'Bob': '555-5678',
    ...              'Charlie': '555-9999'}
    >>> get('Alice', phonebook)
    '555-1234'

    >>> get(['Alice', 'Bob'], phonebook)
    ('555-1234', '555-5678')

    Provide a default for missing values

    >>> get(['Alice', 'Dennis'], phonebook, None)
    ('555-1234', None)

    See Also:
        pluck
    """
    try:
        return seq[ind]
    except TypeError:  # `ind` may be a list
        if isinstance(ind, list):
            if default == no_default:
                if len(ind) > 1:
                    return operator.itemgetter(*ind)(seq)
                elif ind:
                    return seq[ind[0]],
                else:
                    return ()
            else:
                return tuple(_get(i, seq, default) for i in ind)
        elif default != no_default:
            return default
        else:
            raise
    except (KeyError, IndexError):  # we know `ind` is not a list
        if default == no_default:
            raise
        else:
            return default


def concat(seqs):
    """ Concatenate zero or more iterables, any of which may be infinite.

    An infinite sequence will prevent the rest of the arguments from
    being included.

    We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
    can be a generator.

    >>> list(concat([[], [1], [2, 3]]))
    [1, 2, 3]

    See also:
        itertools.chain.from_iterable  equivalent
    """
    return itertools.chain.from_iterable(seqs)


def concatv(*seqs):
    """ Variadic version of concat

    >>> list(concatv([], ["a"], ["b", "c"]))
    ['a', 'b', 'c']

    See also:
        itertools.chain
    """
    return concat(seqs)


def mapcat(func, seqs):
    """ Apply func to each sequence in seqs, concatenating results.

    >>> list(mapcat(lambda s: [c.upper() for c in s],
    ...             [["a", "b"], ["c", "d", "e"]]))
    ['A', 'B', 'C', 'D', 'E']
    """
    return concat(map(func, seqs))


def cons(el, seq):
    """ Add el to beginning of (possibly infinite) sequence seq.

    >>> list(cons(1, [2, 3]))
    [1, 2, 3]
    """
    return itertools.chain([el], seq)


def interpose(el, seq):
    """ Introduce element between each pair of elements in seq

    >>> list(interpose("a", [1, 2, 3]))
    [1, 'a', 2, 'a', 3]
    """
    inposed = concat(zip(itertools.repeat(el), seq))
    next(inposed)
    return inposed

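
# Note on ``interpose`` above: ``zip(itertools.repeat(el), seq)`` yields the
# pairs (el, x0), (el, x1), ..., so the concatenated stream begins with a
# leading ``el``.  The single ``next(inposed)`` call discards that leading
# separator, leaving x0, el, x1, el, x2, ...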

def frequencies(seq):
    """ Find number of occurrences of each value in seq

    >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat'])  #doctest: +SKIP
    {'cat': 3, 'ox': 1, 'pig': 2}

    See Also:
        countby
        groupby
    """
    d = collections.defaultdict(int)
    for item in seq:
        d[item] += 1
    return dict(d)


def reduceby(key, binop, seq, init=no_default):
    """ Perform a simultaneous groupby and reduction

    The computation:

    >>> result = reduceby(key, binop, seq, init)  # doctest: +SKIP

    is equivalent to the following:

    >>> def reduction(group):  # doctest: +SKIP
    ...     return reduce(binop, group, init)  # doctest: +SKIP

    >>> groups = groupby(key, seq)  # doctest: +SKIP
    >>> result = valmap(reduction, groups)  # doctest: +SKIP

    But the former does not build the intermediate groups, allowing it to
    operate in much less space.  This makes it suitable for larger datasets
    that do not fit comfortably in memory

    The ``init`` keyword argument is the default initialization of the
    reduction.  This can be either a constant value like ``0`` or a callable
    like ``lambda : 0`` as might be used in ``defaultdict``.

    Simple Examples
    ---------------

    >>> from operator import add, mul
    >>> iseven = lambda x: x % 2 == 0

    >>> data = [1, 2, 3, 4, 5]

    >>> reduceby(iseven, add, data)  # doctest: +SKIP
    {False: 9, True: 6}

    >>> reduceby(iseven, mul, data)  # doctest: +SKIP
    {False: 15, True: 8}

    Complex Example
    ---------------

    >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
    ...             {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
    ...             {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
    ...             {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]

    >>> reduceby('state',  # doctest: +SKIP
    ...          lambda acc, x: acc + x['cost'],
    ...          projects, 0)
    {'CA': 1200000, 'IL': 2100000}

    Example Using ``init``
    ----------------------

    >>> def set_add(s, i):
    ...     s.add(i)
    ...     return s

    >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set)  # doctest: +SKIP
    {True:  set([2, 4]),
     False: set([1, 3])}
    """
    is_no_default = init == no_default
    if not is_no_default and not callable(init):
        _init = init
        init = lambda: _init
    if not callable(key):
        key = getter(key)
    d = {}
    for item in seq:
        k = key(item)
        if k not in d:
            if is_no_default:
                d[k] = item
                continue
            else:
                d[k] = init()
        d[k] = binop(d[k], item)
    return d

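
# Illustrative sketch (not part of toolz): the non-streaming computation that
# the ``reduceby`` docstring describes, written out as a runnable helper.
# ``_reduceby_materialized`` is a hypothetical name; it assumes ``init`` is a
# plain value and, unlike ``reduceby``, it builds every group in memory.
def _reduceby_materialized(key, binop, seq, init):
    from functools import reduce
    groups = groupby(key, seq)
    return {k: reduce(binop, group, init) for k, group in groups.items()}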

def iterate(func, x):
    """ Repeatedly apply a function func onto an original input

    Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..

    >>> def inc(x): return x + 1
    >>> counter = iterate(inc, 0)
    >>> next(counter)
    0
    >>> next(counter)
    1
    >>> next(counter)
    2

    >>> double = lambda x: x * 2
    >>> powers_of_two = iterate(double, 1)
    >>> next(powers_of_two)
    1
    >>> next(powers_of_two)
    2
    >>> next(powers_of_two)
    4
    >>> next(powers_of_two)
    8
    """
    while True:
        yield x
        x = func(x)


def sliding_window(n, seq):
    """ A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]
    """
    return zip(*(collections.deque(itertools.islice(it, i), 0) or it
                 for i, it in enumerate(itertools.tee(seq, n))))

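
# Illustrative sketch (not part of toolz): a plainer deque-based version of
# ``sliding_window`` for n >= 1.  The library version above instead advances
# the i-th of n tee'd iterators by i elements (the throwaway
# ``collections.deque(..., 0)`` merely consumes the islice) and zips them.
# ``_sliding_window_plain`` is a hypothetical name.
def _sliding_window_plain(n, seq):
    it = iter(seq)
    window = collections.deque(itertools.islice(it, n), maxlen=n)
    if len(window) == n:
        yield tuple(window)
    for item in it:
        window.append(item)
        yield tuple(window)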

no_pad = '__no__pad__'


def partition(n, seq, pad=no_pad):
    """ Partition sequence into tuples of length n

    >>> list(partition(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
    is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:

    >>> list(partition(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4)]

    >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
    [(1, 2), (3, 4), (5, None)]

    See Also:
        partition_all
    """
    args = [iter(seq)] * n
    if pad is no_pad:
        return zip(*args)
    else:
        return zip_longest(*args, fillvalue=pad)

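
# Note on ``partition`` above: ``[iter(seq)] * n`` is the same iterator object
# repeated n times, so ``zip``/``zip_longest`` pull n consecutive elements for
# each output tuple (the classic "grouper" idiom from the itertools recipes).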

def partition_all(n, seq):
    """ Partition all elements of sequence into tuples of length at most n

    The final tuple may be shorter to accommodate extra elements.

    >>> list(partition_all(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    >>> list(partition_all(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4), (5,)]

    See Also:
        partition
    """
    args = [iter(seq)] * n
    it = zip_longest(*args, fillvalue=no_pad)
    try:
        prev = next(it)
    except StopIteration:
        return
    for item in it:
        yield prev
        prev = item
    if prev[-1] is no_pad:
        try:
            # If seq defines __len__, then
            # we can quickly calculate where no_pad starts
            yield prev[:len(seq) % n]
        except TypeError:
            # Get first index of no_pad without using .index()
            # https://github.com/pytoolz/toolz/issues/387
            # Binary search from CPython's bisect module,
            # modified for identity testing.
            lo, hi = 0, n
            while lo < hi:
                mid = (lo + hi) // 2
                if prev[mid] is no_pad:
                    hi = mid
                else:
                    lo = mid + 1
            yield prev[:lo]
    else:
        yield prev


def count(seq):
    """ Count the number of items in seq

    Like the builtin ``len`` but works on lazy sequences.

    Not to be confused with ``itertools.count``

    See also:
        len
    """
    if hasattr(seq, '__len__'):
        return len(seq)
    return sum(1 for i in seq)


def pluck(ind, seqs, default=no_default):
    """ plucks an element or several elements from each item in a sequence.

    ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
    elements of each item in the sequence.

    This is equivalent to running `map(curried.get(ind), seqs)`

    ``ind`` can be either a single string/index or a list of strings/indices.
    ``seqs`` should be sequence containing sequences or dicts.

    e.g.

    >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
    >>> list(pluck('name', data))
    ['Cheese', 'Pies']
    >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
    [(1, 2), (4, 5)]

    See Also:
        get
        map
    """
    if default == no_default:
        get = getter(ind)
        return map(get, seqs)
    elif isinstance(ind, list):
        return (tuple(_get(item, seq, default) for item in ind)
                for seq in seqs)
    return (_get(ind, seq, default) for seq in seqs)


def getter(index):
    if isinstance(index, list):
        if len(index) == 1:
            index = index[0]
            return lambda x: (x[index],)
        elif index:
            return operator.itemgetter(*index)
        else:
            return lambda x: ()
    else:
        return operator.itemgetter(index)

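
# Note on ``getter`` above: a one-element list returns ``lambda x: (x[index],)``
# rather than ``operator.itemgetter(index)`` so that list arguments always
# produce a tuple, matching ``itemgetter(*index)``, which only returns tuples
# when given two or more indices.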

def join(leftkey, leftseq, rightkey, rightseq,
         left_default=no_default, right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.

    (Note: If right_default is defined, then unique keys of rightseq
        will also be stored in memory.)

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Sydney'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    Usually the key arguments are callables to be applied to the sequences.  If
    the keys are not obviously callable then it is assumed that indexing was
    intended, e.g. the following is a legal change.
    The join is implemented as a hash join and the keys of leftseq must be
    hashable. Additionally, if right_default is defined, then keys of rightseq
    must also be hashable.

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    d = groupby(leftkey, leftseq)

    if left_default == no_default and right_default == no_default:
        # Inner Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
    elif left_default != no_default and right_default == no_default:
        # Right Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
            else:
                yield (left_default, item)
    elif right_default != no_default:
        seen_keys = set()
        seen = seen_keys.add

        if left_default == no_default:
            # Left Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
        else:
            # Full Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
                else:
                    yield (left_default, item)

        for key, matches in d.items():
            if key not in seen_keys:
                for match in matches:
                    yield (match, right_default)

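
# Note on ``join`` above: the branches correspond to the four join types.
# With neither default given it is an inner join; ``left_default`` alone gives
# a right outer join (every right item appears, unmatched ones paired with
# ``left_default``); ``right_default`` alone gives a left outer join
# (unmatched left groups are flushed with ``right_default`` in the final
# loop); both defaults together give a full outer join.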

def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item to use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items


def topk(k, seq, key=None):
    """ Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    See also:
        heapq.nlargest
    """
    if key is not None and not callable(key):
        key = getter(key)
    return tuple(heapq.nlargest(k, seq, key=key))


def peek(seq):
    """ Retrieve the next element of a sequence

    Returns the first element and an iterable equivalent to the original
    sequence, still having the element retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first, seq = peek(seq)
    >>> first
    0
    >>> list(seq)
    [0, 1, 2, 3, 4]
    """
    iterator = iter(seq)
    item = next(iterator)
    return item, itertools.chain((item,), iterator)


def peekn(n, seq):
    """ Retrieve the next n elements of a sequence

    Returns a tuple of the first n elements and an iterable equivalent
    to the original, still having the elements retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first_two, seq = peekn(2, seq)
    >>> first_two
    (0, 1)
    >>> list(seq)
    [0, 1, 2, 3, 4]
    """
    iterator = iter(seq)
    peeked = tuple(take(n, iterator))
    return peeked, itertools.chain(iter(peeked), iterator)


def random_sample(prob, seq, random_state=None):
    """ Return elements from a sequence with probability of prob

    Returns a lazy iterator of random items from seq.

    ``random_sample`` considers each item independently and without
    replacement. See below how the first time it returned 13 items and the
    next time it returned 6 items.

    >>> seq = list(range(100))
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
    >>> list(random_sample(0.1, seq)) # doctest: +SKIP
    [6, 44, 54, 61, 69, 94]

    Providing an integer seed for ``random_state`` will result in
    deterministic sampling. Given the same seed it will return the same sample
    every time.

    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

    ``random_state`` can also be any object with a method ``random`` that
    returns floats between 0.0 and 1.0 (exclusive).

    >>> from random import Random
    >>> randobj = Random(2016)
    >>> list(random_sample(0.1, seq, random_state=randobj))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    """
    if not hasattr(random_state, 'random'):
        from random import Random

        random_state = Random(random_state)
    return filter(lambda _: random_state.random() < prob, seq)