Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/toolz/itertoolz.py: 14%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

360 statements  

1import itertools 

2import heapq 

3import collections 

4import operator 

5from functools import partial 

6from itertools import filterfalse, zip_longest 

7from collections.abc import Sequence 

8from toolz.utils import no_default 

9 

10 

11__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave', 

12 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth', 

13 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv', 

14 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate', 

15 'sliding_window', 'partition', 'partition_all', 'count', 'pluck', 

16 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample') 

17 

18 

19def remove(predicate, seq): 

20 """ Return those items of sequence for which predicate(item) is False 

21 

22 >>> def iseven(x): 

23 ... return x % 2 == 0 

24 >>> list(remove(iseven, [1, 2, 3, 4])) 

25 [1, 3] 

26 """ 

27 return filterfalse(predicate, seq) 

28 

29 

30def accumulate(binop, seq, initial=no_default): 

31 """ Repeatedly apply binary function to a sequence, accumulating results 

32 

33 >>> from operator import add, mul 

34 >>> list(accumulate(add, [1, 2, 3, 4, 5])) 

35 [1, 3, 6, 10, 15] 

36 >>> list(accumulate(mul, [1, 2, 3, 4, 5])) 

37 [1, 2, 6, 24, 120] 

38 

39 Accumulate is similar to ``reduce`` and is good for making functions like 

40 cumulative sum: 

41 

42 >>> from functools import partial, reduce 

43 >>> sum = partial(reduce, add) 

44 >>> cumsum = partial(accumulate, add) 

45 

46 Accumulate also takes an optional argument that will be used as the first 

47 value. This is similar to reduce. 

48 

49 >>> list(accumulate(add, [1, 2, 3], -1)) 

50 [-1, 0, 2, 5] 

51 >>> list(accumulate(add, [], 1)) 

52 [1] 

53 

54 See Also: 

55 itertools.accumulate : In standard itertools for Python 3.2+ 

56 """ 

57 seq = iter(seq) 

58 if initial == no_default: 

59 try: 

60 result = next(seq) 

61 except StopIteration: 

62 return 

63 else: 

64 result = initial 

65 yield result 

66 for elem in seq: 

67 result = binop(result, elem) 

68 yield result 

69 

70 

71def groupby(key, seq): 

72 """ Group a collection by a key function 

73 

74 >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank'] 

75 >>> groupby(len, names) # doctest: +SKIP 

76 {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']} 

77 

78 >>> iseven = lambda x: x % 2 == 0 

79 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP 

80 {False: [1, 3, 5, 7], True: [2, 4, 6, 8]} 

81 

82 Non-callable keys imply grouping on a member. 

83 

84 >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'}, 

85 ... {'name': 'Bob', 'gender': 'M'}, 

86 ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP 

87 {'F': [{'gender': 'F', 'name': 'Alice'}], 

88 'M': [{'gender': 'M', 'name': 'Bob'}, 

89 {'gender': 'M', 'name': 'Charlie'}]} 

90 

91 Not to be confused with ``itertools.groupby`` 

92 

93 See Also: 

94 countby 

95 """ 

96 if not callable(key): 

97 key = getter(key) 

98 d = collections.defaultdict(lambda: [].append) 

99 for item in seq: 

100 d[key(item)](item) 

101 rv = {} 

102 for k, v in d.items(): 

103 rv[k] = v.__self__ 

104 return rv 

105 

106 

107def merge_sorted(*seqs, **kwargs): 

108 """ Merge and sort a collection of sorted collections 

109 

110 This works lazily and only keeps one value from each iterable in memory. 

111 

112 >>> list(merge_sorted([1, 3, 5], [2, 4, 6])) 

113 [1, 2, 3, 4, 5, 6] 

114 

115 >>> ''.join(merge_sorted('abc', 'abc', 'abc')) 

116 'aaabbbccc' 

117 

118 The "key" function used to sort the input may be passed as a keyword. 

119 

120 >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3)) 

121 [2, 1, 3, 3] 

122 """ 

123 if len(seqs) == 0: 

124 return iter([]) 

125 elif len(seqs) == 1: 

126 return iter(seqs[0]) 

127 

128 key = kwargs.get('key', None) 

129 if key is None: 

130 return _merge_sorted_binary(seqs) 

131 else: 

132 return _merge_sorted_binary_key(seqs, key) 

133 

134 

135def _merge_sorted_binary(seqs): 

136 mid = len(seqs) // 2 

137 L1 = seqs[:mid] 

138 if len(L1) == 1: 

139 seq1 = iter(L1[0]) 

140 else: 

141 seq1 = _merge_sorted_binary(L1) 

142 L2 = seqs[mid:] 

143 if len(L2) == 1: 

144 seq2 = iter(L2[0]) 

145 else: 

146 seq2 = _merge_sorted_binary(L2) 

147 

148 try: 

149 val2 = next(seq2) 

150 except StopIteration: 

151 yield from seq1 

152 return 

153 

154 for val1 in seq1: 

155 if val2 < val1: 

156 yield val2 

157 for val2 in seq2: 

158 if val2 < val1: 

159 yield val2 

160 else: 

161 yield val1 

162 break 

163 else: 

164 break 

165 else: 

166 yield val1 

167 else: 

168 yield val2 

169 yield from seq2 

170 return 

171 yield val1 

172 yield from seq1 

173 

174 

175def _merge_sorted_binary_key(seqs, key): 

176 mid = len(seqs) // 2 

177 L1 = seqs[:mid] 

178 if len(L1) == 1: 

179 seq1 = iter(L1[0]) 

180 else: 

181 seq1 = _merge_sorted_binary_key(L1, key) 

182 L2 = seqs[mid:] 

183 if len(L2) == 1: 

184 seq2 = iter(L2[0]) 

185 else: 

186 seq2 = _merge_sorted_binary_key(L2, key) 

187 

188 try: 

189 val2 = next(seq2) 

190 except StopIteration: 

191 yield from seq1 

192 return 

193 key2 = key(val2) 

194 

195 for val1 in seq1: 

196 key1 = key(val1) 

197 if key2 < key1: 

198 yield val2 

199 for val2 in seq2: 

200 key2 = key(val2) 

201 if key2 < key1: 

202 yield val2 

203 else: 

204 yield val1 

205 break 

206 else: 

207 break 

208 else: 

209 yield val1 

210 else: 

211 yield val2 

212 yield from seq2 

213 return 

214 yield val1 

215 yield from seq1 

216 

217 

218def interleave(seqs): 

219 """ Interleave a sequence of sequences 

220 

221 >>> list(interleave([[1, 2], [3, 4]])) 

222 [1, 3, 2, 4] 

223 

224 >>> ''.join(interleave(('ABC', 'XY'))) 

225 'AXBYC' 

226 

227 Both the individual sequences and the sequence of sequences may be infinite 

228 

229 Returns a lazy iterator 

230 """ 

231 iters = itertools.cycle(map(iter, seqs)) 

232 while True: 

233 try: 

234 for itr in iters: 

235 yield next(itr) 

236 return 

237 except StopIteration: 

238 predicate = partial(operator.is_not, itr) 

239 iters = itertools.cycle(itertools.takewhile(predicate, iters)) 

240 

241 

242def unique(seq, key=None): 

243 """ Return only unique elements of a sequence 

244 

245 >>> tuple(unique((1, 2, 3))) 

246 (1, 2, 3) 

247 >>> tuple(unique((1, 2, 1, 3))) 

248 (1, 2, 3) 

249 

250 Uniqueness can be defined by key keyword 

251 

252 >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len)) 

253 ('cat', 'mouse') 

254 """ 

255 seen = set() 

256 seen_add = seen.add 

257 if key is None: 

258 for item in seq: 

259 if item not in seen: 

260 seen_add(item) 

261 yield item 

262 else: # calculate key 

263 for item in seq: 

264 val = key(item) 

265 if val not in seen: 

266 seen_add(val) 

267 yield item 

268 

269 

270def isiterable(x): 

271 """ Is x iterable? 

272 

273 >>> isiterable([1, 2, 3]) 

274 True 

275 >>> isiterable('abc') 

276 True 

277 >>> isiterable(5) 

278 False 

279 """ 

280 try: 

281 iter(x) 

282 return True 

283 except TypeError: 

284 return False 

285 

286 

287def isdistinct(seq): 

288 """ All values in sequence are distinct 

289 

290 >>> isdistinct([1, 2, 3]) 

291 True 

292 >>> isdistinct([1, 2, 1]) 

293 False 

294 

295 >>> isdistinct("Hello") 

296 False 

297 >>> isdistinct("World") 

298 True 

299 """ 

300 if iter(seq) is seq: 

301 seen = set() 

302 seen_add = seen.add 

303 for item in seq: 

304 if item in seen: 

305 return False 

306 seen_add(item) 

307 return True 

308 else: 

309 return len(seq) == len(set(seq)) 

310 

311 

312def take(n, seq): 

313 """ The first n elements of a sequence 

314 

315 >>> list(take(2, [10, 20, 30, 40, 50])) 

316 [10, 20] 

317 

318 See Also: 

319 drop 

320 tail 

321 """ 

322 return itertools.islice(seq, n) 

323 

324 

325def tail(n, seq): 

326 """ The last n elements of a sequence 

327 

328 >>> tail(2, [10, 20, 30, 40, 50]) 

329 [40, 50] 

330 

331 See Also: 

332 drop 

333 take 

334 """ 

335 try: 

336 return seq[-n:] 

337 except (TypeError, KeyError): 

338 return tuple(collections.deque(seq, n)) 

339 

340 

341def drop(n, seq): 

342 """ The sequence following the first n elements 

343 

344 >>> list(drop(2, [10, 20, 30, 40, 50])) 

345 [30, 40, 50] 

346 

347 See Also: 

348 take 

349 tail 

350 """ 

351 return itertools.islice(seq, n, None) 

352 

353 

354def take_nth(n, seq): 

355 """ Every nth item in seq 

356 

357 >>> list(take_nth(2, [10, 20, 30, 40, 50])) 

358 [10, 30, 50] 

359 """ 

360 return itertools.islice(seq, 0, None, n) 

361 

362 

363def first(seq): 

364 """ The first element in a sequence 

365 

366 >>> first('ABC') 

367 'A' 

368 """ 

369 return next(iter(seq)) 

370 

371 

372def second(seq): 

373 """ The second element in a sequence 

374 

375 >>> second('ABC') 

376 'B' 

377 """ 

378 seq = iter(seq) 

379 next(seq) 

380 return next(seq) 

381 

382 

383def nth(n, seq): 

384 """ The nth element in a sequence 

385 

386 >>> nth(1, 'ABC') 

387 'B' 

388 """ 

389 if isinstance(seq, (tuple, list, Sequence)): 

390 return seq[n] 

391 else: 

392 return next(itertools.islice(seq, n, None)) 

393 

394 

395def last(seq): 

396 """ The last element in a sequence 

397 

398 >>> last('ABC') 

399 'C' 

400 """ 

401 return tail(1, seq)[0] 

402 

403 

404rest = partial(drop, 1) 

405 

406 

407def _get(ind, seq, default): 

408 try: 

409 return seq[ind] 

410 except (KeyError, IndexError): 

411 return default 

412 

413 

414def get(ind, seq, default=no_default): 

415 """ Get element in a sequence or dict 

416 

417 Provides standard indexing 

418 

419 >>> get(1, 'ABC') # Same as 'ABC'[1] 

420 'B' 

421 

422 Pass a list to get multiple values 

423 

424 >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2]) 

425 ('B', 'C') 

426 

427 Works on any value that supports indexing/getitem 

428 For example here we see that it works with dictionaries 

429 

430 >>> phonebook = {'Alice': '555-1234', 

431 ... 'Bob': '555-5678', 

432 ... 'Charlie':'555-9999'} 

433 >>> get('Alice', phonebook) 

434 '555-1234' 

435 

436 >>> get(['Alice', 'Bob'], phonebook) 

437 ('555-1234', '555-5678') 

438 

439 Provide a default for missing values 

440 

441 >>> get(['Alice', 'Dennis'], phonebook, None) 

442 ('555-1234', None) 

443 

444 See Also: 

445 pluck 

446 """ 

447 try: 

448 return seq[ind] 

449 except TypeError: # `ind` may be a list 

450 if isinstance(ind, list): 

451 if default == no_default: 

452 if len(ind) > 1: 

453 return operator.itemgetter(*ind)(seq) 

454 elif ind: 

455 return seq[ind[0]], 

456 else: 

457 return () 

458 else: 

459 return tuple(_get(i, seq, default) for i in ind) 

460 elif default != no_default: 

461 return default 

462 else: 

463 raise 

464 except (KeyError, IndexError): # we know `ind` is not a list 

465 if default == no_default: 

466 raise 

467 else: 

468 return default 

469 

470 

471def concat(seqs): 

472 """ Concatenate zero or more iterables, any of which may be infinite. 

473 

474 An infinite sequence will prevent the rest of the arguments from 

475 being included. 

476 

477 We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs 

478 can be a generator. 

479 

480 >>> list(concat([[], [1], [2, 3]])) 

481 [1, 2, 3] 

482 

483 See also: 

484 itertools.chain.from_iterable equivalent 

485 """ 

486 return itertools.chain.from_iterable(seqs) 

487 

488 

489def concatv(*seqs): 

490 """ Variadic version of concat 

491 

492 >>> list(concatv([], ["a"], ["b", "c"])) 

493 ['a', 'b', 'c'] 

494 

495 See also: 

496 itertools.chain 

497 """ 

498 return concat(seqs) 

499 

500 

501def mapcat(func, seqs): 

502 """ Apply func to each sequence in seqs, concatenating results. 

503 

504 >>> list(mapcat(lambda s: [c.upper() for c in s], 

505 ... [["a", "b"], ["c", "d", "e"]])) 

506 ['A', 'B', 'C', 'D', 'E'] 

507 """ 

508 return concat(map(func, seqs)) 

509 

510 

511def cons(el, seq): 

512 """ Add el to beginning of (possibly infinite) sequence seq. 

513 

514 >>> list(cons(1, [2, 3])) 

515 [1, 2, 3] 

516 """ 

517 return itertools.chain([el], seq) 

518 

519 

520def interpose(el, seq): 

521 """ Introduce element between each pair of elements in seq 

522 

523 >>> list(interpose("a", [1, 2, 3])) 

524 [1, 'a', 2, 'a', 3] 

525 """ 

526 inposed = concat(zip(itertools.repeat(el), seq)) 

527 next(inposed) 

528 return inposed 

529 

530 

531def frequencies(seq): 

532 """ Find number of occurrences of each value in seq 

533 

534 >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP 

535 {'cat': 3, 'ox': 1, 'pig': 2} 

536 

537 See Also: 

538 countby 

539 groupby 

540 """ 

541 d = collections.defaultdict(int) 

542 for item in seq: 

543 d[item] += 1 

544 return dict(d) 

545 

546 

547def reduceby(key, binop, seq, init=no_default): 

548 """ Perform a simultaneous groupby and reduction 

549 

550 The computation: 

551 

552 >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP 

553 

554 is equivalent to the following: 

555 

556 >>> def reduction(group): # doctest: +SKIP 

557 ... return reduce(binop, group, init) # doctest: +SKIP 

558 

559 >>> groups = groupby(key, seq) # doctest: +SKIP 

560 >>> result = valmap(reduction, groups) # doctest: +SKIP 

561 

562 But the former does not build the intermediate groups, allowing it to 

563 operate in much less space. This makes it suitable for larger datasets 

564 that do not fit comfortably in memory 

565 

566 The ``init`` keyword argument is the default initialization of the 

567 reduction. This can be either a constant value like ``0`` or a callable 

568 like ``lambda : 0`` as might be used in ``defaultdict``. 

569 

570 Simple Examples 

571 --------------- 

572 

573 >>> from operator import add, mul 

574 >>> iseven = lambda x: x % 2 == 0 

575 

576 >>> data = [1, 2, 3, 4, 5] 

577 

578 >>> reduceby(iseven, add, data) # doctest: +SKIP 

579 {False: 9, True: 6} 

580 

581 >>> reduceby(iseven, mul, data) # doctest: +SKIP 

582 {False: 15, True: 8} 

583 

584 Complex Example 

585 --------------- 

586 

587 >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000}, 

588 ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000}, 

589 ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000}, 

590 ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}] 

591 

592 >>> reduceby('state', # doctest: +SKIP 

593 ... lambda acc, x: acc + x['cost'], 

594 ... projects, 0) 

595 {'CA': 1200000, 'IL': 2100000} 

596 

597 Example Using ``init`` 

598 ---------------------- 

599 

600 >>> def set_add(s, i): 

601 ... s.add(i) 

602 ... return s 

603 

604 >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP 

605 {True: set([2, 4]), 

606 False: set([1, 3])} 

607 """ 

608 is_no_default = init == no_default 

609 if not is_no_default and not callable(init): 

610 _init = init 

611 init = lambda: _init 

612 if not callable(key): 

613 key = getter(key) 

614 d = {} 

615 for item in seq: 

616 k = key(item) 

617 if k not in d: 

618 if is_no_default: 

619 d[k] = item 

620 continue 

621 else: 

622 d[k] = init() 

623 d[k] = binop(d[k], item) 

624 return d 

625 

626 

627def iterate(func, x): 

628 """ Repeatedly apply a function func onto an original input 

629 

630 Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc.. 

631 

632 >>> def inc(x): return x + 1 

633 >>> counter = iterate(inc, 0) 

634 >>> next(counter) 

635 0 

636 >>> next(counter) 

637 1 

638 >>> next(counter) 

639 2 

640 

641 >>> double = lambda x: x * 2 

642 >>> powers_of_two = iterate(double, 1) 

643 >>> next(powers_of_two) 

644 1 

645 >>> next(powers_of_two) 

646 2 

647 >>> next(powers_of_two) 

648 4 

649 >>> next(powers_of_two) 

650 8 

651 """ 

652 while True: 

653 yield x 

654 x = func(x) 

655 

656 

657def sliding_window(n, seq): 

658 """ A sequence of overlapping subsequences 

659 

660 >>> list(sliding_window(2, [1, 2, 3, 4])) 

661 [(1, 2), (2, 3), (3, 4)] 

662 

663 This function creates a sliding window suitable for transformations like 

664 sliding means / smoothing 

665 

666 >>> mean = lambda seq: float(sum(seq)) / len(seq) 

667 >>> list(map(mean, sliding_window(2, [1, 2, 3, 4]))) 

668 [1.5, 2.5, 3.5] 

669 """ 

670 return zip(*(collections.deque(itertools.islice(it, i), 0) or it 

671 for i, it in enumerate(itertools.tee(seq, n)))) 

672 

673 

674no_pad = '__no__pad__' 

675 

676 

677def partition(n, seq, pad=no_pad): 

678 """ Partition sequence into tuples of length n 

679 

680 >>> list(partition(2, [1, 2, 3, 4])) 

681 [(1, 2), (3, 4)] 

682 

683 If the length of ``seq`` is not evenly divisible by ``n``, the final tuple 

684 is dropped if ``pad`` is not specified, or filled to length ``n`` by pad: 

685 

686 >>> list(partition(2, [1, 2, 3, 4, 5])) 

687 [(1, 2), (3, 4)] 

688 

689 >>> list(partition(2, [1, 2, 3, 4, 5], pad=None)) 

690 [(1, 2), (3, 4), (5, None)] 

691 

692 See Also: 

693 partition_all 

694 """ 

695 args = [iter(seq)] * n 

696 if pad is no_pad: 

697 return zip(*args) 

698 else: 

699 return zip_longest(*args, fillvalue=pad) 

700 

701 

702def partition_all(n, seq): 

703 """ Partition all elements of sequence into tuples of length at most n 

704 

705 The final tuple may be shorter to accommodate extra elements. 

706 

707 >>> list(partition_all(2, [1, 2, 3, 4])) 

708 [(1, 2), (3, 4)] 

709 

710 >>> list(partition_all(2, [1, 2, 3, 4, 5])) 

711 [(1, 2), (3, 4), (5,)] 

712 

713 See Also: 

714 partition 

715 """ 

716 args = [iter(seq)] * n 

717 it = zip_longest(*args, fillvalue=no_pad) 

718 try: 

719 prev = next(it) 

720 except StopIteration: 

721 return 

722 for item in it: 

723 yield prev 

724 prev = item 

725 if prev[-1] is no_pad: 

726 try: 

727 # If seq defines __len__, then 

728 # we can quickly calculate where no_pad starts 

729 end = len(seq) % n 

730 if prev[end - 1] is no_pad or prev[end] is not no_pad: 

731 raise LookupError( 

732 'The sequence passed to `parition_all` has invalid length' 

733 ) 

734 yield prev[:end] 

735 except TypeError: 

736 # Get first index of no_pad without using .index() 

737 # https://github.com/pytoolz/toolz/issues/387 

738 # Binary search from CPython's bisect module, 

739 # modified for identity testing. 

740 lo, hi = 0, n 

741 while lo < hi: 

742 mid = (lo + hi) // 2 

743 if prev[mid] is no_pad: 

744 hi = mid 

745 else: 

746 lo = mid + 1 

747 yield prev[:lo] 

748 else: 

749 yield prev 

750 

751 

752def count(seq): 

753 """ Count the number of items in seq 

754 

755 Like the builtin ``len`` but works on lazy sequences. 

756 

757 Not to be confused with ``itertools.count`` 

758 

759 See also: 

760 len 

761 """ 

762 if hasattr(seq, '__len__'): 

763 return len(seq) 

764 return sum(1 for i in seq) 

765 

766 

767def pluck(ind, seqs, default=no_default): 

768 """ plucks an element or several elements from each item in a sequence. 

769 

770 ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more 

771 elements of each item in the sequence. 

772 

773 This is equivalent to running `map(curried.get(ind), seqs)` 

774 

775 ``ind`` can be either a single string/index or a list of strings/indices. 

776 ``seqs`` should be sequence containing sequences or dicts. 

777 

778 e.g. 

779 

780 >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}] 

781 >>> list(pluck('name', data)) 

782 ['Cheese', 'Pies'] 

783 >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]])) 

784 [(1, 2), (4, 5)] 

785 

786 See Also: 

787 get 

788 map 

789 """ 

790 if default == no_default: 

791 get = getter(ind) 

792 return map(get, seqs) 

793 elif isinstance(ind, list): 

794 return (tuple(_get(item, seq, default) for item in ind) 

795 for seq in seqs) 

796 return (_get(ind, seq, default) for seq in seqs) 

797 

798 

799def getter(index): 

800 if isinstance(index, list): 

801 if len(index) == 1: 

802 index = index[0] 

803 return lambda x: (x[index],) 

804 elif index: 

805 return operator.itemgetter(*index) 

806 else: 

807 return lambda x: () 

808 else: 

809 return operator.itemgetter(index) 

810 

811 

812def join(leftkey, leftseq, rightkey, rightseq, 

813 left_default=no_default, right_default=no_default): 

814 """ Join two sequences on common attributes 

815 

816 This is a semi-streaming operation. The LEFT sequence is fully evaluated 

817 and placed into memory. The RIGHT sequence is evaluated lazily and so can 

818 be arbitrarily large. 

819 

820 (Note: If right_default is defined, then unique keys of rightseq 

821 will also be stored in memory.) 

822 

823 >>> friends = [('Alice', 'Edith'), 

824 ... ('Alice', 'Zhao'), 

825 ... ('Edith', 'Alice'), 

826 ... ('Zhao', 'Alice'), 

827 ... ('Zhao', 'Edith')] 

828 

829 >>> cities = [('Alice', 'NYC'), 

830 ... ('Alice', 'Chicago'), 

831 ... ('Dan', 'Sydney'), 

832 ... ('Edith', 'Paris'), 

833 ... ('Edith', 'Berlin'), 

834 ... ('Zhao', 'Shanghai')] 

835 

836 >>> # Vacation opportunities 

837 >>> # In what cities do people have friends? 

838 >>> result = join(second, friends, 

839 ... first, cities) 

840 >>> for ((a, b), (c, d)) in sorted(unique(result)): 

841 ... print((a, d)) 

842 ('Alice', 'Berlin') 

843 ('Alice', 'Paris') 

844 ('Alice', 'Shanghai') 

845 ('Edith', 'Chicago') 

846 ('Edith', 'NYC') 

847 ('Zhao', 'Chicago') 

848 ('Zhao', 'NYC') 

849 ('Zhao', 'Berlin') 

850 ('Zhao', 'Paris') 

851 

852 Specify outer joins with keyword arguments ``left_default`` and/or 

853 ``right_default``. Here is a full outer join in which unmatched elements 

854 are paired with None. 

855 

856 >>> identity = lambda x: x 

857 >>> list(join(identity, [1, 2, 3], 

858 ... identity, [2, 3, 4], 

859 ... left_default=None, right_default=None)) 

860 [(2, 2), (3, 3), (None, 4), (1, None)] 

861 

862 Usually the key arguments are callables to be applied to the sequences. If 

863 the keys are not obviously callable then it is assumed that indexing was 

864 intended, e.g. the following is a legal change. 

865 The join is implemented as a hash join and the keys of leftseq must be 

866 hashable. Additionally, if right_default is defined, then keys of rightseq 

867 must also be hashable. 

868 

869 >>> # result = join(second, friends, first, cities) 

870 >>> result = join(1, friends, 0, cities) # doctest: +SKIP 

871 """ 

872 if not callable(leftkey): 

873 leftkey = getter(leftkey) 

874 if not callable(rightkey): 

875 rightkey = getter(rightkey) 

876 

877 d = groupby(leftkey, leftseq) 

878 

879 if left_default == no_default and right_default == no_default: 

880 # Inner Join 

881 for item in rightseq: 

882 key = rightkey(item) 

883 if key in d: 

884 for left_match in d[key]: 

885 yield (left_match, item) 

886 elif left_default != no_default and right_default == no_default: 

887 # Right Join 

888 for item in rightseq: 

889 key = rightkey(item) 

890 if key in d: 

891 for left_match in d[key]: 

892 yield (left_match, item) 

893 else: 

894 yield (left_default, item) 

895 elif right_default != no_default: 

896 seen_keys = set() 

897 seen = seen_keys.add 

898 

899 if left_default == no_default: 

900 # Left Join 

901 for item in rightseq: 

902 key = rightkey(item) 

903 seen(key) 

904 if key in d: 

905 for left_match in d[key]: 

906 yield (left_match, item) 

907 else: 

908 # Full Join 

909 for item in rightseq: 

910 key = rightkey(item) 

911 seen(key) 

912 if key in d: 

913 for left_match in d[key]: 

914 yield (left_match, item) 

915 else: 

916 yield (left_default, item) 

917 

918 for key, matches in d.items(): 

919 if key not in seen_keys: 

920 for match in matches: 

921 yield (match, right_default) 

922 

923 

924def diff(*seqs, **kwargs): 

925 """ Return those items that differ between sequences 

926 

927 >>> list(diff([1, 2, 3], [1, 2, 10, 100])) 

928 [(3, 10)] 

929 

930 Shorter sequences may be padded with a ``default`` value: 

931 

932 >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None)) 

933 [(3, 10), (None, 100)] 

934 

935 A ``key`` function may also be applied to each item to use during 

936 comparisons: 

937 

938 >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower)) 

939 [('bananas', 'Oranges')] 

940 """ 

941 N = len(seqs) 

942 if N == 1 and isinstance(seqs[0], list): 

943 seqs = seqs[0] 

944 N = len(seqs) 

945 if N < 2: 

946 raise TypeError('Too few sequences given (min 2 required)') 

947 default = kwargs.get('default', no_default) 

948 if default == no_default: 

949 iters = zip(*seqs) 

950 else: 

951 iters = zip_longest(*seqs, fillvalue=default) 

952 key = kwargs.get('key', None) 

953 if key is None: 

954 for items in iters: 

955 if items.count(items[0]) != N: 

956 yield items 

957 else: 

958 for items in iters: 

959 vals = tuple(map(key, items)) 

960 if vals.count(vals[0]) != N: 

961 yield items 

962 

963 

964def topk(k, seq, key=None): 

965 """ Find the k largest elements of a sequence 

966 

967 Operates lazily in ``n*log(k)`` time 

968 

969 >>> topk(2, [1, 100, 10, 1000]) 

970 (1000, 100) 

971 

972 Use a key function to change sorted order 

973 

974 >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len) 

975 ('Charlie', 'Alice') 

976 

977 See also: 

978 heapq.nlargest 

979 """ 

980 if key is not None and not callable(key): 

981 key = getter(key) 

982 return tuple(heapq.nlargest(k, seq, key=key)) 

983 

984 

985def peek(seq): 

986 """ Retrieve the next element of a sequence 

987 

988 Returns the first element and an iterable equivalent to the original 

989 sequence, still having the element retrieved. 

990 

991 >>> seq = [0, 1, 2, 3, 4] 

992 >>> first, seq = peek(seq) 

993 >>> first 

994 0 

995 >>> list(seq) 

996 [0, 1, 2, 3, 4] 

997 """ 

998 iterator = iter(seq) 

999 item = next(iterator) 

1000 return item, itertools.chain((item,), iterator) 

1001 

1002 

1003def peekn(n, seq): 

1004 """ Retrieve the next n elements of a sequence 

1005 

1006 Returns a tuple of the first n elements and an iterable equivalent 

1007 to the original, still having the elements retrieved. 

1008 

1009 >>> seq = [0, 1, 2, 3, 4] 

1010 >>> first_two, seq = peekn(2, seq) 

1011 >>> first_two 

1012 (0, 1) 

1013 >>> list(seq) 

1014 [0, 1, 2, 3, 4] 

1015 """ 

1016 iterator = iter(seq) 

1017 peeked = tuple(take(n, iterator)) 

1018 return peeked, itertools.chain(iter(peeked), iterator) 

1019 

1020 

1021def random_sample(prob, seq, random_state=None): 

1022 """ Return elements from a sequence with probability of prob 

1023 

1024 Returns a lazy iterator of random items from seq. 

1025 

1026 ``random_sample`` considers each item independently and without 

1027 replacement. See below how the first time it returned 13 items and the 

1028 next time it returned 6 items. 

1029 

1030 >>> seq = list(range(100)) 

1031 >>> list(random_sample(0.1, seq)) # doctest: +SKIP 

1032 [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95] 

1033 >>> list(random_sample(0.1, seq)) # doctest: +SKIP 

1034 [6, 44, 54, 61, 69, 94] 

1035 

1036 Providing an integer seed for ``random_state`` will result in 

1037 deterministic sampling. Given the same seed it will return the same sample 

1038 every time. 

1039 

1040 >>> list(random_sample(0.1, seq, random_state=2016)) 

1041 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] 

1042 >>> list(random_sample(0.1, seq, random_state=2016)) 

1043 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] 

1044 

1045 ``random_state`` can also be any object with a method ``random`` that 

1046 returns floats between 0.0 and 1.0 (exclusive). 

1047 

1048 >>> from random import Random 

1049 >>> randobj = Random(2016) 

1050 >>> list(random_sample(0.1, seq, random_state=randobj)) 

1051 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98] 

1052 """ 

1053 if not hasattr(random_state, 'random'): 

1054 from random import Random 

1055 

1056 random_state = Random(random_state) 

1057 return filter(lambda _: random_state.random() < prob, seq)