Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/toolz/itertoolz.py: 14%

1import itertools

2import heapq

3import collections

4import operator

5from functools import partial

6from itertools import filterfalse, zip_longest

7from collections.abc import Sequence

8from toolz.utils import no_default

11__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',

12 'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',

13 'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',

14 'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',

15 'sliding_window', 'partition', 'partition_all', 'count', 'pluck',

16 'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')

19def remove(predicate, seq):

20 """ Return those items of sequence for which predicate(item) is False

22 >>> def iseven(x):

23 ... return x % 2 == 0

24 >>> list(remove(iseven, [1, 2, 3, 4]))

25 [1, 3]

26 """

27 return filterfalse(predicate, seq)

30def accumulate(binop, seq, initial=no_default):

31 """ Repeatedly apply binary function to a sequence, accumulating results

33 >>> from operator import add, mul

34 >>> list(accumulate(add, [1, 2, 3, 4, 5]))

35 [1, 3, 6, 10, 15]

36 >>> list(accumulate(mul, [1, 2, 3, 4, 5]))

37 [1, 2, 6, 24, 120]

39 Accumulate is similar to ``reduce`` and is good for making functions like

40 cumulative sum:

42 >>> from functools import partial, reduce

43 >>> sum = partial(reduce, add)

44 >>> cumsum = partial(accumulate, add)

46 Accumulate also takes an optional argument that will be used as the first

47 value. This is similar to reduce.

49 >>> list(accumulate(add, [1, 2, 3], -1))

50 [-1, 0, 2, 5]

51 >>> list(accumulate(add, [], 1))

52 [1]

54 See Also:

55 itertools.accumulate : In standard itertools for Python 3.2+

56 """

57 seq = iter(seq)

58 if initial == no_default:

59 try:

60 result = next(seq)

61 except StopIteration:

62 return

63 else:

64 result = initial

65 yield result

66 for elem in seq:

67 result = binop(result, elem)

68 yield result

71def groupby(key, seq):

72 """ Group a collection by a key function

74 >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']

75 >>> groupby(len, names) # doctest: +SKIP

76 {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

78 >>> iseven = lambda x: x % 2 == 0

79 >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8]) # doctest: +SKIP

80 {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

82 Non-callable keys imply grouping on a member.

84 >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},

85 ... {'name': 'Bob', 'gender': 'M'},

86 ... {'name': 'Charlie', 'gender': 'M'}]) # doctest:+SKIP

87 {'F': [{'gender': 'F', 'name': 'Alice'}],

88 'M': [{'gender': 'M', 'name': 'Bob'},

89 {'gender': 'M', 'name': 'Charlie'}]}

91 Not to be confused with ``itertools.groupby``

93 See Also:

94 countby

95 """

96 if not callable(key):

97 key = getter(key)

98 d = collections.defaultdict(lambda: [].append)

99 for item in seq:

100 d[key(item)](item)

101 rv = {}

102 for k, v in d.items():

103 rv[k] = v.__self__

104 return rv

105

106

107def merge_sorted(*seqs, **kwargs):

108 """ Merge and sort a collection of sorted collections

109

110 This works lazily and only keeps one value from each iterable in memory.

111

112 >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))

113 [1, 2, 3, 4, 5, 6]

114

115 >>> ''.join(merge_sorted('abc', 'abc', 'abc'))

116 'aaabbbccc'

117

118 The "key" function used to sort the input may be passed as a keyword.

119

120 >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))

121 [2, 1, 3, 3]

122 """

123 if len(seqs) == 0:

124 return iter([])

125 elif len(seqs) == 1:

126 return iter(seqs[0])

127

128 key = kwargs.get('key', None)

129 if key is None:

130 return _merge_sorted_binary(seqs)

131 else:

132 return _merge_sorted_binary_key(seqs, key)

133

134

135def _merge_sorted_binary(seqs):

136 mid = len(seqs) // 2

137 L1 = seqs[:mid]

138 if len(L1) == 1:

139 seq1 = iter(L1[0])

140 else:

141 seq1 = _merge_sorted_binary(L1)

142 L2 = seqs[mid:]

143 if len(L2) == 1:

144 seq2 = iter(L2[0])

145 else:

146 seq2 = _merge_sorted_binary(L2)

147

148 try:

149 val2 = next(seq2)

150 except StopIteration:

151 yield from seq1

152 return

153

154 for val1 in seq1:

155 if val2 < val1:

156 yield val2

157 for val2 in seq2:

158 if val2 < val1:

159 yield val2

160 else:

161 yield val1

162 break

163 else:

164 break

165 else:

166 yield val1

167 else:

168 yield val2

169 yield from seq2

170 return

171 yield val1

172 yield from seq1

173

174

175def _merge_sorted_binary_key(seqs, key):

176 mid = len(seqs) // 2

177 L1 = seqs[:mid]

178 if len(L1) == 1:

179 seq1 = iter(L1[0])

180 else:

181 seq1 = _merge_sorted_binary_key(L1, key)

182 L2 = seqs[mid:]

183 if len(L2) == 1:

184 seq2 = iter(L2[0])

185 else:

186 seq2 = _merge_sorted_binary_key(L2, key)

187

188 try:

189 val2 = next(seq2)

190 except StopIteration:

191 yield from seq1

192 return

193 key2 = key(val2)

194

195 for val1 in seq1:

196 key1 = key(val1)

197 if key2 < key1:

198 yield val2

199 for val2 in seq2:

200 key2 = key(val2)

201 if key2 < key1:

202 yield val2

203 else:

204 yield val1

205 break

206 else:

207 break

208 else:

209 yield val1

210 else:

211 yield val2

212 yield from seq2

213 return

214 yield val1

215 yield from seq1

216

217

218def interleave(seqs):

219 """ Interleave a sequence of sequences

220

221 >>> list(interleave([[1, 2], [3, 4]]))

222 [1, 3, 2, 4]

223

224 >>> ''.join(interleave(('ABC', 'XY')))

225 'AXBYC'

226

227 Both the individual sequences and the sequence of sequences may be infinite

228

229 Returns a lazy iterator

230 """

231 iters = itertools.cycle(map(iter, seqs))

232 while True:

233 try:

234 for itr in iters:

235 yield next(itr)

236 return

237 except StopIteration:

238 predicate = partial(operator.is_not, itr)

239 iters = itertools.cycle(itertools.takewhile(predicate, iters))

240

241

242def unique(seq, key=None):

243 """ Return only unique elements of a sequence

244

245 >>> tuple(unique((1, 2, 3)))

246 (1, 2, 3)

247 >>> tuple(unique((1, 2, 1, 3)))

248 (1, 2, 3)

249

250 Uniqueness can be defined by key keyword

251

252 >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))

253 ('cat', 'mouse')

254 """

255 seen = set()

256 seen_add = seen.add

257 if key is None:

258 for item in seq:

259 if item not in seen:

260 seen_add(item)

261 yield item

262 else: # calculate key

263 for item in seq:

264 val = key(item)

265 if val not in seen:

266 seen_add(val)

267 yield item

268

269

270def isiterable(x):

271 """ Is x iterable?

272

273 >>> isiterable([1, 2, 3])

274 True

275 >>> isiterable('abc')

276 True

277 >>> isiterable(5)

278 False

279 """

280 try:

281 iter(x)

282 return True

283 except TypeError:

284 return False

285

286

287def isdistinct(seq):

288 """ All values in sequence are distinct

289

290 >>> isdistinct([1, 2, 3])

291 True

292 >>> isdistinct([1, 2, 1])

293 False

294

295 >>> isdistinct("Hello")

296 False

297 >>> isdistinct("World")

298 True

299 """

300 if iter(seq) is seq:

301 seen = set()

302 seen_add = seen.add

303 for item in seq:

304 if item in seen:

305 return False

306 seen_add(item)

307 return True

308 else:

309 return len(seq) == len(set(seq))

310

311

312def take(n, seq):

313 """ The first n elements of a sequence

314

315 >>> list(take(2, [10, 20, 30, 40, 50]))

316 [10, 20]

317

318 See Also:

319 drop

320 tail

321 """

322 return itertools.islice(seq, n)

323

324

325def tail(n, seq):

326 """ The last n elements of a sequence

327

328 >>> tail(2, [10, 20, 30, 40, 50])

329 [40, 50]

330

331 See Also:

332 drop

333 take

334 """

335 try:

336 return seq[-n:]

337 except (TypeError, KeyError):

338 return tuple(collections.deque(seq, n))

339

340

341def drop(n, seq):

342 """ The sequence following the first n elements

343

344 >>> list(drop(2, [10, 20, 30, 40, 50]))

345 [30, 40, 50]

346

347 See Also:

348 take

349 tail

350 """

351 return itertools.islice(seq, n, None)

352

353

354def take_nth(n, seq):

355 """ Every nth item in seq

356

357 >>> list(take_nth(2, [10, 20, 30, 40, 50]))

358 [10, 30, 50]

359 """

360 return itertools.islice(seq, 0, None, n)

361

362

363def first(seq):

364 """ The first element in a sequence

365

366 >>> first('ABC')

367 'A'

368 """

369 return next(iter(seq))

370

371

372def second(seq):

373 """ The second element in a sequence

374

375 >>> second('ABC')

376 'B'

377 """

378 seq = iter(seq)

379 next(seq)

380 return next(seq)

381

382

383def nth(n, seq):

384 """ The nth element in a sequence

385

386 >>> nth(1, 'ABC')

387 'B'

388 """

389 if isinstance(seq, (tuple, list, Sequence)):

390 return seq[n]

391 else:

392 return next(itertools.islice(seq, n, None))

393

394

395def last(seq):

396 """ The last element in a sequence

397

398 >>> last('ABC')

399 'C'

400 """

401 return tail(1, seq)[0]

402

403

404rest = partial(drop, 1)

405

406

407def _get(ind, seq, default):

408 try:

409 return seq[ind]

410 except (KeyError, IndexError):

411 return default

412

413

414def get(ind, seq, default=no_default):

415 """ Get element in a sequence or dict

416

417 Provides standard indexing

418

419 >>> get(1, 'ABC') # Same as 'ABC'[1]

420 'B'

421

422 Pass a list to get multiple values

423

424 >>> get([1, 2], 'ABC') # ('ABC'[1], 'ABC'[2])

425 ('B', 'C')

426

427 Works on any value that supports indexing/getitem

428 For example here we see that it works with dictionaries

429

430 >>> phonebook = {'Alice': '555-1234',

431 ... 'Bob': '555-5678',

432 ... 'Charlie':'555-9999'}

433 >>> get('Alice', phonebook)

434 '555-1234'

435

436 >>> get(['Alice', 'Bob'], phonebook)

437 ('555-1234', '555-5678')

438

439 Provide a default for missing values

440

441 >>> get(['Alice', 'Dennis'], phonebook, None)

442 ('555-1234', None)

443

444 See Also:

445 pluck

446 """

447 try:

448 return seq[ind]

449 except TypeError: # `ind` may be a list

450 if isinstance(ind, list):

451 if default == no_default:

452 if len(ind) > 1:

453 return operator.itemgetter(*ind)(seq)

454 elif ind:

455 return seq[ind[0]],

456 else:

457 return ()

458 else:

459 return tuple(_get(i, seq, default) for i in ind)

460 elif default != no_default:

461 return default

462 else:

463 raise

464 except (KeyError, IndexError): # we know `ind` is not a list

465 if default == no_default:

466 raise

467 else:

468 return default

469

470

471def concat(seqs):

472 """ Concatenate zero or more iterables, any of which may be infinite.

473

474 An infinite sequence will prevent the rest of the arguments from

475 being included.

476

477 We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs

478 can be a generator.

479

480 >>> list(concat([[], [1], [2, 3]]))

481 [1, 2, 3]

482

483 See also:

484 itertools.chain.from_iterable equivalent

485 """

486 return itertools.chain.from_iterable(seqs)

487

488

489def concatv(*seqs):

490 """ Variadic version of concat

491

492 >>> list(concatv([], ["a"], ["b", "c"]))

493 ['a', 'b', 'c']

494

495 See also:

496 itertools.chain

497 """

498 return concat(seqs)

499

500

501def mapcat(func, seqs):

502 """ Apply func to each sequence in seqs, concatenating results.

503

504 >>> list(mapcat(lambda s: [c.upper() for c in s],

505 ... [["a", "b"], ["c", "d", "e"]]))

506 ['A', 'B', 'C', 'D', 'E']

507 """

508 return concat(map(func, seqs))

509

510

511def cons(el, seq):

512 """ Add el to beginning of (possibly infinite) sequence seq.

513

514 >>> list(cons(1, [2, 3]))

515 [1, 2, 3]

516 """

517 return itertools.chain([el], seq)

518

519

520def interpose(el, seq):

521 """ Introduce element between each pair of elements in seq

522

523 >>> list(interpose("a", [1, 2, 3]))

524 [1, 'a', 2, 'a', 3]

525 """

526 inposed = concat(zip(itertools.repeat(el), seq))

527 next(inposed)

528 return inposed

529

530

531def frequencies(seq):

532 """ Find number of occurrences of each value in seq

533

534 >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat']) #doctest: +SKIP

535 {'cat': 3, 'ox': 1, 'pig': 2}

536

537 See Also:

538 countby

539 groupby

540 """

541 d = collections.defaultdict(int)

542 for item in seq:

543 d[item] += 1

544 return dict(d)

545

546

547def reduceby(key, binop, seq, init=no_default):

548 """ Perform a simultaneous groupby and reduction

549

550 The computation:

551

552 >>> result = reduceby(key, binop, seq, init) # doctest: +SKIP

553

554 is equivalent to the following:

555

556 >>> def reduction(group): # doctest: +SKIP

557 ... return reduce(binop, group, init) # doctest: +SKIP

558

559 >>> groups = groupby(key, seq) # doctest: +SKIP

560 >>> result = valmap(reduction, groups) # doctest: +SKIP

561

562 But the former does not build the intermediate groups, allowing it to

563 operate in much less space. This makes it suitable for larger datasets

564 that do not fit comfortably in memory

565

566 The ``init`` keyword argument is the default initialization of the

567 reduction. This can be either a constant value like ``0`` or a callable

568 like ``lambda : 0`` as might be used in ``defaultdict``.

569

570 Simple Examples

571 ---------------

572

573 >>> from operator import add, mul

574 >>> iseven = lambda x: x % 2 == 0

575

576 >>> data = [1, 2, 3, 4, 5]

577

578 >>> reduceby(iseven, add, data) # doctest: +SKIP

579 {False: 9, True: 6}

580

581 >>> reduceby(iseven, mul, data) # doctest: +SKIP

582 {False: 15, True: 8}

583

584 Complex Example

585 ---------------

586

587 >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},

588 ... {'name': 'fight crime', 'state': 'IL', 'cost': 100000},

589 ... {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},

590 ... {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]

591

592 >>> reduceby('state', # doctest: +SKIP

593 ... lambda acc, x: acc + x['cost'],

594 ... projects, 0)

595 {'CA': 1200000, 'IL': 2100000}

596

597 Example Using ``init``

598 ----------------------

599

600 >>> def set_add(s, i):

601 ... s.add(i)

602 ... return s

603

604 >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set) # doctest: +SKIP

605 {True: set([2, 4]),

606 False: set([1, 3])}

607 """

608 is_no_default = init == no_default

609 if not is_no_default and not callable(init):

610 _init = init

611 init = lambda: _init

612 if not callable(key):

613 key = getter(key)

614 d = {}

615 for item in seq:

616 k = key(item)

617 if k not in d:

618 if is_no_default:

619 d[k] = item

620 continue

621 else:

622 d[k] = init()

623 d[k] = binop(d[k], item)

624 return d

625

626

627def iterate(func, x):

628 """ Repeatedly apply a function func onto an original input

629

630 Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..

631

632 >>> def inc(x): return x + 1

633 >>> counter = iterate(inc, 0)

634 >>> next(counter)

635 0

636 >>> next(counter)

637 1

638 >>> next(counter)

639 2

640

641 >>> double = lambda x: x * 2

642 >>> powers_of_two = iterate(double, 1)

643 >>> next(powers_of_two)

644 1

645 >>> next(powers_of_two)

646 2

647 >>> next(powers_of_two)

648 4

649 >>> next(powers_of_two)

650 8

651 """

652 while True:

653 yield x

654 x = func(x)

655

656

657def sliding_window(n, seq):

658 """ A sequence of overlapping subsequences

659

660 >>> list(sliding_window(2, [1, 2, 3, 4]))

661 [(1, 2), (2, 3), (3, 4)]

662

663 This function creates a sliding window suitable for transformations like

664 sliding means / smoothing

665

666 >>> mean = lambda seq: float(sum(seq)) / len(seq)

667 >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))

668 [1.5, 2.5, 3.5]

669 """

670 return zip(*(collections.deque(itertools.islice(it, i), 0) or it

671 for i, it in enumerate(itertools.tee(seq, n))))

672

673

674no_pad = '__no__pad__'

675

676

677def partition(n, seq, pad=no_pad):

678 """ Partition sequence into tuples of length n

679

680 >>> list(partition(2, [1, 2, 3, 4]))

681 [(1, 2), (3, 4)]

682

683 If the length of ``seq`` is not evenly divisible by ``n``, the final tuple

684 is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:

685

686 >>> list(partition(2, [1, 2, 3, 4, 5]))

687 [(1, 2), (3, 4)]

688

689 >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))

690 [(1, 2), (3, 4), (5, None)]

691

692 See Also:

693 partition_all

694 """

695 args = [iter(seq)] * n

696 if pad is no_pad:

697 return zip(*args)

698 else:

699 return zip_longest(*args, fillvalue=pad)

700

701

702def partition_all(n, seq):

703 """ Partition all elements of sequence into tuples of length at most n

704

705 The final tuple may be shorter to accommodate extra elements.

706

707 >>> list(partition_all(2, [1, 2, 3, 4]))

708 [(1, 2), (3, 4)]

709

710 >>> list(partition_all(2, [1, 2, 3, 4, 5]))

711 [(1, 2), (3, 4), (5,)]

712

713 See Also:

714 partition

715 """

716 args = [iter(seq)] * n

717 it = zip_longest(*args, fillvalue=no_pad)

718 try:

719 prev = next(it)

720 except StopIteration:

721 return

722 for item in it:

723 yield prev

724 prev = item

725 if prev[-1] is no_pad:

726 try:

727 # If seq defines __len__, then

728 # we can quickly calculate where no_pad starts

729 end = len(seq) % n

730 if prev[end - 1] is no_pad or prev[end] is not no_pad:

731 raise LookupError(

732 'The sequence passed to `parition_all` has invalid length'

733 )

734 yield prev[:end]

735 except TypeError:

736 # Get first index of no_pad without using .index()

737 # https://github.com/pytoolz/toolz/issues/387

738 # Binary search from CPython's bisect module,

739 # modified for identity testing.

740 lo, hi = 0, n

741 while lo < hi:

742 mid = (lo + hi) // 2

743 if prev[mid] is no_pad:

744 hi = mid

745 else:

746 lo = mid + 1

747 yield prev[:lo]

748 else:

749 yield prev

750

751

752def count(seq):

753 """ Count the number of items in seq

754

755 Like the builtin ``len`` but works on lazy sequences.

756

757 Not to be confused with ``itertools.count``

758

759 See also:

760 len

761 """

762 if hasattr(seq, '__len__'):

763 return len(seq)

764 return sum(1 for i in seq)

765

766

767def pluck(ind, seqs, default=no_default):

768 """ plucks an element or several elements from each item in a sequence.

769

770 ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more

771 elements of each item in the sequence.

772

773 This is equivalent to running `map(curried.get(ind), seqs)`

774

775 ``ind`` can be either a single string/index or a list of strings/indices.

776 ``seqs`` should be sequence containing sequences or dicts.

777

778 e.g.

779

780 >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]

781 >>> list(pluck('name', data))

782 ['Cheese', 'Pies']

783 >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))

784 [(1, 2), (4, 5)]

785

786 See Also:

787 get

788 map

789 """

790 if default == no_default:

791 get = getter(ind)

792 return map(get, seqs)

793 elif isinstance(ind, list):

794 return (tuple(_get(item, seq, default) for item in ind)

795 for seq in seqs)

796 return (_get(ind, seq, default) for seq in seqs)

797

798

799def getter(index):

800 if isinstance(index, list):

801 if len(index) == 1:

802 index = index[0]

803 return lambda x: (x[index],)

804 elif index:

805 return operator.itemgetter(*index)

806 else:

807 return lambda x: ()

808 else:

809 return operator.itemgetter(index)

810

811

812def join(leftkey, leftseq, rightkey, rightseq,

813 left_default=no_default, right_default=no_default):

814 """ Join two sequences on common attributes

815

816 This is a semi-streaming operation. The LEFT sequence is fully evaluated

817 and placed into memory. The RIGHT sequence is evaluated lazily and so can

818 be arbitrarily large.

819

820 (Note: If right_default is defined, then unique keys of rightseq

821 will also be stored in memory.)

822

823 >>> friends = [('Alice', 'Edith'),

824 ... ('Alice', 'Zhao'),

825 ... ('Edith', 'Alice'),

826 ... ('Zhao', 'Alice'),

827 ... ('Zhao', 'Edith')]

828

829 >>> cities = [('Alice', 'NYC'),

830 ... ('Alice', 'Chicago'),

831 ... ('Dan', 'Sydney'),

832 ... ('Edith', 'Paris'),

833 ... ('Edith', 'Berlin'),

834 ... ('Zhao', 'Shanghai')]

835

836 >>> # Vacation opportunities

837 >>> # In what cities do people have friends?

838 >>> result = join(second, friends,

839 ... first, cities)

840 >>> for ((a, b), (c, d)) in sorted(unique(result)):

841 ... print((a, d))

842 ('Alice', 'Berlin')

843 ('Alice', 'Paris')

844 ('Alice', 'Shanghai')

845 ('Edith', 'Chicago')

846 ('Edith', 'NYC')

847 ('Zhao', 'Chicago')

848 ('Zhao', 'NYC')

849 ('Zhao', 'Berlin')

850 ('Zhao', 'Paris')

851

852 Specify outer joins with keyword arguments ``left_default`` and/or

853 ``right_default``. Here is a full outer join in which unmatched elements

854 are paired with None.

855

856 >>> identity = lambda x: x

857 >>> list(join(identity, [1, 2, 3],

858 ... identity, [2, 3, 4],

859 ... left_default=None, right_default=None))

860 [(2, 2), (3, 3), (None, 4), (1, None)]

861

862 Usually the key arguments are callables to be applied to the sequences. If

863 the keys are not obviously callable then it is assumed that indexing was

864 intended, e.g. the following is a legal change.

865 The join is implemented as a hash join and the keys of leftseq must be

866 hashable. Additionally, if right_default is defined, then keys of rightseq

867 must also be hashable.

868

869 >>> # result = join(second, friends, first, cities)

870 >>> result = join(1, friends, 0, cities) # doctest: +SKIP

871 """

872 if not callable(leftkey):

873 leftkey = getter(leftkey)

874 if not callable(rightkey):

875 rightkey = getter(rightkey)

876

877 d = groupby(leftkey, leftseq)

878

879 if left_default == no_default and right_default == no_default:

880 # Inner Join

881 for item in rightseq:

882 key = rightkey(item)

883 if key in d:

884 for left_match in d[key]:

885 yield (left_match, item)

886 elif left_default != no_default and right_default == no_default:

887 # Right Join

888 for item in rightseq:

889 key = rightkey(item)

890 if key in d:

891 for left_match in d[key]:

892 yield (left_match, item)

893 else:

894 yield (left_default, item)

895 elif right_default != no_default:

896 seen_keys = set()

897 seen = seen_keys.add

898

899 if left_default == no_default:

900 # Left Join

901 for item in rightseq:

902 key = rightkey(item)

903 seen(key)

904 if key in d:

905 for left_match in d[key]:

906 yield (left_match, item)

907 else:

908 # Full Join

909 for item in rightseq:

910 key = rightkey(item)

911 seen(key)

912 if key in d:

913 for left_match in d[key]:

914 yield (left_match, item)

915 else:

916 yield (left_default, item)

917

918 for key, matches in d.items():

919 if key not in seen_keys:

920 for match in matches:

921 yield (match, right_default)

922

923

924def diff(*seqs, **kwargs):

925 """ Return those items that differ between sequences

926

927 >>> list(diff([1, 2, 3], [1, 2, 10, 100]))

928 [(3, 10)]

929

930 Shorter sequences may be padded with a ``default`` value:

931

932 >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))

933 [(3, 10), (None, 100)]

934

935 A ``key`` function may also be applied to each item to use during

936 comparisons:

937

938 >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))

939 [('bananas', 'Oranges')]

940 """

941 N = len(seqs)

942 if N == 1 and isinstance(seqs[0], list):

943 seqs = seqs[0]

944 N = len(seqs)

945 if N < 2:

946 raise TypeError('Too few sequences given (min 2 required)')

947 default = kwargs.get('default', no_default)

948 if default == no_default:

949 iters = zip(*seqs)

950 else:

951 iters = zip_longest(*seqs, fillvalue=default)

952 key = kwargs.get('key', None)

953 if key is None:

954 for items in iters:

955 if items.count(items[0]) != N:

956 yield items

957 else:

958 for items in iters:

959 vals = tuple(map(key, items))

960 if vals.count(vals[0]) != N:

961 yield items

962

963

964def topk(k, seq, key=None):

965 """ Find the k largest elements of a sequence

966

967 Operates lazily in ``n*log(k)`` time

968

969 >>> topk(2, [1, 100, 10, 1000])

970 (1000, 100)

971

972 Use a key function to change sorted order

973

974 >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)

975 ('Charlie', 'Alice')

976

977 See also:

978 heapq.nlargest

979 """

980 if key is not None and not callable(key):

981 key = getter(key)

982 return tuple(heapq.nlargest(k, seq, key=key))

983

984

985def peek(seq):

986 """ Retrieve the next element of a sequence

987

988 Returns the first element and an iterable equivalent to the original

989 sequence, still having the element retrieved.

990

991 >>> seq = [0, 1, 2, 3, 4]

992 >>> first, seq = peek(seq)

993 >>> first

994 0

995 >>> list(seq)

996 [0, 1, 2, 3, 4]

997 """

998 iterator = iter(seq)

999 item = next(iterator)

1000 return item, itertools.chain((item,), iterator)

1001

1002

1003def peekn(n, seq):

1004 """ Retrieve the next n elements of a sequence

1005

1006 Returns a tuple of the first n elements and an iterable equivalent

1007 to the original, still having the elements retrieved.

1008

1009 >>> seq = [0, 1, 2, 3, 4]

1010 >>> first_two, seq = peekn(2, seq)

1011 >>> first_two

1012 (0, 1)

1013 >>> list(seq)

1014 [0, 1, 2, 3, 4]

1015 """

1016 iterator = iter(seq)

1017 peeked = tuple(take(n, iterator))

1018 return peeked, itertools.chain(iter(peeked), iterator)

1019

1020

1021def random_sample(prob, seq, random_state=None):

1022 """ Return elements from a sequence with probability of prob

1023

1024 Returns a lazy iterator of random items from seq.

1025

1026 ``random_sample`` considers each item independently and without

1027 replacement. See below how the first time it returned 13 items and the

1028 next time it returned 6 items.

1029

1030 >>> seq = list(range(100))

1031 >>> list(random_sample(0.1, seq)) # doctest: +SKIP

1032 [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]

1033 >>> list(random_sample(0.1, seq)) # doctest: +SKIP

1034 [6, 44, 54, 61, 69, 94]

1035

1036 Providing an integer seed for ``random_state`` will result in

1037 deterministic sampling. Given the same seed it will return the same sample

1038 every time.

1039

1040 >>> list(random_sample(0.1, seq, random_state=2016))

1041 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

1042 >>> list(random_sample(0.1, seq, random_state=2016))

1043 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

1044

1045 ``random_state`` can also be any object with a method ``random`` that

1046 returns floats between 0.0 and 1.0 (exclusive).

1047

1048 >>> from random import Random

1049 >>> randobj = Random(2016)

1050 >>> list(random_sample(0.1, seq, random_state=randobj))

1051 [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

1052 """

1053 if not hasattr(random_state, 'random'):

1054 from random import Random

1055

1056 random_state = Random(random_state)

1057 return filter(lambda _: random_state.random() < prob, seq)