Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/toolz/itertoolz.py: 71%

361 statements  

coverage.py v7.2.2, created at 2023-03-26 06:47 +0000

import itertools
import heapq
import collections
import operator
from functools import partial
from itertools import filterfalse, zip_longest
from collections.abc import Sequence
from toolz.utils import no_default


__all__ = ('remove', 'accumulate', 'groupby', 'merge_sorted', 'interleave',
           'unique', 'isiterable', 'isdistinct', 'take', 'drop', 'take_nth',
           'first', 'second', 'nth', 'last', 'get', 'concat', 'concatv',
           'mapcat', 'cons', 'interpose', 'frequencies', 'reduceby', 'iterate',
           'sliding_window', 'partition', 'partition_all', 'count', 'pluck',
           'join', 'tail', 'diff', 'topk', 'peek', 'peekn', 'random_sample')

def remove(predicate, seq):
    """ Return those items of sequence for which predicate(item) is False

    >>> def iseven(x):
    ...     return x % 2 == 0
    >>> list(remove(iseven, [1, 2, 3, 4]))
    [1, 3]
    """
    return filterfalse(predicate, seq)

def accumulate(binop, seq, initial=no_default):
    """ Repeatedly apply binary function to a sequence, accumulating results

    >>> from operator import add, mul
    >>> list(accumulate(add, [1, 2, 3, 4, 5]))
    [1, 3, 6, 10, 15]
    >>> list(accumulate(mul, [1, 2, 3, 4, 5]))
    [1, 2, 6, 24, 120]

    Accumulate is similar to ``reduce`` and is good for making functions like
    cumulative sum:

    >>> from functools import partial, reduce
    >>> sum = partial(reduce, add)
    >>> cumsum = partial(accumulate, add)

    Accumulate also takes an optional argument that will be used as the first
    value. This is similar to reduce.

    >>> list(accumulate(add, [1, 2, 3], -1))
    [-1, 0, 2, 5]
    >>> list(accumulate(add, [], 1))
    [1]

    See Also:
        itertools.accumulate : In standard itertools for Python 3.2+
    """
    seq = iter(seq)
    if initial == no_default:
        try:
            result = next(seq)
        except StopIteration:
            return
    else:
        result = initial
    yield result
    for elem in seq:
        result = binop(result, elem)
        yield result
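# --- Editorial sketch (not part of the original module) ---------------------
# The ``initial`` behaviour shown above can also be expressed with the
# standard library on Python 3.8+; note the different argument order
# (iterable first, then the binary function).
def _accumulate_initial_sketch(seq, initial):
    """Hypothetical helper mirroring ``accumulate(add, seq, initial)``.

    >>> _accumulate_initial_sketch([1, 2, 3], -1)
    [-1, 0, 2, 5]
    """
    return list(itertools.accumulate(seq, operator.add, initial=initial))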

def groupby(key, seq):
    """ Group a collection by a key function

    >>> names = ['Alice', 'Bob', 'Charlie', 'Dan', 'Edith', 'Frank']
    >>> groupby(len, names)  # doctest: +SKIP
    {3: ['Bob', 'Dan'], 5: ['Alice', 'Edith', 'Frank'], 7: ['Charlie']}

    >>> iseven = lambda x: x % 2 == 0
    >>> groupby(iseven, [1, 2, 3, 4, 5, 6, 7, 8])  # doctest: +SKIP
    {False: [1, 3, 5, 7], True: [2, 4, 6, 8]}

    Non-callable keys imply grouping on a member.

    >>> groupby('gender', [{'name': 'Alice', 'gender': 'F'},
    ...                    {'name': 'Bob', 'gender': 'M'},
    ...                    {'name': 'Charlie', 'gender': 'M'}])  # doctest:+SKIP
    {'F': [{'gender': 'F', 'name': 'Alice'}],
     'M': [{'gender': 'M', 'name': 'Bob'},
           {'gender': 'M', 'name': 'Charlie'}]}

    Not to be confused with ``itertools.groupby``

    See Also:
        countby
    """
    if not callable(key):
        key = getter(key)
    d = collections.defaultdict(lambda: [].append)
    for item in seq:
        d[key(item)](item)
    rv = {}
    for k, v in d.items():
        rv[k] = v.__self__
    return rv
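# --- Editorial sketch (not part of the original module) ---------------------
# ``groupby`` above avoids a per-item attribute lookup by caching the bound
# ``list.append`` method in the defaultdict and recovering each list via
# ``__self__``.  A plainer (slightly slower) equivalent looks like this.
def _groupby_plain_sketch(key, seq):
    """Hypothetical reference implementation of ``groupby``.

    >>> _groupby_plain_sketch(len, ['Bob', 'Dan', 'Alice'])
    {3: ['Bob', 'Dan'], 5: ['Alice']}
    """
    if not callable(key):
        key = getter(key)
    d = collections.defaultdict(list)
    for item in seq:
        d[key(item)].append(item)
    return dict(d)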

def merge_sorted(*seqs, **kwargs):
    """ Merge and sort a collection of sorted collections

    This works lazily and only keeps one value from each iterable in memory.

    >>> list(merge_sorted([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]

    >>> ''.join(merge_sorted('abc', 'abc', 'abc'))
    'aaabbbccc'

    The "key" function used to sort the input may be passed as a keyword.

    >>> list(merge_sorted([2, 3], [1, 3], key=lambda x: x // 3))
    [2, 1, 3, 3]
    """
    if len(seqs) == 0:
        return iter([])
    elif len(seqs) == 1:
        return iter(seqs[0])

    key = kwargs.get('key', None)
    if key is None:
        return _merge_sorted_binary(seqs)
    else:
        return _merge_sorted_binary_key(seqs, key)
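# --- Editorial sketch (not part of the original module) ---------------------
# For comparison only: the standard library's ``heapq.merge`` performs the
# same lazy k-way merge, whereas the helpers below merge the inputs pairwise
# and keep a separate no-key fast path.
def _merge_sorted_heapq_sketch(*seqs, **kwargs):
    """Hypothetical alternative to ``merge_sorted`` built on ``heapq.merge``.

    >>> list(_merge_sorted_heapq_sketch([1, 3, 5], [2, 4, 6]))
    [1, 2, 3, 4, 5, 6]
    """
    return heapq.merge(*seqs, key=kwargs.get('key', None))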

def _merge_sorted_binary(seqs):
    # Split the sequences in half and merge each half recursively, so that at
    # this level only two iterators ever need to be compared.
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary(L1)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary(L2)

    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return

    # Interleave the two iterators.  The for/else clauses detect which
    # iterator runs out first; the remaining values of the other iterator are
    # yielded either by the else branch or by the trailing loop below.
    for val1 in seq1:
        if val2 < val1:
            yield val2
            for val2 in seq2:
                if val2 < val1:
                    yield val2
                else:
                    yield val1
                    break
            else:
                break
        else:
            yield val1
    else:
        yield val2
        for val2 in seq2:
            yield val2
        return
    yield val1
    for val1 in seq1:
        yield val1

def _merge_sorted_binary_key(seqs, key):
    mid = len(seqs) // 2
    L1 = seqs[:mid]
    if len(L1) == 1:
        seq1 = iter(L1[0])
    else:
        seq1 = _merge_sorted_binary_key(L1, key)
    L2 = seqs[mid:]
    if len(L2) == 1:
        seq2 = iter(L2[0])
    else:
        seq2 = _merge_sorted_binary_key(L2, key)

    try:
        val2 = next(seq2)
    except StopIteration:
        for val1 in seq1:
            yield val1
        return
    key2 = key(val2)

    for val1 in seq1:
        key1 = key(val1)
        if key2 < key1:
            yield val2
            for val2 in seq2:
                key2 = key(val2)
                if key2 < key1:
                    yield val2
                else:
                    yield val1
                    break
            else:
                break
        else:
            yield val1
    else:
        yield val2
        for val2 in seq2:
            yield val2
        return
    yield val1
    for val1 in seq1:
        yield val1

def interleave(seqs):
    """ Interleave a sequence of sequences

    >>> list(interleave([[1, 2], [3, 4]]))
    [1, 3, 2, 4]

    >>> ''.join(interleave(('ABC', 'XY')))
    'AXBYC'

    Both the individual sequences and the sequence of sequences may be infinite

    Returns a lazy iterator
    """
    iters = itertools.cycle(map(iter, seqs))
    while True:
        try:
            for itr in iters:
                yield next(itr)
            # Only reached when ``iters`` is empty, i.e. every sequence is
            # exhausted.
            return
        except StopIteration:
            # ``itr`` is exhausted.  The old cycle resumes where it left off,
            # and ``takewhile`` collects each remaining live iterator until
            # ``itr`` comes around again, rebuilding the cycle without it.
            predicate = partial(operator.is_not, itr)
            iters = itertools.cycle(itertools.takewhile(predicate, iters))

def unique(seq, key=None):
    """ Return only unique elements of a sequence

    >>> tuple(unique((1, 2, 3)))
    (1, 2, 3)
    >>> tuple(unique((1, 2, 1, 3)))
    (1, 2, 3)

    Uniqueness can be defined by the ``key`` keyword

    >>> tuple(unique(['cat', 'mouse', 'dog', 'hen'], key=len))
    ('cat', 'mouse')
    """
    seen = set()
    seen_add = seen.add
    if key is None:
        for item in seq:
            if item not in seen:
                seen_add(item)
                yield item
    else:  # calculate key
        for item in seq:
            val = key(item)
            if val not in seen:
                seen_add(val)
                yield item

def isiterable(x):
    """ Is x iterable?

    >>> isiterable([1, 2, 3])
    True
    >>> isiterable('abc')
    True
    >>> isiterable(5)
    False
    """
    try:
        iter(x)
        return True
    except TypeError:
        return False

def isdistinct(seq):
    """ All values in sequence are distinct

    >>> isdistinct([1, 2, 3])
    True
    >>> isdistinct([1, 2, 1])
    False

    >>> isdistinct("Hello")
    False
    >>> isdistinct("World")
    True
    """
    if iter(seq) is seq:
        seen = set()
        seen_add = seen.add
        for item in seq:
            if item in seen:
                return False
            seen_add(item)
        return True
    else:
        return len(seq) == len(set(seq))

def take(n, seq):
    """ The first n elements of a sequence

    >>> list(take(2, [10, 20, 30, 40, 50]))
    [10, 20]

    See Also:
        drop
        tail
    """
    return itertools.islice(seq, n)

def tail(n, seq):
    """ The last n elements of a sequence

    >>> tail(2, [10, 20, 30, 40, 50])
    [40, 50]

    See Also:
        drop
        take
    """
    try:
        return seq[-n:]
    except (TypeError, KeyError):
        # Not sliceable: a bounded deque keeps only the last n items seen.
        return tuple(collections.deque(seq, n))

def drop(n, seq):
    """ The sequence following the first n elements

    >>> list(drop(2, [10, 20, 30, 40, 50]))
    [30, 40, 50]

    See Also:
        take
        tail
    """
    return itertools.islice(seq, n, None)

def take_nth(n, seq):
    """ Every nth item in seq

    >>> list(take_nth(2, [10, 20, 30, 40, 50]))
    [10, 30, 50]
    """
    return itertools.islice(seq, 0, None, n)

def first(seq):
    """ The first element in a sequence

    >>> first('ABC')
    'A'
    """
    return next(iter(seq))

def second(seq):
    """ The second element in a sequence

    >>> second('ABC')
    'B'
    """
    seq = iter(seq)
    next(seq)
    return next(seq)

def nth(n, seq):
    """ The nth element in a sequence

    >>> nth(1, 'ABC')
    'B'
    """
    if isinstance(seq, (tuple, list, Sequence)):
        return seq[n]
    else:
        return next(itertools.islice(seq, n, None))

def last(seq):
    """ The last element in a sequence

    >>> last('ABC')
    'C'
    """
    return tail(1, seq)[0]

rest = partial(drop, 1)

def _get(ind, seq, default):
    try:
        return seq[ind]
    except (KeyError, IndexError):
        return default

def get(ind, seq, default=no_default):
    """ Get element in a sequence or dict

    Provides standard indexing

    >>> get(1, 'ABC')  # Same as 'ABC'[1]
    'B'

    Pass a list to get multiple values

    >>> get([1, 2], 'ABC')  # ('ABC'[1], 'ABC'[2])
    ('B', 'C')

    Works on any value that supports indexing/getitem
    For example here we see that it works with dictionaries

    >>> phonebook = {'Alice': '555-1234',
    ...              'Bob': '555-5678',
    ...              'Charlie': '555-9999'}
    >>> get('Alice', phonebook)
    '555-1234'

    >>> get(['Alice', 'Bob'], phonebook)
    ('555-1234', '555-5678')

    Provide a default for missing values

    >>> get(['Alice', 'Dennis'], phonebook, None)
    ('555-1234', None)

    See Also:
        pluck
    """
    try:
        return seq[ind]
    except TypeError:  # `ind` may be a list
        if isinstance(ind, list):
            if default == no_default:
                if len(ind) > 1:
                    return operator.itemgetter(*ind)(seq)
                elif ind:
                    # A single index in a list still returns a 1-tuple
                    # (note the trailing comma).
                    return seq[ind[0]],
                else:
                    return ()
            else:
                return tuple(_get(i, seq, default) for i in ind)
        elif default != no_default:
            return default
        else:
            raise
    except (KeyError, IndexError):  # we know `ind` is not a list
        if default == no_default:
            raise
        else:
            return default

def concat(seqs):
    """ Concatenate zero or more iterables, any of which may be infinite.

    An infinite sequence will prevent the rest of the arguments from
    being included.

    We use chain.from_iterable rather than ``chain(*seqs)`` so that seqs
    can be a generator.

    >>> list(concat([[], [1], [2, 3]]))
    [1, 2, 3]

    See also:
        itertools.chain.from_iterable  equivalent
    """
    return itertools.chain.from_iterable(seqs)

def concatv(*seqs):
    """ Variadic version of concat

    >>> list(concatv([], ["a"], ["b", "c"]))
    ['a', 'b', 'c']

    See also:
        itertools.chain
    """
    return concat(seqs)

def mapcat(func, seqs):
    """ Apply func to each sequence in seqs, concatenating results.

    >>> list(mapcat(lambda s: [c.upper() for c in s],
    ...             [["a", "b"], ["c", "d", "e"]]))
    ['A', 'B', 'C', 'D', 'E']
    """
    return concat(map(func, seqs))

def cons(el, seq):
    """ Add el to beginning of (possibly infinite) sequence seq.

    >>> list(cons(1, [2, 3]))
    [1, 2, 3]
    """
    return itertools.chain([el], seq)

def interpose(el, seq):
    """ Introduce element between each pair of elements in seq

    >>> list(interpose("a", [1, 2, 3]))
    [1, 'a', 2, 'a', 3]
    """
    inposed = concat(zip(itertools.repeat(el), seq))
    # Pairing puts ``el`` before every element; drop the leading ``el``.
    next(inposed)
    return inposed

def frequencies(seq):
    """ Find number of occurrences of each value in seq

    >>> frequencies(['cat', 'cat', 'ox', 'pig', 'pig', 'cat'])  #doctest: +SKIP
    {'cat': 3, 'ox': 1, 'pig': 2}

    See Also:
        countby
        groupby
    """
    d = collections.defaultdict(int)
    for item in seq:
        d[item] += 1
    return dict(d)

def reduceby(key, binop, seq, init=no_default):
    """ Perform a simultaneous groupby and reduction

    The computation:

    >>> result = reduceby(key, binop, seq, init)      # doctest: +SKIP

    is equivalent to the following:

    >>> def reduction(group):                         # doctest: +SKIP
    ...     return reduce(binop, group, init)         # doctest: +SKIP

    >>> groups = groupby(key, seq)                    # doctest: +SKIP
    >>> result = valmap(reduction, groups)            # doctest: +SKIP

    But the former does not build the intermediate groups, allowing it to
    operate in much less space.  This makes it suitable for larger datasets
    that do not fit comfortably in memory.

    The ``init`` keyword argument is the default initialization of the
    reduction.  This can be either a constant value like ``0`` or a callable
    like ``lambda: 0`` as might be used in ``defaultdict``.

    Simple Examples
    ---------------

    >>> from operator import add, mul
    >>> iseven = lambda x: x % 2 == 0

    >>> data = [1, 2, 3, 4, 5]

    >>> reduceby(iseven, add, data)  # doctest: +SKIP
    {False: 9, True: 6}

    >>> reduceby(iseven, mul, data)  # doctest: +SKIP
    {False: 15, True: 8}

    Complex Example
    ---------------

    >>> projects = [{'name': 'build roads', 'state': 'CA', 'cost': 1000000},
    ...             {'name': 'fight crime', 'state': 'IL', 'cost': 100000},
    ...             {'name': 'help farmers', 'state': 'IL', 'cost': 2000000},
    ...             {'name': 'help farmers', 'state': 'CA', 'cost': 200000}]

    >>> reduceby('state',                        # doctest: +SKIP
    ...          lambda acc, x: acc + x['cost'],
    ...          projects, 0)
    {'CA': 1200000, 'IL': 2100000}

    Example Using ``init``
    ----------------------

    >>> def set_add(s, i):
    ...     s.add(i)
    ...     return s

    >>> reduceby(iseven, set_add, [1, 2, 3, 4, 1, 2, 3], set)  # doctest: +SKIP
    {True: set([2, 4]),
     False: set([1, 3])}
    """
    is_no_default = init == no_default
    if not is_no_default and not callable(init):
        _init = init
        init = lambda: _init
    if not callable(key):
        key = getter(key)
    d = {}
    for item in seq:
        k = key(item)
        if k not in d:
            if is_no_default:
                d[k] = item
                continue
            else:
                d[k] = init()
        d[k] = binop(d[k], item)
    return d
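# --- Editorial sketch (not part of the original module) ---------------------
# A runnable version of the equivalence sketched in the docstring above: the
# same result as ``reduceby``, but computed via intermediate groups.
def _reduceby_via_groups_sketch(key, binop, seq, init):
    """Hypothetical reference: group first, then reduce each group.

    >>> from operator import add
    >>> iseven = lambda x: x % 2 == 0
    >>> _reduceby_via_groups_sketch(iseven, add, [1, 2, 3, 4, 5], 0)
    {False: 9, True: 6}
    """
    from functools import reduce
    return {k: reduce(binop, group, init)
            for k, group in groupby(key, seq).items()}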

def iterate(func, x):
    """ Repeatedly apply a function func onto an original input

    Yields x, then func(x), then func(func(x)), then func(func(func(x))), etc..

    >>> def inc(x):  return x + 1
    >>> counter = iterate(inc, 0)
    >>> next(counter)
    0
    >>> next(counter)
    1
    >>> next(counter)
    2

    >>> double = lambda x: x * 2
    >>> powers_of_two = iterate(double, 1)
    >>> next(powers_of_two)
    1
    >>> next(powers_of_two)
    2
    >>> next(powers_of_two)
    4
    >>> next(powers_of_two)
    8
    """
    while True:
        yield x
        x = func(x)

def sliding_window(n, seq):
    """ A sequence of overlapping subsequences

    >>> list(sliding_window(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]

    This function creates a sliding window suitable for transformations like
    sliding means / smoothing

    >>> mean = lambda seq: float(sum(seq)) / len(seq)
    >>> list(map(mean, sliding_window(2, [1, 2, 3, 4])))
    [1.5, 2.5, 3.5]
    """
    return zip(*(collections.deque(itertools.islice(it, i), 0) or it
                 for i, it in enumerate(itertools.tee(seq, n))))
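# --- Editorial sketch (not part of the original module) ---------------------
# How the one-liner above works: ``itertools.tee`` makes n independent copies
# of the iterator, ``deque(islice(it, i), 0)`` silently advances copy i by i
# items (maxlen=0 stores nothing and the empty deque is falsy), and ``or it``
# then hands the advanced iterator to ``zip``.  A more literal deque-based
# equivalent:
def _sliding_window_deque_sketch(n, seq):
    """Hypothetical reference implementation of ``sliding_window``.

    >>> list(_sliding_window_deque_sketch(2, [1, 2, 3, 4]))
    [(1, 2), (2, 3), (3, 4)]
    """
    it = iter(seq)
    window = collections.deque(itertools.islice(it, n), maxlen=n)
    if len(window) == n:
        yield tuple(window)
    for item in it:
        window.append(item)
        yield tuple(window)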

no_pad = '__no__pad__'

def partition(n, seq, pad=no_pad):
    """ Partition sequence into tuples of length n

    >>> list(partition(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    If the length of ``seq`` is not evenly divisible by ``n``, the final tuple
    is dropped if ``pad`` is not specified, or filled to length ``n`` by pad:

    >>> list(partition(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4)]

    >>> list(partition(2, [1, 2, 3, 4, 5], pad=None))
    [(1, 2), (3, 4), (5, None)]

    See Also:
        partition_all
    """
    # n references to the *same* iterator, so each output tuple of zip /
    # zip_longest consumes n consecutive items from ``seq``.
    args = [iter(seq)] * n
    if pad is no_pad:
        return zip(*args)
    else:
        return zip_longest(*args, fillvalue=pad)

def partition_all(n, seq):
    """ Partition all elements of sequence into tuples of length at most n

    The final tuple may be shorter to accommodate extra elements.

    >>> list(partition_all(2, [1, 2, 3, 4]))
    [(1, 2), (3, 4)]

    >>> list(partition_all(2, [1, 2, 3, 4, 5]))
    [(1, 2), (3, 4), (5,)]

    See Also:
        partition
    """
    args = [iter(seq)] * n
    it = zip_longest(*args, fillvalue=no_pad)
    try:
        prev = next(it)
    except StopIteration:
        return
    for item in it:
        yield prev
        prev = item
    if prev[-1] is no_pad:
        try:
            # If seq defines __len__, then
            # we can quickly calculate where no_pad starts
            yield prev[:len(seq) % n]
        except TypeError:
            # Get first index of no_pad without using .index()
            # https://github.com/pytoolz/toolz/issues/387
            # Binary search from CPython's bisect module,
            # modified for identity testing.
            lo, hi = 0, n
            while lo < hi:
                mid = (lo + hi) // 2
                if prev[mid] is no_pad:
                    hi = mid
                else:
                    lo = mid + 1
            yield prev[:lo]
    else:
        yield prev

def count(seq):
    """ Count the number of items in seq

    Like the builtin ``len`` but works on lazy sequences.

    Not to be confused with ``itertools.count``

    See also:
        len
    """
    if hasattr(seq, '__len__'):
        return len(seq)
    return sum(1 for i in seq)

def pluck(ind, seqs, default=no_default):
    """ plucks an element or several elements from each item in a sequence.

    ``pluck`` maps ``itertoolz.get`` over a sequence and returns one or more
    elements of each item in the sequence.

    This is equivalent to running `map(curried.get(ind), seqs)`

    ``ind`` can be either a single string/index or a list of strings/indices.
    ``seqs`` should be a sequence containing sequences or dicts.

    e.g.

    >>> data = [{'id': 1, 'name': 'Cheese'}, {'id': 2, 'name': 'Pies'}]
    >>> list(pluck('name', data))
    ['Cheese', 'Pies']
    >>> list(pluck([0, 1], [[1, 2, 3], [4, 5, 7]]))
    [(1, 2), (4, 5)]

    See Also:
        get
        map
    """
    if default == no_default:
        get = getter(ind)
        return map(get, seqs)
    elif isinstance(ind, list):
        return (tuple(_get(item, seq, default) for item in ind)
                for seq in seqs)
    return (_get(ind, seq, default) for seq in seqs)

def getter(index):
    if isinstance(index, list):
        if len(index) == 1:
            index = index[0]
            return lambda x: (x[index],)
        elif index:
            return operator.itemgetter(*index)
        else:
            return lambda x: ()
    else:
        return operator.itemgetter(index)
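# --- Editorial note (not part of the original module) -----------------------
# ``getter`` mirrors ``get``'s list handling: a multi-element list becomes an
# ``itemgetter`` over several indices, a one-element list still yields a
# 1-tuple, and an empty list yields an empty tuple.  Illustrative expressions:
#
#     >>> getter('a')({'a': 1})
#     1
#     >>> getter(['a', 'b'])({'a': 1, 'b': 2})
#     (1, 2)
#     >>> getter(['a'])({'a': 1})
#     (1,)
#     >>> getter([])({'a': 1})
#     ()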

def join(leftkey, leftseq, rightkey, rightseq,
         left_default=no_default, right_default=no_default):
    """ Join two sequences on common attributes

    This is a semi-streaming operation.  The LEFT sequence is fully evaluated
    and placed into memory.  The RIGHT sequence is evaluated lazily and so can
    be arbitrarily large.
    (Note: If right_default is defined, then unique keys of rightseq
    will also be stored in memory.)

    >>> friends = [('Alice', 'Edith'),
    ...            ('Alice', 'Zhao'),
    ...            ('Edith', 'Alice'),
    ...            ('Zhao', 'Alice'),
    ...            ('Zhao', 'Edith')]

    >>> cities = [('Alice', 'NYC'),
    ...           ('Alice', 'Chicago'),
    ...           ('Dan', 'Sydney'),
    ...           ('Edith', 'Paris'),
    ...           ('Edith', 'Berlin'),
    ...           ('Zhao', 'Shanghai')]

    >>> # Vacation opportunities
    >>> # In what cities do people have friends?
    >>> result = join(second, friends,
    ...               first, cities)
    >>> for ((a, b), (c, d)) in sorted(unique(result)):
    ...     print((a, d))
    ('Alice', 'Berlin')
    ('Alice', 'Paris')
    ('Alice', 'Shanghai')
    ('Edith', 'Chicago')
    ('Edith', 'NYC')
    ('Zhao', 'Chicago')
    ('Zhao', 'NYC')
    ('Zhao', 'Berlin')
    ('Zhao', 'Paris')

    Specify outer joins with keyword arguments ``left_default`` and/or
    ``right_default``.  Here is a full outer join in which unmatched elements
    are paired with None.

    >>> identity = lambda x: x
    >>> list(join(identity, [1, 2, 3],
    ...           identity, [2, 3, 4],
    ...           left_default=None, right_default=None))
    [(2, 2), (3, 3), (None, 4), (1, None)]

    Usually the key arguments are callables to be applied to the sequences.
    If the keys are not obviously callable then it is assumed that indexing
    was intended; e.g. the following change is valid.
    The join is implemented as a hash join and the keys of leftseq must be
    hashable. Additionally, if right_default is defined, then keys of rightseq
    must also be hashable.

    >>> # result = join(second, friends, first, cities)
    >>> result = join(1, friends, 0, cities)  # doctest: +SKIP
    """
    if not callable(leftkey):
        leftkey = getter(leftkey)
    if not callable(rightkey):
        rightkey = getter(rightkey)

    d = groupby(leftkey, leftseq)

    if left_default == no_default and right_default == no_default:
        # Inner Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
    elif left_default != no_default and right_default == no_default:
        # Right Join
        for item in rightseq:
            key = rightkey(item)
            if key in d:
                for left_match in d[key]:
                    yield (left_match, item)
            else:
                yield (left_default, item)
    elif right_default != no_default:
        seen_keys = set()
        seen = seen_keys.add

        if left_default == no_default:
            # Left Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
        else:
            # Full Join
            for item in rightseq:
                key = rightkey(item)
                seen(key)
                if key in d:
                    for left_match in d[key]:
                        yield (left_match, item)
                else:
                    yield (left_default, item)

        for key, matches in d.items():
            if key not in seen_keys:
                for match in matches:
                    yield (match, right_default)
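# --- Editorial sketch (not part of the original module) ---------------------
# A small runnable example of the non-callable-key form mentioned in the
# docstring: string keys are turned into ``getter`` lookups, so dicts can be
# joined on field names directly.  The data below is hypothetical.
def _join_on_fields_sketch():
    """Joins hypothetical example dicts on ``'id' == 'user_id'``.

    >>> [(u['name'], o['item']) for u, o in _join_on_fields_sketch()]
    [('Alice', 'book'), ('Alice', 'pen')]
    """
    users = [{'id': 1, 'name': 'Alice'}, {'id': 2, 'name': 'Bob'}]
    orders = [{'user_id': 1, 'item': 'book'}, {'user_id': 1, 'item': 'pen'}]
    return join('id', users, 'user_id', orders)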

def diff(*seqs, **kwargs):
    """ Return those items that differ between sequences

    >>> list(diff([1, 2, 3], [1, 2, 10, 100]))
    [(3, 10)]

    Shorter sequences may be padded with a ``default`` value:

    >>> list(diff([1, 2, 3], [1, 2, 10, 100], default=None))
    [(3, 10), (None, 100)]

    A ``key`` function may also be applied to each item to use during
    comparisons:

    >>> list(diff(['apples', 'bananas'], ['Apples', 'Oranges'], key=str.lower))
    [('bananas', 'Oranges')]
    """
    N = len(seqs)
    if N == 1 and isinstance(seqs[0], list):
        seqs = seqs[0]
        N = len(seqs)
    if N < 2:
        raise TypeError('Too few sequences given (min 2 required)')
    default = kwargs.get('default', no_default)
    if default == no_default:
        iters = zip(*seqs)
    else:
        iters = zip_longest(*seqs, fillvalue=default)
    key = kwargs.get('key', None)
    if key is None:
        for items in iters:
            if items.count(items[0]) != N:
                yield items
    else:
        for items in iters:
            vals = tuple(map(key, items))
            if vals.count(vals[0]) != N:
                yield items

def topk(k, seq, key=None):
    """ Find the k largest elements of a sequence

    Operates lazily in ``n*log(k)`` time

    >>> topk(2, [1, 100, 10, 1000])
    (1000, 100)

    Use a key function to change sorted order

    >>> topk(2, ['Alice', 'Bob', 'Charlie', 'Dan'], key=len)
    ('Charlie', 'Alice')

    See also:
        heapq.nlargest
    """
    if key is not None and not callable(key):
        key = getter(key)
    return tuple(heapq.nlargest(k, seq, key=key))

def peek(seq):
    """ Retrieve the next element of a sequence

    Returns the first element and an iterable equivalent to the original
    sequence, still having the element retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first, seq = peek(seq)
    >>> first
    0
    >>> list(seq)
    [0, 1, 2, 3, 4]
    """
    iterator = iter(seq)
    item = next(iterator)
    return item, itertools.chain((item,), iterator)

def peekn(n, seq):
    """ Retrieve the next n elements of a sequence

    Returns a tuple of the first n elements and an iterable equivalent
    to the original, still having the elements retrieved.

    >>> seq = [0, 1, 2, 3, 4]
    >>> first_two, seq = peekn(2, seq)
    >>> first_two
    (0, 1)
    >>> list(seq)
    [0, 1, 2, 3, 4]
    """
    iterator = iter(seq)
    peeked = tuple(take(n, iterator))
    return peeked, itertools.chain(iter(peeked), iterator)

def random_sample(prob, seq, random_state=None):
    """ Return elements from a sequence with probability of prob

    Returns a lazy iterator of random items from seq.

    ``random_sample`` considers each item independently and without
    replacement. In the example below the first call returned 13 items and
    the second returned 6.

    >>> seq = list(range(100))
    >>> list(random_sample(0.1, seq))  # doctest: +SKIP
    [6, 9, 19, 35, 45, 50, 58, 62, 68, 72, 78, 86, 95]
    >>> list(random_sample(0.1, seq))  # doctest: +SKIP
    [6, 44, 54, 61, 69, 94]

    Providing an integer seed for ``random_state`` will result in
    deterministic sampling. Given the same seed it will return the same sample
    every time.

    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    >>> list(random_sample(0.1, seq, random_state=2016))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]

    ``random_state`` can also be any object with a method ``random`` that
    returns floats between 0.0 and 1.0 (exclusive).

    >>> from random import Random
    >>> randobj = Random(2016)
    >>> list(random_sample(0.1, seq, random_state=randobj))
    [7, 9, 19, 25, 30, 32, 34, 48, 59, 60, 81, 98]
    """
    if not hasattr(random_state, 'random'):
        from random import Random

        random_state = Random(random_state)
    return filter(lambda _: random_state.random() < prob, seq)