Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 25%
537 statements
coverage.py v7.2.7, created at 2023-06-07 06:13 +0000
1# -*- coding: utf-8 -*-
3# Copyright (c) 2013, Mahmoud Hashemi
4#
5# Redistribution and use in source and binary forms, with or without
6# modification, are permitted provided that the following conditions are
7# met:
8#
9# * Redistributions of source code must retain the above copyright
10# notice, this list of conditions and the following disclaimer.
11#
12# * Redistributions in binary form must reproduce the above
13# copyright notice, this list of conditions and the following
14# disclaimer in the documentation and/or other materials provided
15# with the distribution.
16#
17# * The names of the contributors may not be used to endorse or
18# promote products derived from this software without specific
19# prior written permission.
20#
21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
33""":mod:`itertools` is full of great examples of Python generator
34usage. However, there are still some critical gaps. ``iterutils``
35fills many of those gaps with featureful, tested, and Pythonic
36solutions.
38Many of the functions below have two versions, one which
39returns an iterator (denoted by the ``*_iter`` naming pattern), and a
40shorter-named convenience form that returns a list. Some of the
41following are based on examples from the itertools docs.
42"""
44import os
45import math
46import time
47import codecs
48import random
49import itertools
51try:
52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
53except ImportError:
54 from collections import Mapping, Sequence, Set, ItemsView, Iterable
57try:
58 from .typeutils import make_sentinel
59 _UNSET = make_sentinel('_UNSET')
60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
61except ImportError:
62 _REMAP_EXIT = object()
63 _UNSET = object()
65try:
66 from future_builtins import filter
67 from itertools import izip
68 _IS_PY3 = False
69except ImportError:
70 # Python 3 compat
71 _IS_PY3 = True
72 basestring = (str, bytes)
73 unicode = str
74 izip, xrange = zip, range
77def is_iterable(obj):
78 """Similar in nature to :func:`callable`, ``is_iterable`` returns
79 ``True`` if an object is `iterable`_, ``False`` if not.
81 >>> is_iterable([])
82 True
83 >>> is_iterable(object())
84 False
86 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable
87 """
88 try:
89 iter(obj)
90 except TypeError:
91 return False
92 return True
95def is_scalar(obj):
96 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
97 object is an iterable container type. Strings are considered
98 scalar as well, because strings are more often treated as whole
99 values as opposed to iterables of 1-character substrings.
101 >>> is_scalar(object())
102 True
103 >>> is_scalar(range(10))
104 False
105 >>> is_scalar('hello')
106 True
107 """
108 return not is_iterable(obj) or isinstance(obj, basestring)
111def is_collection(obj):
112 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
113 is an iterable other than a string.
115 >>> is_collection(object())
116 False
117 >>> is_collection(range(10))
118 True
119 >>> is_collection('hello')
120 False
121 """
122 return is_iterable(obj) and not isinstance(obj, basestring)
125def split(src, sep=None, maxsplit=None):
126 """Splits an iterable based on a separator. Like :meth:`str.split`,
127 but for all iterables. Returns a list of lists.
129 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
130 [['hi', 'hello'], ['sup'], ['soap']]
132 See :func:`split_iter` docs for more info.
133 """
134 return list(split_iter(src, sep, maxsplit))
137def split_iter(src, sep=None, maxsplit=None):
138 """Splits an iterable based on a separator, *sep*, a max of
139 *maxsplit* times (no max by default). *sep* can be:
141 * a single value
142 * an iterable of separators
143 * a single-argument callable that returns True when a separator is
144 encountered
146 ``split_iter()`` yields lists of non-separator values. A separator will
147 never appear in the output.
149 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
150 [['hi', 'hello'], ['sup'], ['soap']]
152 Note that ``split_iter`` is based on :func:`str.split`, so if
153 *sep* is ``None``, ``split()`` **groups** separators. If empty lists
154 are desired between two contiguous ``None`` values, simply use
155 ``sep=[None]``:
157 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
158 [['hi', 'hello'], ['sup']]
159 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
160 [['hi', 'hello'], [], ['sup'], []]
162 Using a callable separator:
164 >>> falsy_sep = lambda x: not x
165 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
166 [['hi', 'hello'], [], ['sup'], []]
168 See :func:`split` for a list-returning version.
170 """
171 if not is_iterable(src):
172 raise TypeError('expected an iterable')
174 if maxsplit is not None:
175 maxsplit = int(maxsplit)
176 if maxsplit == 0:
177 yield [src]
178 return
180 if callable(sep):
181 sep_func = sep
182 elif not is_scalar(sep):
183 sep = frozenset(sep)
184 sep_func = lambda x: x in sep
185 else:
186 sep_func = lambda x: x == sep
188 cur_group = []
189 split_count = 0
190 for s in src:
191 if maxsplit is not None and split_count >= maxsplit:
192 sep_func = lambda x: False
193 if sep_func(s):
194 if sep is None and not cur_group:
195 # If sep is none, str.split() "groups" separators
196 # check the str.split() docs for more info
197 continue
198 split_count += 1
199 yield cur_group
200 cur_group = []
201 else:
202 cur_group.append(s)
204 if cur_group or sep is not None:
205 yield cur_group
206 return
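# Illustrative usage sketch (editorial addition, not part of the module): using a
# callable separator to split a token stream on any falsy value. The "tokens"
# list below is invented for the example.
tokens = ['a', 'b', '', 'c', None, 'd']
assert split(tokens, sep=lambda x: not x) == [['a', 'b'], ['c'], ['d']]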
209def lstrip(iterable, strip_value=None):
210 """Strips values from the beginning of an iterable. Stripped items will
211 match the value of the argument strip_value. Functionality is analogous
212 to that of the method str.lstrip. Returns a list.
214 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
215 ['Bar', 'Bam']
217 """
218 return list(lstrip_iter(iterable, strip_value))
221def lstrip_iter(iterable, strip_value=None):
222 """Strips values from the beginning of an iterable. Stripped items will
223 match the value of the argument strip_value. Functionality is analogous
224 to that of the method str.lstrip. Returns a generator.
226 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
227 ['Bar', 'Bam']
229 """
230 iterator = iter(iterable)
231 for i in iterator:
232 if i != strip_value:
233 yield i
234 break
235 for i in iterator:
236 yield i
239def rstrip(iterable, strip_value=None):
240 """Strips values from the end of an iterable. Stripped items will
241 match the value of the argument strip_value. Functionality is analogous
242 to that of the method str.rstrip. Returns a list.
244 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
245 ['Foo', 'Bar']
247 """
248 return list(rstrip_iter(iterable, strip_value))
251def rstrip_iter(iterable, strip_value=None):
252 """Strips values from the end of an iterable. Stripped items will
253 match the value of the argument strip_value. Functionality is analogous
254 to that of the method str.rstrip. Returns a generator.
256 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
257 ['Foo', 'Bar']
259 """
260 iterator = iter(iterable)
261 for i in iterator:
262 if i == strip_value:
263 cache = list()
264 cache.append(i)
265 broken = False
266 for i in iterator:
267 if i == strip_value:
268 cache.append(i)
269 else:
270 broken = True
271 break
272 if not broken: # Return to caller here because the end of the
273 return # iterator has been reached
274 for t in cache:
275 yield t
276 yield i
279def strip(iterable, strip_value=None):
280 """Strips values from the beginning and end of an iterable. Stripped items
281 will match the value of the argument strip_value. Functionality is
282 analogous to that of the method str.strip. Returns a list.
284 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
285 ['Foo', 'Bar', 'Bam']
287 """
288 return list(strip_iter(iterable, strip_value))
291def strip_iter(iterable, strip_value=None):
292 """Strips values from the beginning and end of an iterable. Stripped items
293 will match the value of the argument strip_value. Functionality is
294 analogous to that of the method str.strip. Returns a generator.
296 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
297 ['Foo', 'Bar', 'Bam']
299 """
300 return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)
303def chunked(src, size, count=None, **kw):
304 """Returns a list of *count* chunks, each with *size* elements,
305 generated from iterable *src*. If *src* is not evenly divisible by
306 *size*, the final chunk will have fewer than *size* elements.
307 Provide the *fill* keyword argument to provide a pad value and
308 enable padding, otherwise no padding will take place.
310 >>> chunked(range(10), 3)
311 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
312 >>> chunked(range(10), 3, fill=None)
313 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
314 >>> chunked(range(10), 3, count=2)
315 [[0, 1, 2], [3, 4, 5]]
317 See :func:`chunked_iter` for more info.
318 """
319 chunk_iter = chunked_iter(src, size, **kw)
320 if count is None:
321 return list(chunk_iter)
322 else:
323 return list(itertools.islice(chunk_iter, count))
326def _validate_positive_int(value, name, strictly_positive=True):
327 value = int(value)
328 if value < 0 or (strictly_positive and value == 0):
329 raise ValueError('expected a positive integer ' + name)
330 return value
333def chunked_iter(src, size, **kw):
334 """Generates *size*-sized chunks from *src* iterable. Unless the
335 optional *fill* keyword argument is provided, iterables not evenly
336 divisible by *size* will have a final chunk that is smaller than
337 *size*.
339 >>> list(chunked_iter(range(10), 3))
340 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
341 >>> list(chunked_iter(range(10), 3, fill=None))
342 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
344 Note that ``fill=None`` in fact uses ``None`` as the fill value.
345 """
346 # TODO: add count kwarg?
347 if not is_iterable(src):
348 raise TypeError('expected an iterable')
349 size = _validate_positive_int(size, 'chunk size')
350 do_fill = True
351 try:
352 fill_val = kw.pop('fill')
353 except KeyError:
354 do_fill = False
355 fill_val = None
356 if kw:
357 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
358 if not src:
359 return
360 postprocess = lambda chk: chk
361 if isinstance(src, basestring):
362 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
363 if _IS_PY3 and isinstance(src, bytes):
364 postprocess = lambda chk: bytes(chk)
365 src_iter = iter(src)
366 while True:
367 cur_chunk = list(itertools.islice(src_iter, size))
368 if not cur_chunk:
369 break
370 lc = len(cur_chunk)
371 if lc < size and do_fill:
372 cur_chunk[lc:] = [fill_val] * (size - lc)
373 yield postprocess(cur_chunk)
374 return
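# Illustrative usage sketch (editorial addition): batching items for a hypothetical
# bulk API that accepts at most 100 items per call. submit_batch is a made-up
# placeholder for whatever performs the real work.
def _example_bulk_submit(ids, submit_batch):
    for batch in chunked_iter(ids, 100):
        submit_batch(batch)  # each batch is a list of at most 100 ids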
377def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
378 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
379 Optionally, a start of the input can be set via *input_offset*, and an
380 overlap between the chunks may be specified via *overlap_size*.
381 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
382 are always at the beginning of the chunk.
384 Returns an iterator of (start, end) tuples, one tuple per chunk.
386 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
387 [(10, 15), (15, 20)]
388 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
389 [(10, 15), (14, 19), (18, 20)]
390 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
391 [(10, 15), (13, 18), (16, 20)]
393 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
394 [(4, 9), (9, 14), (14, 19)]
395 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
396 [(4, 5), (5, 10), (10, 15), (15, 19)]
398 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
399 [(2, 7), (6, 11), (10, 15), (14, 17)]
400 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
401 [(2, 5), (4, 9), (8, 13), (12, 17)]
402 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
403 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
404 """
405 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
406 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
407 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
408 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
410 input_stop = input_offset + input_size
412 if align:
413 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
414 if initial_chunk_len != overlap_size:
415 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
416 if input_offset + initial_chunk_len >= input_stop:
417 return
418 input_offset = input_offset + initial_chunk_len - overlap_size
420 for i in range(input_offset, input_stop, chunk_size - overlap_size):
421 yield (i, min(i + chunk_size, input_stop))
423 if i + chunk_size >= input_stop:
424 return
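# Illustrative sketch (editorial addition): scanning a file in fixed-size windows
# with a small overlap so a byte pattern straddling a window boundary is still
# found. The window and overlap sizes and the search itself are assumptions for
# the example, not part of chunk_ranges.
def _example_scan_file(path, pattern, window=64 * 1024, overlap=16):
    with open(path, 'rb') as f:
        size = os.fstat(f.fileno()).st_size
        for start, end in chunk_ranges(size, window, overlap_size=overlap):
            f.seek(start)
            if pattern in f.read(end - start):
                return True
    return False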
427def pairwise(src):
428 """Convenience function for calling :func:`windowed` on *src*, with
429 *size* set to 2.
431 >>> pairwise(range(5))
432 [(0, 1), (1, 2), (2, 3), (3, 4)]
433 >>> pairwise([])
434 []
436 The number of pairs is always one less than the number of elements
437 in the iterable passed in, except on empty inputs, for which an
438 empty list is returned.
439 """
440 return windowed(src, 2)
443def pairwise_iter(src):
444 """Convenience function for calling :func:`windowed_iter` on *src*,
445 with *size* set to 2.
447 >>> list(pairwise_iter(range(5)))
448 [(0, 1), (1, 2), (2, 3), (3, 4)]
449 >>> list(pairwise_iter([]))
450 []
452 The number of pairs is always one less than the number of elements
453 in the iterable passed in, or zero, when *src* is empty.
455 """
456 return windowed_iter(src, 2)
459def windowed(src, size):
460 """Returns tuples with exactly length *size*. If the iterable is
461 too short to make a window of length *size*, no tuples are
462 returned. See :func:`windowed_iter` for more.
463 """
464 return list(windowed_iter(src, size))
467def windowed_iter(src, size):
468 """Returns tuples with length *size* which represent a sliding
469 window over iterable *src*.
471 >>> list(windowed_iter(range(7), 3))
472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
474 If the iterable is too short to make a window of length *size*,
475 then no window tuples are returned.
477 >>> list(windowed_iter(range(3), 5))
478 []
479 """
480 # TODO: lists? (for consistency)
481 tees = itertools.tee(src, size)
482 try:
483 for i, t in enumerate(tees):
484 for _ in xrange(i):
485 next(t)
486 except StopIteration:
487 return izip([])
488 return izip(*tees)
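# Illustrative sketch (editorial addition): a simple moving average built on
# windowed_iter. The window size of 3 is arbitrary.
def _example_moving_average(values, n=3):
    return [sum(w) / float(n) for w in windowed_iter(values, n)]
# e.g. _example_moving_average([1, 2, 3, 4, 5]) -> [2.0, 3.0, 4.0]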
491def xfrange(stop, start=None, step=1.0):
492 """Same as :func:`frange`, but generator-based instead of returning a
493 list.
495 >>> tuple(xfrange(1, 3, step=0.75))
496 (1.0, 1.75, 2.5)
498 See :func:`frange` for more details.
499 """
500 if not step:
501 raise ValueError('step must be non-zero')
502 if start is None:
503 start, stop = 0.0, stop * 1.0
504 else:
505 # swap when all args are used
506 stop, start = start * 1.0, stop * 1.0
507 cur = start
508 while cur < stop:
509 yield cur
510 cur += step
513def frange(stop, start=None, step=1.0):
514 """A :func:`range` clone for float-based ranges.
516 >>> frange(5)
517 [0.0, 1.0, 2.0, 3.0, 4.0]
518 >>> frange(6, step=1.25)
519 [0.0, 1.25, 2.5, 3.75, 5.0]
520 >>> frange(100.5, 101.5, 0.25)
521 [100.5, 100.75, 101.0, 101.25]
522 >>> frange(5, 0)
523 []
524 >>> frange(5, 0, step=-1.25)
525 [5.0, 3.75, 2.5, 1.25]
526 """
527 if not step:
528 raise ValueError('step must be non-zero')
529 if start is None:
530 start, stop = 0.0, stop * 1.0
531 else:
532 # swap when all args are used
533 stop, start = start * 1.0, stop * 1.0
534 count = int(math.ceil((stop - start) / step))
535 ret = [None] * count
536 if not ret:
537 return ret
538 ret[0] = start
539 for i in xrange(1, count):
540 ret[i] = ret[i - 1] + step
541 return ret
544def backoff(start, stop, count=None, factor=2.0, jitter=False):
545 """Returns a list of geometrically-increasing floating-point numbers,
546 suitable for usage with `exponential backoff`_. Exactly like
547 :func:`backoff_iter`, but without the ``'repeat'`` option for
548 *count*. See :func:`backoff_iter` for more details.
550 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
552 >>> backoff(1, 10)
553 [1.0, 2.0, 4.0, 8.0, 10.0]
554 """
555 if count == 'repeat':
556 raise ValueError("'repeat' supported in backoff_iter, not backoff")
557 return list(backoff_iter(start, stop, count=count,
558 factor=factor, jitter=jitter))
561def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
562 """Generates a sequence of geometrically-increasing floats, suitable
563 for usage with `exponential backoff`_. Starts with *start*,
564 increasing by *factor* until *stop* is reached, optionally
565 stopping iteration once *count* numbers are yielded. *factor*
566 defaults to 2. In general retrying with properly-configured
567 backoff creates a better-behaved component for a larger service
568 ecosystem.
570 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
572 >>> list(backoff_iter(1.0, 10.0, count=5))
573 [1.0, 2.0, 4.0, 8.0, 10.0]
574 >>> list(backoff_iter(1.0, 10.0, count=8))
575 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
576 >>> list(backoff_iter(0.25, 100.0, factor=10))
577 [0.25, 2.5, 25.0, 100.0]
579 A simplified usage example:
581 .. code-block:: python
583 for timeout in backoff_iter(0.25, 5.0):
584 try:
585 res = network_call()
586 break
587 except Exception as e:
588 log(e)
589 time.sleep(timeout)
591 An enhancement for large-scale systems would be to add variation,
592 or *jitter*, to timeout values. This is done to avoid a thundering
593 herd on the receiving end of the network call.
595 Finally, for *count*, the special value ``'repeat'`` can be passed to
596 continue yielding indefinitely.
598 Args:
600 start (float): Positive number for baseline.
601 stop (float): Positive number for maximum.
602 count (int): Number of steps before stopping
603 iteration. Defaults to the number of steps between *start* and
604 *stop*. Pass the string, `'repeat'`, to continue iteration
605 indefinitely.
606 factor (float): Rate of exponential increase. Defaults to `2.0`,
607 e.g., `[1, 2, 4, 8, 16]`.
608 jitter (float): A factor between `-1.0` and `1.0`, used to
609 uniformly randomize and thus spread out timeouts in a distributed
610 system, avoiding rhythm effects. Positive values use the base
611 backoff curve as a maximum, negative values use the curve as a
612 minimum. Set to 1.0 or `True` for a jitter approximating
613 Ethernet's time-tested backoff solution. Defaults to `False`.
615 """
616 start = float(start)
617 stop = float(stop)
618 factor = float(factor)
619 if start < 0.0:
620 raise ValueError('expected start >= 0, not %r' % start)
621 if factor < 1.0:
622 raise ValueError('expected factor >= 1.0, not %r' % factor)
623 if stop == 0.0:
624 raise ValueError('expected stop >= 0')
625 if stop < start:
626 raise ValueError('expected stop >= start, not %r' % stop)
627 if count is None:
628 denom = start if start else 1
629 count = 1 + math.ceil(math.log(stop/denom, factor))
630 count = count if start else count + 1
631 if count != 'repeat' and count < 0:
632 raise ValueError('count must be positive or "repeat", not %r' % count)
633 if jitter:
634 jitter = float(jitter)
635 if not (-1.0 <= jitter <= 1.0):
636 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
638 cur, i = start, 0
639 while count == 'repeat' or i < count:
640 if not jitter:
641 cur_ret = cur
642 elif jitter:
643 cur_ret = cur - (cur * jitter * random.random())
644 yield cur_ret
645 i += 1
646 if cur == 0:
647 cur = 1
648 elif cur < stop:
649 cur *= factor
650 if cur > stop:
651 cur = stop
652 return
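# Illustrative sketch (editorial addition): a retry loop built on backoff_iter
# with full jitter. fetch is a placeholder callable; real code would catch a
# narrower exception type than Exception.
def _example_retrying_fetch(fetch, max_tries=6):
    last_exc = None
    for timeout in backoff_iter(0.5, 8.0, count=max_tries, jitter=True):
        try:
            return fetch()
        except Exception as e:
            last_exc = e
            time.sleep(timeout)
    raise last_exc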
655def bucketize(src, key=bool, value_transform=None, key_filter=None):
656 """Group values in the *src* iterable by the value returned by *key*.
658 >>> bucketize(range(5))
659 {False: [0], True: [1, 2, 3, 4]}
660 >>> is_odd = lambda x: x % 2 == 1
661 >>> bucketize(range(5), is_odd)
662 {False: [0, 2, 4], True: [1, 3]}
664 *key* is :class:`bool` by default, but can also be a callable, a string, or a list.
665 If it is a string, it is the name of the attribute on which to bucketize objects.
667 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
668 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}
670 If *key* is a list, it specifies the bucket for each corresponding item in *src*:
672 >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
673 {0: [1, 4], 1: [2], 2: [365, 98]}
676 Value lists are not deduplicated:
678 >>> bucketize([None, None, None, 'hello'])
679 {False: [None, None, None], True: ['hello']}
681 Bucketize into more than two groups:
683 >>> bucketize(range(10), lambda x: x % 3)
684 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
686 ``bucketize`` has a couple of advanced options useful in certain
687 cases. *value_transform* can be used to modify values as they are
688 added to buckets, and *key_filter* will allow excluding certain
689 buckets from being collected.
691 >>> bucketize(range(5), value_transform=lambda x: x*x)
692 {False: [0], True: [1, 4, 9, 16]}
694 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
695 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
697 Note that in the boolean examples above there are at most two keys,
698 ``True`` and ``False``, and every key that is present maps to a list
699 with at least one item. See :func:`partition` for a version
700 specialized for binary use cases.
702 """
703 if not is_iterable(src):
704 raise TypeError('expected an iterable')
705 elif isinstance(key, list):
706 if len(key) != len(src):
707 raise ValueError("key and src have to be the same length")
708 src = zip(key, src)
710 if isinstance(key, basestring):
711 key_func = lambda x: getattr(x, key, x)
712 elif callable(key):
713 key_func = key
714 elif isinstance(key, list):
715 key_func = lambda x: x[0]
716 else:
717 raise TypeError('expected key to be callable or a string or a list')
719 if value_transform is None:
720 value_transform = lambda x: x
721 if not callable(value_transform):
722 raise TypeError('expected callable value transform function')
723 if isinstance(key, list):
724 f = value_transform
725 value_transform = lambda x: f(x[1])
727 ret = {}
728 for val in src:
729 key_of_val = key_func(val)
730 if key_filter is None or key_filter(key_of_val):
731 ret.setdefault(key_of_val, []).append(value_transform(val))
732 return ret
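# Illustrative usage sketch (editorial addition): grouping file names by
# extension with a callable key. The names are invented for the example.
names = ['a.py', 'b.txt', 'c.py', 'd.rst']
by_ext = bucketize(names, key=lambda n: n.rsplit('.', 1)[-1])
assert by_ext == {'py': ['a.py', 'c.py'], 'txt': ['b.txt'], 'rst': ['d.rst']}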
735def partition(src, key=bool):
736 """No relation to :meth:`str.partition`, ``partition`` is like
737 :func:`bucketize`, but for added convenience returns a tuple of
738 ``(truthy_values, falsy_values)``.
740 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
741 >>> nonempty
742 ['hi', 'bye']
744 *key* defaults to :class:`bool`, but can be carefully overridden to
745 use either a function that returns ``True`` or ``False``, or the
746 string name of the attribute on which to partition objects.
748 >>> import string
749 >>> is_digit = lambda x: x in string.digits
750 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
751 >>> ''.join(decimal_digits), ''.join(hexletters)
752 ('0123456789', 'abcdefABCDEF')
753 """
754 bucketized = bucketize(src, key)
755 return bucketized.get(True, []), bucketized.get(False, [])
758def unique(src, key=None):
759 """``unique()`` returns a list of unique values, as determined by
760 *key*, in the order they first appeared in the input iterable,
761 *src*.
763 >>> ones_n_zeros = '11010110001010010101010'
764 >>> ''.join(unique(ones_n_zeros))
765 '10'
767 See :func:`unique_iter` docs for more details.
768 """
769 return list(unique_iter(src, key))
772def unique_iter(src, key=None):
773 """Yield unique elements from the iterable, *src*, based on *key*,
774 in the order in which they first appeared in *src*.
776 >>> repetitious = [1, 2, 3] * 10
777 >>> list(unique_iter(repetitious))
778 [1, 2, 3]
780 By default, *key* is the object itself, but *key* can either be a
781 callable or, for convenience, a string name of the attribute on
782 which to uniqueify objects, falling back on identity when the
783 attribute is not present.
785 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
786 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
787 ['hi', 'hello', 'bye']
788 """
789 if not is_iterable(src):
790 raise TypeError('expected an iterable, not %r' % type(src))
791 if key is None:
792 key_func = lambda x: x
793 elif callable(key):
794 key_func = key
795 elif isinstance(key, basestring):
796 key_func = lambda x: getattr(x, key, x)
797 else:
798 raise TypeError('"key" expected a string or callable, not %r' % key)
799 seen = set()
800 for i in src:
801 k = key_func(i)
802 if k not in seen:
803 seen.add(k)
804 yield i
805 return
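# Illustrative usage sketch (editorial addition): case-insensitive
# de-duplication that keeps the first spelling seen.
assert unique(['PY', 'py', 'Go', 'go', 'PY'], key=str.lower) == ['PY', 'Go']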
808def redundant(src, key=None, groups=False):
809 """The complement of :func:`unique()`.
811 By default returns non-unique/duplicate values as a list of the
812 *first* redundant value in *src*. Pass ``groups=True`` to get
813 groups of all values with redundancies, ordered by position of the
814 first redundant value. This is useful in conjunction with some
815 normalizing *key* function.
817 >>> redundant([1, 2, 3, 4])
818 []
819 >>> redundant([1, 2, 3, 2, 3, 3, 4])
820 [2, 3]
821 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
822 [[2, 2], [3, 3, 3]]
824 An example using a *key* function to do case-insensitive
825 redundancy detection.
827 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
828 ['Hi']
829 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
830 [['hi', 'Hi', 'HI']]
832 *key* should also be used when the values in *src* are not hashable.
834 .. note::
836 The output of this function is designed for reporting
837 duplicates in contexts where unique input is expected. Due to
838 the grouped return type, there is no streaming equivalent of
839 this function for the time being.
841 """
842 if key is None:
843 pass
844 elif callable(key):
845 key_func = key
846 elif isinstance(key, basestring):
847 key_func = lambda x: getattr(x, key, x)
848 else:
849 raise TypeError('"key" expected a string or callable, not %r' % key)
850 seen = {} # key to first seen item
851 redundant_order = []
852 redundant_groups = {}
853 for i in src:
854 k = key_func(i) if key else i
855 if k not in seen:
856 seen[k] = i
857 else:
858 if k in redundant_groups:
859 if groups:
860 redundant_groups[k].append(i)
861 else:
862 redundant_order.append(k)
863 redundant_groups[k] = [seen[k], i]
864 if not groups:
865 ret = [redundant_groups[k][1] for k in redundant_order]
866 else:
867 ret = [redundant_groups[k] for k in redundant_order]
868 return ret
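# Illustrative usage sketch (editorial addition): reporting usernames that
# collide after case normalization, a typical use of the *key* argument.
assert redundant(['Alice', 'bob', 'ALICE', 'alice'], key=str.lower) == ['ALICE']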
871def one(src, default=None, key=None):
872 """Along the same lines as builtins, :func:`all` and :func:`any`, and
873 similar to :func:`first`, ``one()`` returns the single object in
874 the given iterable *src* that evaluates to ``True``, as determined
875 by callable *key*. If unset, *key* defaults to :class:`bool`.
877 If exactly one object in *src* evaluates to ``True``, that object is
879 returned; otherwise (zero such objects, or more than one), *default*
880 is returned. If *default* is not passed, ``None`` is returned. It's
881 like an `XOR`_ over an iterable.
883 >>> one((True, False, False))
884 True
885 >>> one((True, False, True))
886 >>> one((0, 0, 'a'))
887 'a'
888 >>> one((0, False, None))
889 >>> one((True, True), default=False)
890 False
891 >>> bool(one(('', 1)))
892 True
893 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
894 42
896 See `Martín Gaitán's original repo`_ for further use cases.
898 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
899 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
901 """
902 ones = list(itertools.islice(filter(key, src), 2))
903 return ones[0] if len(ones) == 1 else default
906def first(iterable, default=None, key=None):
907 """Return first element of *iterable* that evaluates to ``True``, else
908 return ``None`` or optional *default*. Similar to :func:`one`.
910 >>> first([0, False, None, [], (), 42])
911 42
912 >>> first([0, False, None, [], ()]) is None
913 True
914 >>> first([0, False, None, [], ()], default='ohai')
915 'ohai'
916 >>> import re
917 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
918 >>> m.group(1)
919 'bc'
921 The optional *key* argument specifies a one-argument predicate function
922 like that used for *filter()*. The *key* argument, if supplied, should be
923 in keyword form. For example, finding the first even number in an iterable:
925 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
926 4
928 Contributed by Hynek Schlawack, author of `the original standalone module`_.
930 .. _the original standalone module: https://github.com/hynek/first
931 """
932 return next(filter(key, iterable), default)
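# Illustrative sketch (editorial addition): picking the first configured value
# from several optional sources. The parameter names are invented.
def _example_pick_timeout(cli_value=None, env_value=None, default=30):
    return first([cli_value, env_value], key=lambda v: v is not None,
                 default=default)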
935def flatten_iter(iterable):
936 """``flatten_iter()`` yields all the elements from *iterable* while
937 collapsing any nested iterables.
939 >>> nested = [[1, 2], [[3], [4, 5]]]
940 >>> list(flatten_iter(nested))
941 [1, 2, 3, 4, 5]
942 """
943 for item in iterable:
944 if isinstance(item, Iterable) and not isinstance(item, basestring):
945 for subitem in flatten_iter(item):
946 yield subitem
947 else:
948 yield item
950def flatten(iterable):
951 """``flatten()`` returns a collapsed list of all the elements from
952 *iterable* while collapsing any nested iterables.
954 >>> nested = [[1, 2], [[3], [4, 5]]]
955 >>> flatten(nested)
956 [1, 2, 3, 4, 5]
957 """
958 return list(flatten_iter(iterable))
961def same(iterable, ref=_UNSET):
962 """``same()`` returns ``True`` when all values in *iterable* are
963 equal to one another, or optionally a reference value,
964 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
965 an iterable and returns a :class:`bool`. ``same()`` returns
966 ``True`` for empty iterables.
968 >>> same([])
969 True
970 >>> same([1])
971 True
972 >>> same(['a', 'a', 'a'])
973 True
974 >>> same(range(20))
975 False
976 >>> same([[], []])
977 True
978 >>> same([[], []], ref='test')
979 False
981 """
982 iterator = iter(iterable)
983 if ref is _UNSET:
984 ref = next(iterator, ref)
985 return all(val == ref for val in iterator)
988def default_visit(path, key, value):
989 # print('visit(%r, %r, %r)' % (path, key, value))
990 return key, value
992# enable the extreme: monkeypatching iterutils with a different default_visit
993_orig_default_visit = default_visit
996def default_enter(path, key, value):
997 # print('enter(%r, %r)' % (key, value))
998 if isinstance(value, basestring):
999 return value, False
1000 elif isinstance(value, Mapping):
1001 return value.__class__(), ItemsView(value)
1002 elif isinstance(value, Sequence):
1003 return value.__class__(), enumerate(value)
1004 elif isinstance(value, Set):
1005 return value.__class__(), enumerate(value)
1006 else:
1007 # files, strings, other iterables, and scalars are not
1008 # traversed
1009 return value, False
1012def default_exit(path, key, old_parent, new_parent, new_items):
1013 # print('exit(%r, %r, %r, %r, %r)'
1014 # % (path, key, old_parent, new_parent, new_items))
1015 ret = new_parent
1016 if isinstance(new_parent, Mapping):
1017 new_parent.update(new_items)
1018 elif isinstance(new_parent, Sequence):
1019 vals = [v for i, v in new_items]
1020 try:
1021 new_parent.extend(vals)
1022 except AttributeError:
1023 ret = new_parent.__class__(vals) # tuples
1024 elif isinstance(new_parent, Set):
1025 vals = [v for i, v in new_items]
1026 try:
1027 new_parent.update(vals)
1028 except AttributeError:
1029 ret = new_parent.__class__(vals) # frozensets
1030 else:
1031 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1032 return ret
1035def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1036 **kwargs):
1037 """The remap ("recursive map") function is used to traverse and
1038 transform nested structures. Lists, tuples, sets, and dictionaries
1039 are just a few of the data structures nested into heterogeneous
1040 tree-like structures that are so common in programming.
1041 Unfortunately, Python's built-in ways to manipulate collections
1042 are almost all flat. List comprehensions may be fast and succinct,
1043 but they do not recurse, making it tedious to apply quick changes
1044 or complex transforms to real-world data.
1046 remap goes where list comprehensions cannot.
1048 Here's an example of removing all Nones from some data:
1050 >>> from pprint import pprint
1051 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1052 ... 'Babylon 5': 6, 'Dr. Who': None}
1053 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1054 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1056 Notice how both Nones have been removed despite the nesting in the
1057 dictionary. Not bad for a one-liner, and that's just the beginning.
1058 See `this remap cookbook`_ for more delicious recipes.
1060 .. _this remap cookbook: http://sedimental.org/remap.html
1062 remap takes four main arguments: the object to traverse and three
1063 optional callables which determine how the remapped object will be
1064 created.
1066 Args:
1068 root: The target object to traverse. By default, remap
1069 supports iterables like :class:`list`, :class:`tuple`,
1070 :class:`dict`, and :class:`set`, but any object traversable by
1071 *enter* will work.
1072 visit (callable): This function is called on every item in
1073 *root*. It must accept three positional arguments, *path*,
1074 *key*, and *value*. *path* is simply a tuple of parents'
1075 keys. *visit* should return the new key-value pair. It may
1076 also return ``True`` as shorthand to keep the old item
1077 unmodified, or ``False`` to drop the item from the new
1078 structure. *visit* is called after *enter*, on the new parent.
1080 The *visit* function is called for every item in root,
1081 including duplicate items. For traversable values, it is
1082 called on the new parent object, after all its children
1083 have been visited. The default visit behavior simply
1084 returns the key-value pair unmodified.
1085 enter (callable): This function controls which items in *root*
1086 are traversed. It accepts the same arguments as *visit*: the
1087 path, the key, and the value of the current item. It returns a
1088 pair of the blank new parent, and an iterator over the items
1089 which should be visited. If ``False`` is returned instead of
1090 an iterator, the value will not be traversed.
1092 The *enter* function is only called once per unique value. The
1093 default enter behavior supports mappings, sequences, and
1094 sets. Strings and all other iterables will not be traversed.
1095 exit (callable): This function determines how to handle items
1096 once they have been visited. It gets the same three
1097 arguments as the other functions -- *path*, *key*, *value*
1098 -- plus two more: the blank new parent object returned
1099 from *enter*, and a list of the new items, as remapped by
1100 *visit*.
1102 Like *enter*, the *exit* function is only called once per
1103 unique value. The default exit behavior is to simply add
1104 all new items to the new parent, e.g., using
1105 :meth:`list.extend` and :meth:`dict.update` to add to the
1106 new parent. Immutable objects, such as a :class:`tuple` or
1107 :class:`namedtuple`, must be recreated from scratch, but
1108 use the same type as the new parent passed back from the
1109 *enter* function.
1110 reraise_visit (bool): A pragmatic convenience for the *visit*
1111 callable. When set to ``False``, remap ignores any errors
1112 raised by the *visit* callback. Items causing exceptions
1113 are kept. See examples for more details.
1115 remap is designed to cover the majority of cases with just the
1116 *visit* callable. While passing in multiple callables is very
1117 empowering, remap is designed so very few cases should require
1118 passing more than one function.
1120 When passing *enter* and *exit*, it's common and easiest to build
1121 on the default behavior. Simply add ``from boltons.iterutils import
1122 default_enter`` (or ``default_exit``), and have your enter/exit
1123 function call the default behavior before or after your custom
1124 logic. See `this example`_.
1126 Duplicate and self-referential objects (aka reference loops) are
1127 automatically handled internally, `as shown here`_.
1129 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1130 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1132 """
1133 # TODO: improve argument formatting in sphinx doc
1134 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1135 if not callable(visit):
1136 raise TypeError('visit expected callable, not: %r' % visit)
1137 if not callable(enter):
1138 raise TypeError('enter expected callable, not: %r' % enter)
1139 if not callable(exit):
1140 raise TypeError('exit expected callable, not: %r' % exit)
1141 reraise_visit = kwargs.pop('reraise_visit', True)
1142 if kwargs:
1143 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1145 path, registry, stack = (), {}, [(None, root)]
1146 new_items_stack = []
1147 while stack:
1148 key, value = stack.pop()
1149 id_value = id(value)
1150 if key is _REMAP_EXIT:
1151 key, new_parent, old_parent = value
1152 id_value = id(old_parent)
1153 path, new_items = new_items_stack.pop()
1154 value = exit(path, key, old_parent, new_parent, new_items)
1155 registry[id_value] = value
1156 if not new_items_stack:
1157 continue
1158 elif id_value in registry:
1159 value = registry[id_value]
1160 else:
1161 res = enter(path, key, value)
1162 try:
1163 new_parent, new_items = res
1164 except TypeError:
1165 # TODO: handle False?
1166 raise TypeError('enter should return a tuple of (new_parent,'
1167 ' items_iterator), not: %r' % res)
1168 if new_items is not False:
1169 # traverse unless False is explicitly passed
1170 registry[id_value] = new_parent
1171 new_items_stack.append((path, []))
1172 if value is not root:
1173 path += (key,)
1174 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1175 if new_items:
1176 stack.extend(reversed(list(new_items)))
1177 continue
1178 if visit is _orig_default_visit:
1179 # avoid function call overhead by inlining identity operation
1180 visited_item = (key, value)
1181 else:
1182 try:
1183 visited_item = visit(path, key, value)
1184 except Exception:
1185 if reraise_visit:
1186 raise
1187 visited_item = True
1188 if visited_item is False:
1189 continue # drop
1190 elif visited_item is True:
1191 visited_item = (key, value)
1192 # TODO: typecheck?
1193 # raise TypeError('expected (key, value) from visit(),'
1194 # ' not: %r' % visited_item)
1195 try:
1196 new_items_stack[-1][1].append(visited_item)
1197 except IndexError:
1198 raise TypeError('expected remappable root, not: %r' % root)
1199 return value
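# Illustrative usage sketch (editorial addition): two common remap patterns.
# The first drops empty strings anywhere in a nested structure; the second
# lower-cases every string key. The sample data is invented.
data = {'name': 'Proj', 'Tags': ['a', ''], 'Meta': {'Owner': '', 'ID': 3}}
no_empties = remap(data, visit=lambda p, k, v: v != '')
lower_keys = remap(data, visit=lambda p, k, v: (k.lower() if isinstance(k, str) else k, v))
assert no_empties == {'name': 'Proj', 'Tags': ['a'], 'Meta': {'ID': 3}}
assert lower_keys['meta']['owner'] == ''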
1202class PathAccessError(KeyError, IndexError, TypeError):
1203 """An amalgamation of KeyError, IndexError, and TypeError,
1204 representing what can occur when looking up a path in a nested
1205 object.
1206 """
1207 def __init__(self, exc, seg, path):
1208 self.exc = exc
1209 self.seg = seg
1210 self.path = path
1212 def __repr__(self):
1213 cn = self.__class__.__name__
1214 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)
1216 def __str__(self):
1217 return ('could not access %r from path %r, got error: %r'
1218 % (self.seg, self.path, self.exc))
1221def get_path(root, path, default=_UNSET):
1222 """Retrieve a value from a nested object via a tuple representing the
1223 lookup path.
1225 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1226 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1227 3
1229 The path format is intentionally consistent with that of
1230 :func:`remap`.
1232 One of get_path's chief aims is improved error messaging. EAFP is
1233 great, but the error messages are not.
1235 For instance, ``root['a']['b']['c'][2][1]`` gives back
1236 ``IndexError: list index out of range``
1238 What went out of range where? get_path currently raises
1239 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,
1240 1), got error: IndexError('list index out of range',)``, a
1241 subclass of KeyError, IndexError, and TypeError.
1243 You can also pass a default that covers the entire operation,
1244 should the lookup fail at any level.
1246 Args:
1247 root: The target nesting of dictionaries, lists, or other
1248 objects supporting ``__getitem__``.
1249 path (tuple): A tuple of strings and integers to be successively
1250 looked up within *root*. A dot-separated string is also accepted.
1251 default: The value to be returned should any
1252 ``PathAccessError`` exceptions be raised.
1253 """
1254 if isinstance(path, basestring):
1255 path = path.split('.')
1256 cur = root
1257 try:
1258 for seg in path:
1259 try:
1260 cur = cur[seg]
1261 except (KeyError, IndexError) as exc:
1262 raise PathAccessError(exc, seg, path)
1263 except TypeError as exc:
1264 # either string index in a list, or a parent that
1265 # doesn't support indexing
1266 try:
1267 seg = int(seg)
1268 cur = cur[seg]
1269 except (ValueError, KeyError, IndexError, TypeError):
1270 if not is_iterable(cur):
1271 exc = TypeError('%r object is not indexable'
1272 % type(cur).__name__)
1273 raise PathAccessError(exc, seg, path)
1274 except PathAccessError:
1275 if default is _UNSET:
1276 raise
1277 return default
1278 return cur
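# Illustrative usage sketch (editorial addition): tolerant lookups into nested
# config data, with a default for missing paths. The data is invented.
config = {'db': {'hosts': ['primary', 'replica']}}
assert get_path(config, ('db', 'hosts', 1)) == 'replica'
assert get_path(config, 'db.port', default=5432) == 5432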
1281def research(root, query=lambda p, k, v: True, reraise=False):
1282 """The :func:`research` function uses :func:`remap` to recurse over
1283 any data nested in *root*, and find values which match a given
1284 criterion, specified by the *query* callable.
1286 Results are returned as a list of ``(path, value)`` pairs. The
1287 paths are tuples in the same format accepted by
1288 :func:`get_path`. This can be useful for comparing values nested
1289 in two or more different structures.
1291 Here's a simple example that finds all integers:
1293 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1294 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1295 >>> print(sorted(res))
1296 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1298 Note how *query* follows the same, familiar ``path, key, value``
1299 signature as the ``visit`` and ``enter`` functions on
1300 :func:`remap`, and returns a :class:`bool`.
1302 Args:
1303 root: The target object to search. Supports the same types of
1304 objects as :func:`remap`, including :class:`list`,
1305 :class:`tuple`, :class:`dict`, and :class:`set`.
1306 query (callable): The function called on every object to
1307 determine whether to include it in the search results. The
1308 callable must accept three arguments, *path*, *key*, and
1309 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1310 *enter* and *visit* from :func:`remap`.
1311 reraise (bool): Whether to reraise exceptions raised by *query*
1312 or to simply drop the result that caused the error.
1315 With :func:`research` it's easy to inspect the details of a data
1316 structure, like finding values that are at a certain depth (using
1317 ``len(p)``) and much more. If more advanced functionality is
1318 needed, check out the code and make your own :func:`remap`
1319 wrapper, and consider `submitting a patch`_!
1321 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1322 """
1323 ret = []
1325 if not callable(query):
1326 raise TypeError('query expected callable, not: %r' % query)
1328 def enter(path, key, value):
1329 try:
1330 if query(path, key, value):
1331 ret.append((path + (key,), value))
1332 except Exception:
1333 if reraise:
1334 raise
1335 return default_enter(path, key, value)
1337 remap(root, enter=enter)
1338 return ret
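# Illustrative usage sketch (editorial addition): locating every None buried in
# a nested record, e.g. to report incomplete fields. The record is invented.
record = {'user': {'email': None, 'name': 'sam'}, 'scores': [1, None, 3]}
missing = research(record, query=lambda p, k, v: v is None)
assert sorted(missing) == [(('scores', 1), None), (('user', 'email'), None)]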
1341# TODO: recollect()
1342# TODO: refilter()
1343# TODO: reiter()
1346# GUID iterators: 10x faster and somewhat more compact than uuid.
1348class GUIDerator(object):
1349 """The GUIDerator is an iterator that yields a globally-unique
1350 identifier (GUID) on every iteration. The GUIDs produced are
1351 hexadecimal strings.
1353 Testing shows it to be around 12x faster than the uuid module. By
1354 default it is also more compact, partly due to its default 96-bit
1355 (24-hexdigit) length. 96 bits of randomness means that there is a
1356 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1357 or less uniqueness is desired, the *size* argument can be adjusted
1358 accordingly.
1360 Args:
1361 size (int): character length of the GUID, defaults to 24. Lengths
1362 between 20 and 36 are considered valid.
1364 The GUIDerator has built-in fork protection that causes it to
1365 detect a fork on next iteration and reseed accordingly.
1367 """
1368 def __init__(self, size=24):
1369 self.size = size
1370 if size < 20 or size > 36:
1371 raise ValueError('expected 20 <= size <= 36')
1372 import hashlib
1373 self._sha1 = hashlib.sha1
1374 self.count = itertools.count()
1375 self.reseed()
1377 def reseed(self):
1378 import socket
1379 self.pid = os.getpid()
1380 self.salt = '-'.join([str(self.pid),
1381 socket.gethostname() or '<nohostname>',
1382 str(time.time()),
1383 codecs.encode(os.urandom(6),
1384 'hex_codec').decode('ascii')])
1385 # that codecs trick is the best/only way to get a bytes to
1386 # hexbytes in py2/3
1387 return
1389 def __iter__(self):
1390 return self
1392 if _IS_PY3:
1393 def __next__(self):
1394 if os.getpid() != self.pid:
1395 self.reseed()
1396 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1397 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1398 return hash_text
1399 else:
1400 def __next__(self):
1401 if os.getpid() != self.pid:
1402 self.reseed()
1403 return self._sha1(self.salt +
1404 str(next(self.count))).hexdigest()[:self.size]
1406 next = __next__
1409class SequentialGUIDerator(GUIDerator):
1410 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1411 iterator that yields a globally-unique identifier (GUID) on every
1412 iteration. The GUIDs produced are hexadecimal strings.
1414 The SequentialGUIDerator differs in that it picks a starting GUID
1415 value and increments every iteration. This yields GUIDs which are
1416 of course unique, but also ordered and lexicographically sortable.
1418 The SequentialGUIDerator is around 50% faster than the normal
1419 GUIDerator, making it almost 20x as fast as the built-in uuid
1420 module. By default it is also more compact, partly due to its
1421 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1422 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1423 iterations. If more or less uniqueness is desired, the *size*
1424 argument can be adjusted accordingly.
1426 Args:
1427 size (int): character length of the GUID, defaults to 24.
1429 Note that with SequentialGUIDerator there is a chance of GUIDs
1430 growing larger than the size configured. The SequentialGUIDerator
1431 has built-in fork protection that causes it to detect a fork on
1432 next iteration and reseed accordingly.
1434 """
1436 if _IS_PY3:
1437 def reseed(self):
1438 super(SequentialGUIDerator, self).reseed()
1439 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1440 self.start = int(start_str[:self.size], 16)
1441 self.start |= (1 << ((self.size * 4) - 2))
1442 else:
1443 def reseed(self):
1444 super(SequentialGUIDerator, self).reseed()
1445 start_str = self._sha1(self.salt).hexdigest()
1446 self.start = int(start_str[:self.size], 16)
1447 self.start |= (1 << ((self.size * 4) - 2))
1449 def __next__(self):
1450 if os.getpid() != self.pid:
1451 self.reseed()
1452 return '%x' % (next(self.count) + self.start)
1454 next = __next__
1457guid_iter = GUIDerator()
1458seq_guid_iter = SequentialGUIDerator()
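# Illustrative usage sketch (editorial addition): the module-level iterators
# above can be used directly, e.g. for request or job IDs.
request_id = next(guid_iter)      # 24 hex digits, effectively random
ordered_id = next(seq_guid_iter)  # hex, increases monotonically per process
assert len(request_id) == 24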
1461def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1462 """For when you care about the order of some elements, but not about
1463 others.
1465 Use this to float to the top and/or sink to the bottom a specific
1466 ordering, while sorting the rest of the elements according to
1467 normal :func:`sorted` rules.
1469 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1470 ['one', 'two', 'a', 'b']
1471 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1472 [6, 5, 3, 1, 0, 2, 4]
1473 >>> import string
1474 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1475 'aA1023456789cCdDeEfFbB'
1477 Args:
1478 iterable (list): A list or other iterable to sort.
1479 first (list): A sequence to enforce for elements which should
1480 appear at the beginning of the returned list.
1481 last (list): A sequence to enforce for elements which should
1482 appear at the end of the returned list.
1483 key (callable): Callable used to generate a comparable key for
1484 each item to be sorted, same as the key in
1485 :func:`sorted`. Note that entries in *first* and *last*
1486 should be the keys for the items. Defaults to
1487 passthrough/the identity function.
1488 reverse (bool): Whether or not elements not explicitly ordered
1489 by *first* and *last* should be in reverse order or not.
1491 Returns a new list in sorted order.
1492 """
1493 first = first or []
1494 last = last or []
1495 key = key or (lambda x: x)
1496 seq = list(iterable)
1497 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1498 other.sort(key=key, reverse=reverse)
1500 if first:
1501 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1502 if last:
1503 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1504 return first + other + last
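# Illustrative usage sketch (editorial addition): pinning well-known columns to
# the front of an otherwise alphabetical column list. Names are invented.
cols = ['size', 'id', 'name', 'owner', 'ctime']
assert soft_sorted(cols, first=['id', 'name']) == ['id', 'name', 'ctime', 'owner', 'size']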
1507def untyped_sorted(iterable, key=None, reverse=False):
1508 """A version of :func:`sorted` which will happily sort an iterable of
1509 heterogeneous types and return a new list, similar to legacy Python's
1510 behavior.
1512 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1513 [1, 2.0, 2, 'abc', 'def']
1515 Note how mutually orderable types are sorted as expected, as in
1516 the case of the integers and floats above.
1518 .. note::
1520 Results may vary across Python versions and builds, but the
1521 function will produce a sorted list, except in the case of
1522 explicitly unorderable objects.
1524 """
1525 class _Wrapper(object):
1526 __slots__ = ('obj',)
1528 def __init__(self, obj):
1529 self.obj = obj
1531 def __lt__(self, other):
1532 obj = key(self.obj) if key is not None else self.obj
1533 other = key(other.obj) if key is not None else other.obj
1534 try:
1535 ret = obj < other
1536 except TypeError:
1537 ret = ((type(obj).__name__, id(type(obj)), obj)
1538 < (type(other).__name__, id(type(other)), other))
1539 return ret
1541 if key is not None and not callable(key):
1542 raise TypeError('expected function or callable object for key, not: %r'
1543 % key)
1545 return sorted(iterable, key=_Wrapper, reverse=reverse)
1547"""
1548May actually be faster to do an isinstance check for a str path
1550$ python -m timeit -s "x = [1]" "x[0]"
155110000000 loops, best of 3: 0.0207 usec per loop
1552$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
155310000000 loops, best of 3: 0.029 usec per loop
1554$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15551000000 loops, best of 3: 0.315 usec per loop
1556# setting up try/except is fast, only around 0.01us
1557# actually triggering the exception takes almost 10x as long
1559$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
156010000000 loops, best of 3: 0.141 usec per loop
1561$ python -m timeit -s "x = [1]" "isinstance(x, str)"
156210000000 loops, best of 3: 0.131 usec per loop
1563$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
15641000000 loops, best of 3: 0.443 usec per loop
1565$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
15661000000 loops, best of 3: 0.544 usec per loop
1567"""