Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 24%

1# -*- coding: utf-8 -*-

5# Redistribution and use in source and binary forms, with or without

6# modification, are permitted provided that the following conditions are

7# met:

9# * Redistributions of source code must retain the above copyright

10# notice, this list of conditions and the following disclaimer.

11#

12# * Redistributions in binary form must reproduce the above

13# copyright notice, this list of conditions and the following

14# disclaimer in the documentation and/or other materials provided

15# with the distribution.

16#

17# * The names of the contributors may not be used to endorse or

18# promote products derived from this software without specific

19# prior written permission.

20#

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

33""":mod:`itertools` is full of great examples of Python generator

34usage. However, there are still some critical gaps. ``iterutils``

35fills many of those gaps with featureful, tested, and Pythonic

36solutions.

38Many of the functions below have two versions, one which

39returns an iterator (denoted by the ``*_iter`` naming pattern), and a

40shorter-named convenience form that returns a list. Some of the

41following are based on examples in itertools docs.

42"""

44import os

45import math

46import time

47import codecs

48import random

49import itertools

51try:

52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable

53except ImportError:

54 from collections import Mapping, Sequence, Set, ItemsView, Iterable

57try:

58 from .typeutils import make_sentinel

59 _UNSET = make_sentinel('_UNSET')

60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')

61except ImportError:

62 _REMAP_EXIT = object()

63 _UNSET = object()

65try:

66 from future_builtins import filter

67 from itertools import izip, izip_longest as zip_longest

68 _IS_PY3 = False

69except ImportError:

70 # Python 3 compat

71 _IS_PY3 = True

72 basestring = (str, bytes)

73 unicode = str

74 izip, xrange = zip, range

75 from itertools import zip_longest

78def is_iterable(obj):

79 """Similar in nature to :func:`callable`, ``is_iterable`` returns

80 ``True`` if an object is `iterable`_, ``False`` if not.

82 >>> is_iterable([])

83 True

84 >>> is_iterable(object())

85 False

87 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable

88 """

89 try:

90 iter(obj)

91 except TypeError:

92 return False

93 return True

96def is_scalar(obj):

97 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an

98 object is an iterable container type. Strings are considered

99 scalar as well, because strings are more often treated as whole

100 values as opposed to iterables of 1-character substrings.

101

102 >>> is_scalar(object())

103 True

104 >>> is_scalar(range(10))

105 False

106 >>> is_scalar('hello')

107 True

108 """

109 return not is_iterable(obj) or isinstance(obj, basestring)

110

111

112def is_collection(obj):

113 """The opposite of :func:`is_scalar`. Returns ``True`` if an object

114 is an iterable other than a string.

115

116 >>> is_collection(object())

117 False

118 >>> is_collection(range(10))

119 True

120 >>> is_collection('hello')

121 False

122 """

123 return is_iterable(obj) and not isinstance(obj, basestring)

124

125

126def split(src, sep=None, maxsplit=None):

127 """Splits an iterable based on a separator. Like :meth:`str.split`,

128 but for all iterables. Returns a list of lists.

129

130 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])

131 [['hi', 'hello'], ['sup'], ['soap']]

132

133 See :func:`split_iter` docs for more info.

134 """

135 return list(split_iter(src, sep, maxsplit))

136

137

138def split_iter(src, sep=None, maxsplit=None):

139 """Splits an iterable based on a separator, *sep*, a max of

140 *maxsplit* times (no max by default). *sep* can be:

141

142 * a single value

143 * an iterable of separators

144 * a single-argument callable that returns True when a separator is

145 encountered

146

147 ``split_iter()`` yields lists of non-separator values. A separator will

148 never appear in the output.

149

150 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))

151 [['hi', 'hello'], ['sup'], ['soap']]

152

153 Note that ``split_iter`` is based on :func:`str.split`, so if

154 *sep* is ``None``, ``split()`` **groups** separators. If empty lists

155 are desired between two contiguous ``None`` values, simply use

156 ``sep=[None]``:

157

158 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))

159 [['hi', 'hello'], ['sup']]

160 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))

161 [['hi', 'hello'], [], ['sup'], []]

162

163 Using a callable separator:

164

165 >>> falsy_sep = lambda x: not x

166 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))

167 [['hi', 'hello'], [], ['sup'], []]

168

169 See :func:`split` for a list-returning version.

170

171 """

172 if not is_iterable(src):

173 raise TypeError('expected an iterable')

174

175 if maxsplit is not None:

176 maxsplit = int(maxsplit)

177 if maxsplit == 0:

178 yield [src]

179 return

180

181 if callable(sep):

182 sep_func = sep

183 elif not is_scalar(sep):

184 sep = frozenset(sep)

185 sep_func = lambda x: x in sep

186 else:

187 sep_func = lambda x: x == sep

188

189 cur_group = []

190 split_count = 0

191 for s in src:

192 if maxsplit is not None and split_count >= maxsplit:

193 sep_func = lambda x: False

194 if sep_func(s):

195 if sep is None and not cur_group:

196 # If sep is none, str.split() "groups" separators

197 # check the str.split() docs for more info

198 continue

199 split_count += 1

200 yield cur_group

201 cur_group = []

202 else:

203 cur_group.append(s)

204

205 if cur_group or sep is not None:

206 yield cur_group

207 return

208

209

210def lstrip(iterable, strip_value=None):

211 """Strips values from the beginning of an iterable. Stripped items will

212 match the value of the argument strip_value. Functionality is analogous

213 to that of the method str.lstrip. Returns a list.

214

215 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')

216 ['Bar', 'Bam']

217

218 """

219 return list(lstrip_iter(iterable, strip_value))

220

221

222def lstrip_iter(iterable, strip_value=None):

223 """Strips values from the beginning of an iterable. Stripped items will

224 match the value of the argument strip_value. Functionality is analogous

225 to that of the method str.lstrip. Returns a generator.

226

227 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))

228 ['Bar', 'Bam']

229

230 """

231 iterator = iter(iterable)

232 for i in iterator:

233 if i != strip_value:

234 yield i

235 break

236 for i in iterator:

237 yield i

238

239

240def rstrip(iterable, strip_value=None):

241 """Strips values from the end of an iterable. Stripped items will

242 match the value of the argument strip_value. Functionality is analogous

243 to that of the method str.rstrip. Returns a list.

244

245 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')

246 ['Foo', 'Bar']

247

248 """

249 return list(rstrip_iter(iterable,strip_value))

250

251

252def rstrip_iter(iterable, strip_value=None):

253 """Strips values from the end of an iterable. Stripped items will

254 match the value of the argument strip_value. Functionality is analogous

255 to that of the method str.rstrip. Returns a generator.

256

257 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))

258 ['Foo', 'Bar']

259

260 """

261 iterator = iter(iterable)

262 for i in iterator:

263 if i == strip_value:

264 cache = list()

265 cache.append(i)

266 broken = False

267 for i in iterator:

268 if i == strip_value:

269 cache.append(i)

270 else:

271 broken = True

272 break

273 if not broken: # Return to caller here because the end of the

274 return # iterator has been reached

275 for t in cache:

276 yield t

277 yield i

278

279

280def strip(iterable, strip_value=None):

281 """Strips values from the beginning and end of an iterable. Stripped items

282 will match the value of the argument strip_value. Functionality is

283 analogous to that of the method str.strip. Returns a list.

284

285 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')

286 ['Foo', 'Bar', 'Bam']

287

288 """

289 return list(strip_iter(iterable,strip_value))

290

291

292def strip_iter(iterable,strip_value=None):

293 """Strips values from the beginning and end of an iterable. Stripped items

294 will match the value of the argument strip_value. Functionality is

295 analogous to that of the method str.strip. Returns a generator.

296

297 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))

298 ['Foo', 'Bar', 'Bam']

299

300 """

301 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value)

302

303

304def chunked(src, size, count=None, **kw):

305 """Returns a list of *count* chunks, each with *size* elements,

306 generated from iterable *src*. If *src* is not evenly divisible by

307 *size*, the final chunk will have fewer than *size* elements.

308 Provide the *fill* keyword argument to provide a pad value and

309 enable padding, otherwise no padding will take place.

310

311 >>> chunked(range(10), 3)

312 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

313 >>> chunked(range(10), 3, fill=None)

314 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]

315 >>> chunked(range(10), 3, count=2)

316 [[0, 1, 2], [3, 4, 5]]

317

318 See :func:`chunked_iter` for more info.

319 """

320 chunk_iter = chunked_iter(src, size, **kw)

321 if count is None:

322 return list(chunk_iter)

323 else:

324 return list(itertools.islice(chunk_iter, count))

325

326

327def _validate_positive_int(value, name, strictly_positive=True):

328 value = int(value)

329 if value < 0 or (strictly_positive and value == 0):

330 raise ValueError('expected a positive integer ' + name)

331 return value

332

333

334def chunked_iter(src, size, **kw):

335 """Generates *size*-sized chunks from *src* iterable. Unless the

336 optional *fill* keyword argument is provided, iterables not evenly

337 divisible by *size* will have a final chunk that is smaller than

338 *size*.

339

340 >>> list(chunked_iter(range(10), 3))

341 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]

342 >>> list(chunked_iter(range(10), 3, fill=None))

343 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]

344

345 Note that ``fill=None`` in fact uses ``None`` as the fill value.

346 """

347 # TODO: add count kwarg?

348 if not is_iterable(src):

349 raise TypeError('expected an iterable')

350 size = _validate_positive_int(size, 'chunk size')

351 do_fill = True

352 try:

353 fill_val = kw.pop('fill')

354 except KeyError:

355 do_fill = False

356 fill_val = None

357 if kw:

358 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())

359 if not src:

360 return

361 postprocess = lambda chk: chk

362 if isinstance(src, basestring):

363 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)

364 if _IS_PY3 and isinstance(src, bytes):

365 postprocess = lambda chk: bytes(chk)

366 src_iter = iter(src)

367 while True:

368 cur_chunk = list(itertools.islice(src_iter, size))

369 if not cur_chunk:

370 break

371 lc = len(cur_chunk)

372 if lc < size and do_fill:

373 cur_chunk[lc:] = [fill_val] * (size - lc)

374 yield postprocess(cur_chunk)

375 return

376

377

378def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):

379 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.

380 Optionally, a start of the input can be set via *input_offset*, and

381 and overlap between the chunks may be specified via *overlap_size*.

382 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*

383 are always at the beginning of the chunk.

384

385 Returns an iterator of (start, end) tuples, one tuple per chunk.

386

387 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))

388 [(10, 15), (15, 20)]

389 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))

390 [(10, 15), (14, 19), (18, 20)]

391 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))

392 [(10, 15), (13, 18), (16, 20)]

393

394 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))

395 [(4, 9), (9, 14), (14, 19)]

396 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))

397 [(4, 5), (5, 10), (10, 15), (15, 19)]

398

399 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))

400 [(2, 7), (6, 11), (10, 15), (14, 17)]

401 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))

402 [(2, 5), (4, 9), (8, 13), (12, 17)]

403 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))

404 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]

405 """

406 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)

407 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')

408 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)

409 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)

410

411 input_stop = input_offset + input_size

412

413 if align:

414 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)

415 if initial_chunk_len != overlap_size:

416 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))

417 if input_offset + initial_chunk_len >= input_stop:

418 return

419 input_offset = input_offset + initial_chunk_len - overlap_size

420

421 for i in range(input_offset, input_stop, chunk_size - overlap_size):

422 yield (i, min(i + chunk_size, input_stop))

423

424 if i + chunk_size >= input_stop:

425 return

426

427

428def pairwise(src, end=_UNSET):

429 """Convenience function for calling :func:`windowed` on *src*, with

430 *size* set to 2.

431

432 >>> pairwise(range(5))

433 [(0, 1), (1, 2), (2, 3), (3, 4)]

434 >>> pairwise([])

435 []

436

437 Unless *end* is set, the number of pairs is always one less than

438 the number of elements in the iterable passed in, except on an empty input,

439 which will return an empty list.

440

441 With *end* set, a number of pairs equal to the length of *src* is returned,

442 with the last item of the last pair being equal to *end*.

443

444 >>> list(pairwise(range(3), end=None))

445 [(0, 1), (1, 2), (2, None)]

446

447 This way, *end* values can be useful as sentinels to signal the end of the iterable.

448 """

449 return windowed(src, 2, fill=end)

450

451

452def pairwise_iter(src, end=_UNSET):

453 """Convenience function for calling :func:`windowed_iter` on *src*,

454 with *size* set to 2.

455

456 >>> list(pairwise_iter(range(5)))

457 [(0, 1), (1, 2), (2, 3), (3, 4)]

458 >>> list(pairwise_iter([]))

459 []

460

461 Unless *end* is set, the number of pairs is always one less

462 than the number of elements in the iterable passed in,

463 or zero, when *src* is empty.

464

465 With *end* set, a number of pairs equal to the length of *src* is returned,

466 with the last item of the last pair being equal to *end*.

467

468 >>> list(pairwise_iter(range(3), end=None))

469 [(0, 1), (1, 2), (2, None)]

470

471 This way, *end* values can be useful as sentinels to signal the end

472 of the iterable. For infinite iterators, setting *end* has no effect.

473 """

474 return windowed_iter(src, 2, fill=end)

475

476

477def windowed(src, size, fill=_UNSET):

478 """Returns tuples with exactly length *size*. If *fill* is unset

479 and the iterable is too short to make a window of length *size*,

480 no tuples are returned. See :func:`windowed_iter` for more.

481 """

482 return list(windowed_iter(src, size, fill=fill))

483

484

485def windowed_iter(src, size, fill=_UNSET):

486 """Returns tuples with length *size* which represent a sliding

487 window over iterable *src*.

488

489 >>> list(windowed_iter(range(7), 3))

490 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]

491

492 If *fill* is unset, and the iterable is too short to make a window

493 of length *size*, then no window tuples are returned.

494

495 >>> list(windowed_iter(range(3), 5))

496 []

497

498 With *fill* set, the iterator always yields a number of windows

499 equal to the length of the *src* iterable.

500

501 >>> windowed(range(4), 3, fill=None)

502 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

503

504 This way, *fill* values can be useful to signal the end of the iterable.

505 For infinite iterators, setting *fill* has no effect.

506 """

507 tees = itertools.tee(src, size)

508 if fill is _UNSET:

509 try:

510 for i, t in enumerate(tees):

511 for _ in range(i):

512 next(t)

513 except StopIteration:

514 return zip([])

515 return zip(*tees)

516

517 for i, t in enumerate(tees):

518 for _ in range(i):

519 try:

520 next(t)

521 except StopIteration:

522 continue

523 return zip_longest(*tees, fillvalue=fill)

524

525

526

527def xfrange(stop, start=None, step=1.0):

528 """Same as :func:`frange`, but generator-based instead of returning a

529 list.

530

531 >>> tuple(xfrange(1, 3, step=0.75))

532 (1.0, 1.75, 2.5)

533

534 See :func:`frange` for more details.

535 """

536 if not step:

537 raise ValueError('step must be non-zero')

538 if start is None:

539 start, stop = 0.0, stop * 1.0

540 else:

541 # swap when all args are used

542 stop, start = start * 1.0, stop * 1.0

543 cur = start

544 while cur < stop:

545 yield cur

546 cur += step

547

548

549def frange(stop, start=None, step=1.0):

550 """A :func:`range` clone for float-based ranges.

551

552 >>> frange(5)

553 [0.0, 1.0, 2.0, 3.0, 4.0]

554 >>> frange(6, step=1.25)

555 [0.0, 1.25, 2.5, 3.75, 5.0]

556 >>> frange(100.5, 101.5, 0.25)

557 [100.5, 100.75, 101.0, 101.25]

558 >>> frange(5, 0)

559 []

560 >>> frange(5, 0, step=-1.25)

561 [5.0, 3.75, 2.5, 1.25]

562 """

563 if not step:

564 raise ValueError('step must be non-zero')

565 if start is None:

566 start, stop = 0.0, stop * 1.0

567 else:

568 # swap when all args are used

569 stop, start = start * 1.0, stop * 1.0

570 count = int(math.ceil((stop - start) / step))

571 ret = [None] * count

572 if not ret:

573 return ret

574 ret[0] = start

575 for i in xrange(1, count):

576 ret[i] = ret[i - 1] + step

577 return ret

578

579

580def backoff(start, stop, count=None, factor=2.0, jitter=False):

581 """Returns a list of geometrically-increasing floating-point numbers,

582 suitable for usage with `exponential backoff`_. Exactly like

583 :func:`backoff_iter`, but without the ``'repeat'`` option for

584 *count*. See :func:`backoff_iter` for more details.

585

586 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff

587

588 >>> backoff(1, 10)

589 [1.0, 2.0, 4.0, 8.0, 10.0]

590 """

591 if count == 'repeat':

592 raise ValueError("'repeat' supported in backoff_iter, not backoff")

593 return list(backoff_iter(start, stop, count=count,

594 factor=factor, jitter=jitter))

595

596

597def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):

598 """Generates a sequence of geometrically-increasing floats, suitable

599 for usage with `exponential backoff`_. Starts with *start*,

600 increasing by *factor* until *stop* is reached, optionally

601 stopping iteration once *count* numbers are yielded. *factor*

602 defaults to 2. In general retrying with properly-configured

603 backoff creates a better-behaved component for a larger service

604 ecosystem.

605

606 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff

607

608 >>> list(backoff_iter(1.0, 10.0, count=5))

609 [1.0, 2.0, 4.0, 8.0, 10.0]

610 >>> list(backoff_iter(1.0, 10.0, count=8))

611 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]

612 >>> list(backoff_iter(0.25, 100.0, factor=10))

613 [0.25, 2.5, 25.0, 100.0]

614

615 A simplified usage example:

616

617 .. code-block:: python

618

619 for timeout in backoff_iter(0.25, 5.0):

620 try:

621 res = network_call()

622 break

623 except Exception as e:

624 log(e)

625 time.sleep(timeout)

626

627 An enhancement for large-scale systems would be to add variation,

628 or *jitter*, to timeout values. This is done to avoid a thundering

629 herd on the receiving end of the network call.

630

631 Finally, for *count*, the special value ``'repeat'`` can be passed to

632 continue yielding indefinitely.

633

634 Args:

635

636 start (float): Positive number for baseline.

637 stop (float): Positive number for maximum.

638 count (int): Number of steps before stopping

639 iteration. Defaults to the number of steps between *start* and

640 *stop*. Pass the string, `'repeat'`, to continue iteration

641 indefinitely.

642 factor (float): Rate of exponential increase. Defaults to `2.0`,

643 e.g., `[1, 2, 4, 8, 16]`.

644 jitter (float): A factor between `-1.0` and `1.0`, used to

645 uniformly randomize and thus spread out timeouts in a distributed

646 system, avoiding rhythm effects. Positive values use the base

647 backoff curve as a maximum, negative values use the curve as a

648 minimum. Set to 1.0 or `True` for a jitter approximating

649 Ethernet's time-tested backoff solution. Defaults to `False`.

650

651 """

652 start = float(start)

653 stop = float(stop)

654 factor = float(factor)

655 if start < 0.0:

656 raise ValueError('expected start >= 0, not %r' % start)

657 if factor < 1.0:

658 raise ValueError('expected factor >= 1.0, not %r' % factor)

659 if stop == 0.0:

660 raise ValueError('expected stop >= 0')

661 if stop < start:

662 raise ValueError('expected stop >= start, not %r' % stop)

663 if count is None:

664 denom = start if start else 1

665 count = 1 + math.ceil(math.log(stop/denom, factor))

666 count = count if start else count + 1

667 if count != 'repeat' and count < 0:

668 raise ValueError('count must be positive or "repeat", not %r' % count)

669 if jitter:

670 jitter = float(jitter)

671 if not (-1.0 <= jitter <= 1.0):

672 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)

673

674 cur, i = start, 0

675 while count == 'repeat' or i < count:

676 if not jitter:

677 cur_ret = cur

678 elif jitter:

679 cur_ret = cur - (cur * jitter * random.random())

680 yield cur_ret

681 i += 1

682 if cur == 0:

683 cur = 1

684 elif cur < stop:

685 cur *= factor

686 if cur > stop:

687 cur = stop

688 return

689

690

691def bucketize(src, key=bool, value_transform=None, key_filter=None):

692 """Group values in the *src* iterable by the value returned by *key*.

693

694 >>> bucketize(range(5))

695 {False: [0], True: [1, 2, 3, 4]}

696 >>> is_odd = lambda x: x % 2 == 1

697 >>> bucketize(range(5), is_odd)

698 {False: [0, 2, 4], True: [1, 3]}

699

700 *key* is :class:`bool` by default, but can either be a callable or a string or a list

701 if it is a string, it is the name of the attribute on which to bucketize objects.

702

703 >>> bucketize([1+1j, 2+2j, 1, 2], key='real')

704 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}

705

706 if *key* is a list, it contains the buckets where to put each object

707

708 >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2])

709 {0: [1, 4], 1: [2], 2: [365, 98]}

710

711

712 Value lists are not deduplicated:

713

714 >>> bucketize([None, None, None, 'hello'])

715 {False: [None, None, None], True: ['hello']}

716

717 Bucketize into more than 3 groups

718

719 >>> bucketize(range(10), lambda x: x % 3)

720 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}

721

722 ``bucketize`` has a couple of advanced options useful in certain

723 cases. *value_transform* can be used to modify values as they are

724 added to buckets, and *key_filter* will allow excluding certain

725 buckets from being collected.

726

727 >>> bucketize(range(5), value_transform=lambda x: x*x)

728 {False: [0], True: [1, 4, 9, 16]}

729

730 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)

731 {0: [0, 3, 6, 9], 2: [2, 5, 8]}

732

733 Note in some of these examples there were at most two keys, ``True`` and

734 ``False``, and each key present has a list with at least one

735 item. See :func:`partition` for a version specialized for binary

736 use cases.

737

738 """

739 if not is_iterable(src):

740 raise TypeError('expected an iterable')

741 elif isinstance(key, list):

742 if len(key) != len(src):

743 raise ValueError("key and src have to be the same length")

744 src = zip(key, src)

745

746 if isinstance(key, basestring):

747 key_func = lambda x: getattr(x, key, x)

748 elif callable(key):

749 key_func = key

750 elif isinstance(key, list):

751 key_func = lambda x: x[0]

752 else:

753 raise TypeError('expected key to be callable or a string or a list')

754

755 if value_transform is None:

756 value_transform = lambda x: x

757 if not callable(value_transform):

758 raise TypeError('expected callable value transform function')

759 if isinstance(key, list):

760 f = value_transform

761 value_transform=lambda x: f(x[1])

762

763 ret = {}

764 for val in src:

765 key_of_val = key_func(val)

766 if key_filter is None or key_filter(key_of_val):

767 ret.setdefault(key_of_val, []).append(value_transform(val))

768 return ret

769

770

771def partition(src, key=bool):

772 """No relation to :meth:`str.partition`, ``partition`` is like

773 :func:`bucketize`, but for added convenience returns a tuple of

774 ``(truthy_values, falsy_values)``.

775

776 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])

777 >>> nonempty

778 ['hi', 'bye']

779

780 *key* defaults to :class:`bool`, but can be carefully overridden to

781 use either a function that returns either ``True`` or ``False`` or

782 a string name of the attribute on which to partition objects.

783

784 >>> import string

785 >>> is_digit = lambda x: x in string.digits

786 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)

787 >>> ''.join(decimal_digits), ''.join(hexletters)

788 ('0123456789', 'abcdefABCDEF')

789 """

790 bucketized = bucketize(src, key)

791 return bucketized.get(True, []), bucketized.get(False, [])

792

793

794def unique(src, key=None):

795 """``unique()`` returns a list of unique values, as determined by

796 *key*, in the order they first appeared in the input iterable,

797 *src*.

798

799 >>> ones_n_zeros = '11010110001010010101010'

800 >>> ''.join(unique(ones_n_zeros))

801 '10'

802

803 See :func:`unique_iter` docs for more details.

804 """

805 return list(unique_iter(src, key))

806

807

808def unique_iter(src, key=None):

809 """Yield unique elements from the iterable, *src*, based on *key*,

810 in the order in which they first appeared in *src*.

811

812 >>> repetitious = [1, 2, 3] * 10

813 >>> list(unique_iter(repetitious))

814 [1, 2, 3]

815

816 By default, *key* is the object itself, but *key* can either be a

817 callable or, for convenience, a string name of the attribute on

818 which to uniqueify objects, falling back on identity when the

819 attribute is not present.

820

821 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']

822 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))

823 ['hi', 'hello', 'bye']

824 """

825 if not is_iterable(src):

826 raise TypeError('expected an iterable, not %r' % type(src))

827 if key is None:

828 key_func = lambda x: x

829 elif callable(key):

830 key_func = key

831 elif isinstance(key, basestring):

832 key_func = lambda x: getattr(x, key, x)

833 else:

834 raise TypeError('"key" expected a string or callable, not %r' % key)

835 seen = set()

836 for i in src:

837 k = key_func(i)

838 if k not in seen:

839 seen.add(k)

840 yield i

841 return

842

843

844def redundant(src, key=None, groups=False):

845 """The complement of :func:`unique()`.

846

847 By default returns non-unique/duplicate values as a list of the

848 *first* redundant value in *src*. Pass ``groups=True`` to get

849 groups of all values with redundancies, ordered by position of the

850 first redundant value. This is useful in conjunction with some

851 normalizing *key* function.

852

853 >>> redundant([1, 2, 3, 4])

854 []

855 >>> redundant([1, 2, 3, 2, 3, 3, 4])

856 [2, 3]

857 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)

858 [[2, 2], [3, 3, 3]]

859

860 An example using a *key* function to do case-insensitive

861 redundancy detection.

862

863 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)

864 ['Hi']

865 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)

866 [['hi', 'Hi', 'HI']]

867

868 *key* should also be used when the values in *src* are not hashable.

869

870 .. note::

871

872 This output of this function is designed for reporting

873 duplicates in contexts when a unique input is desired. Due to

874 the grouped return type, there is no streaming equivalent of

875 this function for the time being.

876

877 """

878 if key is None:

879 pass

880 elif callable(key):

881 key_func = key

882 elif isinstance(key, basestring):

883 key_func = lambda x: getattr(x, key, x)

884 else:

885 raise TypeError('"key" expected a string or callable, not %r' % key)

886 seen = {} # key to first seen item

887 redundant_order = []

888 redundant_groups = {}

889 for i in src:

890 k = key_func(i) if key else i

891 if k not in seen:

892 seen[k] = i

893 else:

894 if k in redundant_groups:

895 if groups:

896 redundant_groups[k].append(i)

897 else:

898 redundant_order.append(k)

899 redundant_groups[k] = [seen[k], i]

900 if not groups:

901 ret = [redundant_groups[k][1] for k in redundant_order]

902 else:

903 ret = [redundant_groups[k] for k in redundant_order]

904 return ret

905

906

907def one(src, default=None, key=None):

908 """Along the same lines as builtins, :func:`all` and :func:`any`, and

909 similar to :func:`first`, ``one()`` returns the single object in

910 the given iterable *src* that evaluates to ``True``, as determined

911 by callable *key*. If unset, *key* defaults to :class:`bool`. If

912 no such objects are found, *default* is returned. If *default* is

913 not passed, ``None`` is returned.

914

915 If *src* has more than one object that evaluates to ``True``, or

916 if there is no object that fulfills such condition, return

917 *default*. It's like an `XOR`_ over an iterable.

918

919 >>> one((True, False, False))

920 True

921 >>> one((True, False, True))

922 >>> one((0, 0, 'a'))

923 'a'

924 >>> one((0, False, None))

925 >>> one((True, True), default=False)

926 False

927 >>> bool(one(('', 1)))

928 True

929 >>> one((10, 20, 30, 42), key=lambda i: i > 40)

930 42

931

932 See `Martín Gaitán's original repo`_ for further use cases.

933

934 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one

935 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or

936

937 """

938 ones = list(itertools.islice(filter(key, src), 2))

939 return ones[0] if len(ones) == 1 else default

940

941

942def first(iterable, default=None, key=None):

943 """Return first element of *iterable* that evaluates to ``True``, else

944 return ``None`` or optional *default*. Similar to :func:`one`.

945

946 >>> first([0, False, None, [], (), 42])

947 42

948 >>> first([0, False, None, [], ()]) is None

949 True

950 >>> first([0, False, None, [], ()], default='ohai')

951 'ohai'

952 >>> import re

953 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])

954 >>> m.group(1)

955 'bc'

956

957 The optional *key* argument specifies a one-argument predicate function

958 like that used for *filter()*. The *key* argument, if supplied, should be

959 in keyword form. For example, finding the first even number in an iterable:

960

961 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)

962 4

963

964 Contributed by Hynek Schlawack, author of `the original standalone module`_.

965

966 .. _the original standalone module: https://github.com/hynek/first

967 """

968 return next(filter(key, iterable), default)

969

970

971def flatten_iter(iterable):

972 """``flatten_iter()`` yields all the elements from *iterable* while

973 collapsing any nested iterables.

974

975 >>> nested = [[1, 2], [[3], [4, 5]]]

976 >>> list(flatten_iter(nested))

977 [1, 2, 3, 4, 5]

978 """

979 for item in iterable:

980 if isinstance(item, Iterable) and not isinstance(item, basestring):

981 for subitem in flatten_iter(item):

982 yield subitem

983 else:

984 yield item

985

986def flatten(iterable):

987 """``flatten()`` returns a collapsed list of all the elements from

988 *iterable* while collapsing any nested iterables.

989

990 >>> nested = [[1, 2], [[3], [4, 5]]]

991 >>> flatten(nested)

992 [1, 2, 3, 4, 5]

993 """

994 return list(flatten_iter(iterable))

995

996

997def same(iterable, ref=_UNSET):

998 """``same()`` returns ``True`` when all values in *iterable* are

999 equal to one another, or optionally a reference value,

1000 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates

1001 an iterable and returns a :class:`bool`. ``same()`` returns

1002 ``True`` for empty iterables.

1003

1004 >>> same([])

1005 True

1006 >>> same([1])

1007 True

1008 >>> same(['a', 'a', 'a'])

1009 True

1010 >>> same(range(20))

1011 False

1012 >>> same([[], []])

1013 True

1014 >>> same([[], []], ref='test')

1015 False

1016

1017 """

1018 iterator = iter(iterable)

1019 if ref is _UNSET:

1020 ref = next(iterator, ref)

1021 return all(val == ref for val in iterator)

1022

1023

1024def default_visit(path, key, value):

1025 # print('visit(%r, %r, %r)' % (path, key, value))

1026 return key, value

1027

1028# enable the extreme: monkeypatching iterutils with a different default_visit

1029_orig_default_visit = default_visit

1030

1031

1032def default_enter(path, key, value):

1033 # print('enter(%r, %r)' % (key, value))

1034 if isinstance(value, basestring):

1035 return value, False

1036 elif isinstance(value, Mapping):

1037 return value.__class__(), ItemsView(value)

1038 elif isinstance(value, Sequence):

1039 return value.__class__(), enumerate(value)

1040 elif isinstance(value, Set):

1041 return value.__class__(), enumerate(value)

1042 else:

1043 # files, strings, other iterables, and scalars are not

1044 # traversed

1045 return value, False

1046

1047

1048def default_exit(path, key, old_parent, new_parent, new_items):

1049 # print('exit(%r, %r, %r, %r, %r)'

1050 # % (path, key, old_parent, new_parent, new_items))

1051 ret = new_parent

1052 if isinstance(new_parent, Mapping):

1053 new_parent.update(new_items)

1054 elif isinstance(new_parent, Sequence):

1055 vals = [v for i, v in new_items]

1056 try:

1057 new_parent.extend(vals)

1058 except AttributeError:

1059 ret = new_parent.__class__(vals) # tuples

1060 elif isinstance(new_parent, Set):

1061 vals = [v for i, v in new_items]

1062 try:

1063 new_parent.update(vals)

1064 except AttributeError:

1065 ret = new_parent.__class__(vals) # frozensets

1066 else:

1067 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))

1068 return ret

1069

1070

1071def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,

1072 **kwargs):

1073 """The remap ("recursive map") function is used to traverse and

1074 transform nested structures. Lists, tuples, sets, and dictionaries

1075 are just a few of the data structures nested into heterogeneous

1076 tree-like structures that are so common in programming.

1077 Unfortunately, Python's built-in ways to manipulate collections

1078 are almost all flat. List comprehensions may be fast and succinct,

1079 but they do not recurse, making it tedious to apply quick changes

1080 or complex transforms to real-world data.

1081

1082 remap goes where list comprehensions cannot.

1083

1084 Here's an example of removing all Nones from some data:

1085

1086 >>> from pprint import pprint

1087 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},

1088 ... 'Babylon 5': 6, 'Dr. Who': None}

1089 >>> pprint(remap(reviews, lambda p, k, v: v is not None))

1090 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}

1091

1092 Notice how both Nones have been removed despite the nesting in the

1093 dictionary. Not bad for a one-liner, and that's just the beginning.

1094 See `this remap cookbook`_ for more delicious recipes.

1095

1096 .. _this remap cookbook: http://sedimental.org/remap.html

1097

1098 remap takes four main arguments: the object to traverse and three

1099 optional callables which determine how the remapped object will be

1100 created.

1101

1102 Args:

1103

1104 root: The target object to traverse. By default, remap

1105 supports iterables like :class:`list`, :class:`tuple`,

1106 :class:`dict`, and :class:`set`, but any object traversable by

1107 *enter* will work.

1108 visit (callable): This function is called on every item in

1109 *root*. It must accept three positional arguments, *path*,

1110 *key*, and *value*. *path* is simply a tuple of parents'

1111 keys. *visit* should return the new key-value pair. It may

1112 also return ``True`` as shorthand to keep the old item

1113 unmodified, or ``False`` to drop the item from the new

1114 structure. *visit* is called after *enter*, on the new parent.

1115

1116 The *visit* function is called for every item in root,

1117 including duplicate items. For traversable values, it is

1118 called on the new parent object, after all its children

1119 have been visited. The default visit behavior simply

1120 returns the key-value pair unmodified.

1121 enter (callable): This function controls which items in *root*

1122 are traversed. It accepts the same arguments as *visit*: the

1123 path, the key, and the value of the current item. It returns a

1124 pair of the blank new parent, and an iterator over the items

1125 which should be visited. If ``False`` is returned instead of

1126 an iterator, the value will not be traversed.

1127

1128 The *enter* function is only called once per unique value. The

1129 default enter behavior support mappings, sequences, and

1130 sets. Strings and all other iterables will not be traversed.

1131 exit (callable): This function determines how to handle items

1132 once they have been visited. It gets the same three

1133 arguments as the other functions -- *path*, *key*, *value*

1134 -- plus two more: the blank new parent object returned

1135 from *enter*, and a list of the new items, as remapped by

1136 *visit*.

1137

1138 Like *enter*, the *exit* function is only called once per

1139 unique value. The default exit behavior is to simply add

1140 all new items to the new parent, e.g., using

1141 :meth:`list.extend` and :meth:`dict.update` to add to the

1142 new parent. Immutable objects, such as a :class:`tuple` or

1143 :class:`namedtuple`, must be recreated from scratch, but

1144 use the same type as the new parent passed back from the

1145 *enter* function.

1146 reraise_visit (bool): A pragmatic convenience for the *visit*

1147 callable. When set to ``False``, remap ignores any errors

1148 raised by the *visit* callback. Items causing exceptions

1149 are kept. See examples for more details.

1150

1151 remap is designed to cover the majority of cases with just the

1152 *visit* callable. While passing in multiple callables is very

1153 empowering, remap is designed so very few cases should require

1154 passing more than one function.

1155

1156 When passing *enter* and *exit*, it's common and easiest to build

1157 on the default behavior. Simply add ``from boltons.iterutils import

1158 default_enter`` (or ``default_exit``), and have your enter/exit

1159 function call the default behavior before or after your custom

1160 logic. See `this example`_.

1161

1162 Duplicate and self-referential objects (aka reference loops) are

1163 automatically handled internally, `as shown here`_.

1164

1165 .. _this example: http://sedimental.org/remap.html#sort_all_lists

1166 .. _as shown here: http://sedimental.org/remap.html#corner_cases

1167

1168 """

1169 # TODO: improve argument formatting in sphinx doc

1170 # TODO: enter() return (False, items) to continue traverse but cancel copy?

1171 if not callable(visit):

1172 raise TypeError('visit expected callable, not: %r' % visit)

1173 if not callable(enter):

1174 raise TypeError('enter expected callable, not: %r' % enter)

1175 if not callable(exit):

1176 raise TypeError('exit expected callable, not: %r' % exit)

1177 reraise_visit = kwargs.pop('reraise_visit', True)

1178 if kwargs:

1179 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())

1180

1181 path, registry, stack = (), {}, [(None, root)]

1182 new_items_stack = []

1183 while stack:

1184 key, value = stack.pop()

1185 id_value = id(value)

1186 if key is _REMAP_EXIT:

1187 key, new_parent, old_parent = value

1188 id_value = id(old_parent)

1189 path, new_items = new_items_stack.pop()

1190 value = exit(path, key, old_parent, new_parent, new_items)

1191 registry[id_value] = value

1192 if not new_items_stack:

1193 continue

1194 elif id_value in registry:

1195 value = registry[id_value]

1196 else:

1197 res = enter(path, key, value)

1198 try:

1199 new_parent, new_items = res

1200 except TypeError:

1201 # TODO: handle False?

1202 raise TypeError('enter should return a tuple of (new_parent,'

1203 ' items_iterator), not: %r' % res)

1204 if new_items is not False:

1205 # traverse unless False is explicitly passed

1206 registry[id_value] = new_parent

1207 new_items_stack.append((path, []))

1208 if value is not root:

1209 path += (key,)

1210 stack.append((_REMAP_EXIT, (key, new_parent, value)))

1211 if new_items:

1212 stack.extend(reversed(list(new_items)))

1213 continue

1214 if visit is _orig_default_visit:

1215 # avoid function call overhead by inlining identity operation

1216 visited_item = (key, value)

1217 else:

1218 try:

1219 visited_item = visit(path, key, value)

1220 except Exception:

1221 if reraise_visit:

1222 raise

1223 visited_item = True

1224 if visited_item is False:

1225 continue # drop

1226 elif visited_item is True:

1227 visited_item = (key, value)

1228 # TODO: typecheck?

1229 # raise TypeError('expected (key, value) from visit(),'

1230 # ' not: %r' % visited_item)

1231 try:

1232 new_items_stack[-1][1].append(visited_item)

1233 except IndexError:

1234 raise TypeError('expected remappable root, not: %r' % root)

1235 return value

1236

1237

1238class PathAccessError(KeyError, IndexError, TypeError):

1239 """An amalgamation of KeyError, IndexError, and TypeError,

1240 representing what can occur when looking up a path in a nested

1241 object.

1242 """

1243 def __init__(self, exc, seg, path):

1244 self.exc = exc

1245 self.seg = seg

1246 self.path = path

1247

1248 def __repr__(self):

1249 cn = self.__class__.__name__

1250 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path)

1251

1252 def __str__(self):

1253 return ('could not access %r from path %r, got error: %r'

1254 % (self.seg, self.path, self.exc))

1255

1256

1257def get_path(root, path, default=_UNSET):

1258 """Retrieve a value from a nested object via a tuple representing the

1259 lookup path.

1260

1261 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}

1262 >>> get_path(root, ('a', 'b', 'c', 2, 0))

1263 3

1264

1265 The path format is intentionally consistent with that of

1266 :func:`remap`.

1267

1268 One of get_path's chief aims is improved error messaging. EAFP is

1269 great, but the error messages are not.

1270

1271 For instance, ``root['a']['b']['c'][2][1]`` gives back

1272 ``IndexError: list index out of range``

1273

1274 What went out of range where? get_path currently raises

1275 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2,

1276 1), got error: IndexError('list index out of range',)``, a

1277 subclass of IndexError and KeyError.

1278

1279 You can also pass a default that covers the entire operation,

1280 should the lookup fail at any level.

1281

1282 Args:

1283 root: The target nesting of dictionaries, lists, or other

1284 objects supporting ``__getitem__``.

1285 path (tuple): A list of strings and integers to be successively

1286 looked up within *root*.

1287 default: The value to be returned should any

1288 ``PathAccessError`` exceptions be raised.

1289 """

1290 if isinstance(path, basestring):

1291 path = path.split('.')

1292 cur = root

1293 try:

1294 for seg in path:

1295 try:

1296 cur = cur[seg]

1297 except (KeyError, IndexError) as exc:

1298 raise PathAccessError(exc, seg, path)

1299 except TypeError as exc:

1300 # either string index in a list, or a parent that

1301 # doesn't support indexing

1302 try:

1303 seg = int(seg)

1304 cur = cur[seg]

1305 except (ValueError, KeyError, IndexError, TypeError):

1306 if not is_iterable(cur):

1307 exc = TypeError('%r object is not indexable'

1308 % type(cur).__name__)

1309 raise PathAccessError(exc, seg, path)

1310 except PathAccessError:

1311 if default is _UNSET:

1312 raise

1313 return default

1314 return cur

1315

1316

1317def research(root, query=lambda p, k, v: True, reraise=False):

1318 """The :func:`research` function uses :func:`remap` to recurse over

1319 any data nested in *root*, and find values which match a given

1320 criterion, specified by the *query* callable.

1321

1322 Results are returned as a list of ``(path, value)`` pairs. The

1323 paths are tuples in the same format accepted by

1324 :func:`get_path`. This can be useful for comparing values nested

1325 in two or more different structures.

1326

1327 Here's a simple example that finds all integers:

1328

1329 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}

1330 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))

1331 >>> print(sorted(res))

1332 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]

1333

1334 Note how *query* follows the same, familiar ``path, key, value``

1335 signature as the ``visit`` and ``enter`` functions on

1336 :func:`remap`, and returns a :class:`bool`.

1337

1338 Args:

1339 root: The target object to search. Supports the same types of

1340 objects as :func:`remap`, including :class:`list`,

1341 :class:`tuple`, :class:`dict`, and :class:`set`.

1342 query (callable): The function called on every object to

1343 determine whether to include it in the search results. The

1344 callable must accept three arguments, *path*, *key*, and

1345 *value*, commonly abbreviated *p*, *k*, and *v*, same as

1346 *enter* and *visit* from :func:`remap`.

1347 reraise (bool): Whether to reraise exceptions raised by *query*

1348 or to simply drop the result that caused the error.

1349

1350

1351 With :func:`research` it's easy to inspect the details of a data

1352 structure, like finding values that are at a certain depth (using

1353 ``len(p)``) and much more. If more advanced functionality is

1354 needed, check out the code and make your own :func:`remap`

1355 wrapper, and consider `submitting a patch`_!

1356

1357 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls

1358 """

1359 ret = []

1360

1361 if not callable(query):

1362 raise TypeError('query expected callable, not: %r' % query)

1363

1364 def enter(path, key, value):

1365 try:

1366 if query(path, key, value):

1367 ret.append((path + (key,), value))

1368 except Exception:

1369 if reraise:

1370 raise

1371 return default_enter(path, key, value)

1372

1373 remap(root, enter=enter)

1374 return ret

1375

1376

1377# TODO: recollect()

1378# TODO: refilter()

1379# TODO: reiter()

1380

1381

1382# GUID iterators: 10x faster and somewhat more compact than uuid.

1383

1384class GUIDerator(object):

1385 """The GUIDerator is an iterator that yields a globally-unique

1386 identifier (GUID) on every iteration. The GUIDs produced are

1387 hexadecimal strings.

1388

1389 Testing shows it to be around 12x faster than the uuid module. By

1390 default it is also more compact, partly due to its default 96-bit

1391 (24-hexdigit) length. 96 bits of randomness means that there is a

1392 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more

1393 or less uniqueness is desired, the *size* argument can be adjusted

1394 accordingly.

1395

1396 Args:

1397 size (int): character length of the GUID, defaults to 24. Lengths

1398 between 20 and 36 are considered valid.

1399

1400 The GUIDerator has built-in fork protection that causes it to

1401 detect a fork on next iteration and reseed accordingly.

1402

1403 """

1404 def __init__(self, size=24):

1405 self.size = size

1406 if size < 20 or size > 36:

1407 raise ValueError('expected 20 < size <= 36')

1408 import hashlib

1409 self._sha1 = hashlib.sha1

1410 self.count = itertools.count()

1411 self.reseed()

1412

1413 def reseed(self):

1414 import socket

1415 self.pid = os.getpid()

1416 self.salt = '-'.join([str(self.pid),

1417 socket.gethostname() or b'<nohostname>',

1418 str(time.time()),

1419 codecs.encode(os.urandom(6),

1420 'hex_codec').decode('ascii')])

1421 # that codecs trick is the best/only way to get a bytes to

1422 # hexbytes in py2/3

1423 return

1424

1425 def __iter__(self):

1426 return self

1427

1428 if _IS_PY3:

1429 def __next__(self):

1430 if os.getpid() != self.pid:

1431 self.reseed()

1432 target_bytes = (self.salt + str(next(self.count))).encode('utf8')

1433 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]

1434 return hash_text

1435 else:

1436 def __next__(self):

1437 if os.getpid() != self.pid:

1438 self.reseed()

1439 return self._sha1(self.salt +

1440 str(next(self.count))).hexdigest()[:self.size]

1441

1442 next = __next__

1443

1444

1445class SequentialGUIDerator(GUIDerator):

1446 """Much like the standard GUIDerator, the SequentialGUIDerator is an

1447 iterator that yields a globally-unique identifier (GUID) on every

1448 iteration. The GUIDs produced are hexadecimal strings.

1449

1450 The SequentialGUIDerator differs in that it picks a starting GUID

1451 value and increments every iteration. This yields GUIDs which are

1452 of course unique, but also ordered and lexicographically sortable.

1453

1454 The SequentialGUIDerator is around 50% faster than the normal

1455 GUIDerator, making it almost 20x as fast as the built-in uuid

1456 module. By default it is also more compact, partly due to its

1457 96-bit (24-hexdigit) default length. 96 bits of randomness means that

1458 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64

1459 iterations. If more or less uniqueness is desired, the *size*

1460 argument can be adjusted accordingly.

1461

1462 Args:

1463 size (int): character length of the GUID, defaults to 24.

1464

1465 Note that with SequentialGUIDerator there is a chance of GUIDs

1466 growing larger than the size configured. The SequentialGUIDerator

1467 has built-in fork protection that causes it to detect a fork on

1468 next iteration and reseed accordingly.

1469

1470 """

1471

1472 if _IS_PY3:

1473 def reseed(self):

1474 super(SequentialGUIDerator, self).reseed()

1475 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()

1476 self.start = int(start_str[:self.size], 16)

1477 self.start |= (1 << ((self.size * 4) - 2))

1478 else:

1479 def reseed(self):

1480 super(SequentialGUIDerator, self).reseed()

1481 start_str = self._sha1(self.salt).hexdigest()

1482 self.start = int(start_str[:self.size], 16)

1483 self.start |= (1 << ((self.size * 4) - 2))

1484

1485 def __next__(self):

1486 if os.getpid() != self.pid:

1487 self.reseed()

1488 return '%x' % (next(self.count) + self.start)

1489

1490 next = __next__

1491

1492

1493guid_iter = GUIDerator()

1494seq_guid_iter = SequentialGUIDerator()

1495

1496

1497def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):

1498 """For when you care about the order of some elements, but not about

1499 others.

1500

1501 Use this to float to the top and/or sink to the bottom a specific

1502 ordering, while sorting the rest of the elements according to

1503 normal :func:`sorted` rules.

1504

1505 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])

1506 ['one', 'two', 'a', 'b']

1507 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)

1508 [6, 5, 3, 1, 0, 2, 4]

1509 >>> import string

1510 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))

1511 'aA1023456789cCdDeEfFbB'

1512

1513 Args:

1514 iterable (list): A list or other iterable to sort.

1515 first (list): A sequence to enforce for elements which should

1516 appear at the beginning of the returned list.

1517 last (list): A sequence to enforce for elements which should

1518 appear at the end of the returned list.

1519 key (callable): Callable used to generate a comparable key for

1520 each item to be sorted, same as the key in

1521 :func:`sorted`. Note that entries in *first* and *last*

1522 should be the keys for the items. Defaults to

1523 passthrough/the identity function.

1524 reverse (bool): Whether or not elements not explicitly ordered

1525 by *first* and *last* should be in reverse order or not.

1526

1527 Returns a new list in sorted order.

1528 """

1529 first = first or []

1530 last = last or []

1531 key = key or (lambda x: x)

1532 seq = list(iterable)

1533 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]

1534 other.sort(key=key, reverse=reverse)

1535

1536 if first:

1537 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))

1538 if last:

1539 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))

1540 return first + other + last

1541

1542

1543def untyped_sorted(iterable, key=None, reverse=False):

1544 """A version of :func:`sorted` which will happily sort an iterable of

1545 heterogeneous types and return a new list, similar to legacy Python's

1546 behavior.

1547

1548 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])

1549 [1, 2.0, 2, 'abc', 'def']

1550

1551 Note how mutually orderable types are sorted as expected, as in

1552 the case of the integers and floats above.

1553

1554 .. note::

1555

1556 Results may vary across Python versions and builds, but the

1557 function will produce a sorted list, except in the case of

1558 explicitly unorderable objects.

1559

1560 """

1561 class _Wrapper(object):

1562 slots = ('obj',)

1563

1564 def __init__(self, obj):

1565 self.obj = obj

1566

1567 def __lt__(self, other):

1568 obj = key(self.obj) if key is not None else self.obj

1569 other = key(other.obj) if key is not None else other.obj

1570 try:

1571 ret = obj < other

1572 except TypeError:

1573 ret = ((type(obj).__name__, id(type(obj)), obj)

1574 < (type(other).__name__, id(type(other)), other))

1575 return ret

1576

1577 if key is not None and not callable(key):

1578 raise TypeError('expected function or callable object for key, not: %r'

1579 % key)

1580

1581 return sorted(iterable, key=_Wrapper, reverse=reverse)

1582

1583"""

1584May actually be faster to do an isinstance check for a str path

1585

1586$ python -m timeit -s "x = [1]" "x[0]"

158710000000 loops, best of 3: 0.0207 usec per loop

1588$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"

158910000000 loops, best of 3: 0.029 usec per loop

1590$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"

15911000000 loops, best of 3: 0.315 usec per loop

1592# setting up try/except is fast, only around 0.01us

1593# actually triggering the exception takes almost 10x as long

1594

1595$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"

159610000000 loops, best of 3: 0.141 usec per loop

1597$ python -m timeit -s "x = [1]" "isinstance(x, str)"

159810000000 loops, best of 3: 0.131 usec per loop

1599$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"

16001000000 loops, best of 3: 0.443 usec per loop

1601$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"

16021000000 loops, best of 3: 0.544 usec per loop

1603"""