1# Copyright (c) 2013, Mahmoud Hashemi
2#
3# Redistribution and use in source and binary forms, with or without
4# modification, are permitted provided that the following conditions are
5# met:
6#
7# * Redistributions of source code must retain the above copyright
8# notice, this list of conditions and the following disclaimer.
9#
10# * Redistributions in binary form must reproduce the above
11# copyright notice, this list of conditions and the following
12# disclaimer in the documentation and/or other materials provided
13# with the distribution.
14#
15# * The names of the contributors may not be used to endorse or
16# promote products derived from this software without specific
17# prior written permission.
18#
19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31""":mod:`itertools` is full of great examples of Python generator
32usage. However, there are still some critical gaps. ``iterutils``
33fills many of those gaps with featureful, tested, and Pythonic
34solutions.
35
Many of the functions below have two versions: one that returns an
iterator (denoted by the ``*_iter`` naming pattern), and a shorter-named
convenience form that returns a list. Some of the following are based on
examples from the :mod:`itertools` docs.
40"""
41
42import os
43import math
44import time
45import codecs
46import random
47import itertools
48from itertools import zip_longest
49from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable
50
51
52try:
53 from .typeutils import make_sentinel
54 _UNSET = make_sentinel('_UNSET')
55 _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
56except ImportError:
57 _REMAP_EXIT = object()
58 _UNSET = object()
59
60
61def is_iterable(obj):
62 """Similar in nature to :func:`callable`, ``is_iterable`` returns
63 ``True`` if an object is `iterable`_, ``False`` if not.
64
65 >>> is_iterable([])
66 True
67 >>> is_iterable(object())
68 False
69
    .. _iterable: https://docs.python.org/3/glossary.html#term-iterable
71 """
72 try:
73 iter(obj)
74 except TypeError:
75 return False
76 return True
77
78
79def is_scalar(obj):
80 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
81 object is an iterable container type. Strings are considered
82 scalar as well, because strings are more often treated as whole
83 values as opposed to iterables of 1-character substrings.
84
85 >>> is_scalar(object())
86 True
87 >>> is_scalar(range(10))
88 False
89 >>> is_scalar('hello')
90 True
91 """
92 return not is_iterable(obj) or isinstance(obj, (str, bytes))
93
94
95def is_collection(obj):
96 """The opposite of :func:`is_scalar`. Returns ``True`` if an object
97 is an iterable other than a string.
98
99 >>> is_collection(object())
100 False
101 >>> is_collection(range(10))
102 True
103 >>> is_collection('hello')
104 False
105 """
106 return is_iterable(obj) and not isinstance(obj, (str, bytes))
107
108
109def split(src, sep=None, maxsplit=None):
110 """Splits an iterable based on a separator. Like :meth:`str.split`,
111 but for all iterables. Returns a list of lists.
112
113 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
114 [['hi', 'hello'], ['sup'], ['soap']]
115
116 See :func:`split_iter` docs for more info.
117 """
118 return list(split_iter(src, sep, maxsplit))
119
120
121def split_iter(src, sep=None, maxsplit=None):
122 """Splits an iterable based on a separator, *sep*, a max of
123 *maxsplit* times (no max by default). *sep* can be:
124
125 * a single value
126 * an iterable of separators
127 * a single-argument callable that returns True when a separator is
128 encountered
129
130 ``split_iter()`` yields lists of non-separator values. A separator will
131 never appear in the output.
132
133 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
134 [['hi', 'hello'], ['sup'], ['soap']]
135
    Note that ``split_iter`` is based on :func:`str.split`, so if
    *sep* is ``None``, ``split_iter()`` **groups** contiguous
    separators, just as :meth:`str.split` does with whitespace. If empty
    lists are desired between two contiguous ``None`` values, simply use
    ``sep=[None]``:
140
141 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
142 [['hi', 'hello'], ['sup']]
143 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
144 [['hi', 'hello'], [], ['sup'], []]
145
146 Using a callable separator:
147
148 >>> falsy_sep = lambda x: not x
149 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
150 [['hi', 'hello'], [], ['sup'], []]
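
    A quick *maxsplit* example; once the limit is reached, the remaining
    elements, separators included, end up in the final group:

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None], maxsplit=1))
    [['hi', 'hello'], [None, 'sup', None]]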
151
152 See :func:`split` for a list-returning version.
153
154 """
155 if not is_iterable(src):
156 raise TypeError('expected an iterable')
157
    if maxsplit is not None:
        maxsplit = int(maxsplit)
        if maxsplit == 0:
            # mirror str.split(): with maxsplit=0, yield a single group
            # containing all of the elements
            yield list(src)
            return
163
164 if callable(sep):
165 sep_func = sep
166 elif not is_scalar(sep):
167 sep = frozenset(sep)
168 sep_func = lambda x: x in sep
169 else:
170 sep_func = lambda x: x == sep
171
172 cur_group = []
173 split_count = 0
174 for s in src:
175 if maxsplit is not None and split_count >= maxsplit:
176 sep_func = lambda x: False
177 if sep_func(s):
178 if sep is None and not cur_group:
179 # If sep is none, str.split() "groups" separators
180 # check the str.split() docs for more info
181 continue
182 split_count += 1
183 yield cur_group
184 cur_group = []
185 else:
186 cur_group.append(s)
187
188 if cur_group or sep is not None:
189 yield cur_group
190 return
191
192
193def lstrip(iterable, strip_value=None):
194 """Strips values from the beginning of an iterable. Stripped items will
195 match the value of the argument strip_value. Functionality is analogous
196 to that of the method str.lstrip. Returns a list.
197
198 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
199 ['Bar', 'Bam']
200
201 """
202 return list(lstrip_iter(iterable, strip_value))
203
204
205def lstrip_iter(iterable, strip_value=None):
206 """Strips values from the beginning of an iterable. Stripped items will
207 match the value of the argument strip_value. Functionality is analogous
208 to that of the method str.lstrip. Returns a generator.
209
210 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
211 ['Bar', 'Bam']
212
213 """
214 iterator = iter(iterable)
215 for i in iterator:
216 if i != strip_value:
217 yield i
218 break
219 for i in iterator:
220 yield i
221
222
223def rstrip(iterable, strip_value=None):
224 """Strips values from the end of an iterable. Stripped items will
225 match the value of the argument strip_value. Functionality is analogous
226 to that of the method str.rstrip. Returns a list.
227
228 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
229 ['Foo', 'Bar']
230
231 """
    return list(rstrip_iter(iterable, strip_value))
233
234
235def rstrip_iter(iterable, strip_value=None):
236 """Strips values from the end of an iterable. Stripped items will
237 match the value of the argument strip_value. Functionality is analogous
238 to that of the method str.rstrip. Returns a generator.
239
240 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
241 ['Foo', 'Bar']
242
243 """
244 iterator = iter(iterable)
245 for i in iterator:
246 if i == strip_value:
247 cache = list()
248 cache.append(i)
249 broken = False
250 for i in iterator:
251 if i == strip_value:
252 cache.append(i)
253 else:
254 broken = True
255 break
256 if not broken: # Return to caller here because the end of the
257 return # iterator has been reached
258 yield from cache
259 yield i
260
261
262def strip(iterable, strip_value=None):
263 """Strips values from the beginning and end of an iterable. Stripped items
264 will match the value of the argument strip_value. Functionality is
265 analogous to that of the method str.strip. Returns a list.
266
267 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
268 ['Foo', 'Bar', 'Bam']
269
270 """
    return list(strip_iter(iterable, strip_value))
272
273
def strip_iter(iterable, strip_value=None):
275 """Strips values from the beginning and end of an iterable. Stripped items
276 will match the value of the argument strip_value. Functionality is
277 analogous to that of the method str.strip. Returns a generator.
278
279 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
280 ['Foo', 'Bar', 'Bam']
281
282 """
    return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)
284
285
286def chunked(src, size, count=None, **kw):
287 """Returns a list of *count* chunks, each with *size* elements,
288 generated from iterable *src*. If *src* is not evenly divisible by
289 *size*, the final chunk will have fewer than *size* elements.
    Pass the *fill* keyword argument to supply a pad value and enable
    padding; otherwise, no padding will take place.
292
293 >>> chunked(range(10), 3)
294 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
295 >>> chunked(range(10), 3, fill=None)
296 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
297 >>> chunked(range(10), 3, count=2)
298 [[0, 1, 2], [3, 4, 5]]
299
300 See :func:`chunked_iter` for more info.
301 """
302 chunk_iter = chunked_iter(src, size, **kw)
303 if count is None:
304 return list(chunk_iter)
305 else:
306 return list(itertools.islice(chunk_iter, count))
307
308
309def _validate_positive_int(value, name, strictly_positive=True):
310 value = int(value)
311 if value < 0 or (strictly_positive and value == 0):
312 raise ValueError('expected a positive integer ' + name)
313 return value
314
315
316def chunked_iter(src, size, **kw):
317 """Generates *size*-sized chunks from *src* iterable. Unless the
318 optional *fill* keyword argument is provided, iterables not evenly
319 divisible by *size* will have a final chunk that is smaller than
320 *size*.
321
322 >>> list(chunked_iter(range(10), 3))
323 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
324 >>> list(chunked_iter(range(10), 3, fill=None))
325 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
326
327 Note that ``fill=None`` in fact uses ``None`` as the fill value.
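
    String and bytes inputs are rejoined, so chunks come back as the same
    type as *src*:

    >>> list(chunked_iter('foobarbaz', 4))
    ['foob', 'arba', 'z']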
328 """
329 # TODO: add count kwarg?
330 if not is_iterable(src):
331 raise TypeError('expected an iterable')
332 size = _validate_positive_int(size, 'chunk size')
333 do_fill = True
334 try:
335 fill_val = kw.pop('fill')
336 except KeyError:
337 do_fill = False
338 fill_val = None
339 if kw:
340 raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
341 if not src:
342 return
343 postprocess = lambda chk: chk
344 if isinstance(src, (str, bytes)):
345 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk)
346 if isinstance(src, bytes):
347 postprocess = lambda chk: bytes(chk)
348 src_iter = iter(src)
349 while True:
350 cur_chunk = list(itertools.islice(src_iter, size))
351 if not cur_chunk:
352 break
353 lc = len(cur_chunk)
354 if lc < size and do_fill:
355 cur_chunk[lc:] = [fill_val] * (size - lc)
356 yield postprocess(cur_chunk)
357 return
358
359
360def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
    Optionally, the start of the input can be set via *input_offset*, and
    an overlap between the chunks may be specified via *overlap_size*.
    Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
    are always at the beginning of the chunk.
366
367 Returns an iterator of (start, end) tuples, one tuple per chunk.
368
369 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
370 [(10, 15), (15, 20)]
371 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
372 [(10, 15), (14, 19), (18, 20)]
373 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
374 [(10, 15), (13, 18), (16, 20)]
375
376 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
377 [(4, 9), (9, 14), (14, 19)]
378 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
379 [(4, 5), (5, 10), (10, 15), (15, 19)]
380
381 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
382 [(2, 7), (6, 11), (10, 15), (14, 17)]
383 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
384 [(2, 5), (4, 9), (8, 13), (12, 17)]
385 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
386 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
387 """
388 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False)
389 chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
390 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False)
391 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False)
392
393 input_stop = input_offset + input_size
394
395 if align:
396 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size)
397 if initial_chunk_len != overlap_size:
398 yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
399 if input_offset + initial_chunk_len >= input_stop:
400 return
401 input_offset = input_offset + initial_chunk_len - overlap_size
402
403 for i in range(input_offset, input_stop, chunk_size - overlap_size):
404 yield (i, min(i + chunk_size, input_stop))
405
406 if i + chunk_size >= input_stop:
407 return
408
409
410def pairwise(src, end=_UNSET):
411 """Convenience function for calling :func:`windowed` on *src*, with
412 *size* set to 2.
413
414 >>> pairwise(range(5))
415 [(0, 1), (1, 2), (2, 3), (3, 4)]
416 >>> pairwise([])
417 []
418
419 Unless *end* is set, the number of pairs is always one less than
420 the number of elements in the iterable passed in, except on an empty input,
421 which will return an empty list.
422
423 With *end* set, a number of pairs equal to the length of *src* is returned,
424 with the last item of the last pair being equal to *end*.
425
426 >>> list(pairwise(range(3), end=None))
427 [(0, 1), (1, 2), (2, None)]
428
429 This way, *end* values can be useful as sentinels to signal the end of the iterable.
430 """
431 return windowed(src, 2, fill=end)
432
433
434def pairwise_iter(src, end=_UNSET):
435 """Convenience function for calling :func:`windowed_iter` on *src*,
436 with *size* set to 2.
437
438 >>> list(pairwise_iter(range(5)))
439 [(0, 1), (1, 2), (2, 3), (3, 4)]
440 >>> list(pairwise_iter([]))
441 []
442
443 Unless *end* is set, the number of pairs is always one less
444 than the number of elements in the iterable passed in,
445 or zero, when *src* is empty.
446
447 With *end* set, a number of pairs equal to the length of *src* is returned,
448 with the last item of the last pair being equal to *end*.
449
450 >>> list(pairwise_iter(range(3), end=None))
451 [(0, 1), (1, 2), (2, None)]
452
453 This way, *end* values can be useful as sentinels to signal the end
454 of the iterable. For infinite iterators, setting *end* has no effect.
455 """
456 return windowed_iter(src, 2, fill=end)
457
458
459def windowed(src, size, fill=_UNSET):
460 """Returns tuples with exactly length *size*. If *fill* is unset
461 and the iterable is too short to make a window of length *size*,
462 no tuples are returned. See :func:`windowed_iter` for more.
463 """
464 return list(windowed_iter(src, size, fill=fill))
465
466
467def windowed_iter(src, size, fill=_UNSET):
468 """Returns tuples with length *size* which represent a sliding
469 window over iterable *src*.
470
471 >>> list(windowed_iter(range(7), 3))
472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]
473
474 If *fill* is unset, and the iterable is too short to make a window
475 of length *size*, then no window tuples are returned.
476
477 >>> list(windowed_iter(range(3), 5))
478 []
479
480 With *fill* set, the iterator always yields a number of windows
481 equal to the length of the *src* iterable.
482
483 >>> windowed(range(4), 3, fill=None)
484 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]
485
486 This way, *fill* values can be useful to signal the end of the iterable.
487 For infinite iterators, setting *fill* has no effect.
488 """
489 tees = itertools.tee(src, size)
490 if fill is _UNSET:
491 try:
492 for i, t in enumerate(tees):
493 for _ in range(i):
494 next(t)
495 except StopIteration:
496 return zip([])
497 return zip(*tees)
498
499 for i, t in enumerate(tees):
500 for _ in range(i):
501 try:
502 next(t)
503 except StopIteration:
504 continue
505 return zip_longest(*tees, fillvalue=fill)
506
507
508
509def xfrange(stop, start=None, step=1.0):
510 """Same as :func:`frange`, but generator-based instead of returning a
511 list.
512
513 >>> tuple(xfrange(1, 3, step=0.75))
514 (1.0, 1.75, 2.5)
515
516 See :func:`frange` for more details.
517 """
518 if not step:
519 raise ValueError('step must be non-zero')
520 if start is None:
521 start, stop = 0.0, stop * 1.0
522 else:
523 # swap when all args are used
524 stop, start = start * 1.0, stop * 1.0
525 cur = start
526 while cur < stop:
527 yield cur
528 cur += step
529
530
531def frange(stop, start=None, step=1.0):
532 """A :func:`range` clone for float-based ranges.
533
534 >>> frange(5)
535 [0.0, 1.0, 2.0, 3.0, 4.0]
536 >>> frange(6, step=1.25)
537 [0.0, 1.25, 2.5, 3.75, 5.0]
538 >>> frange(100.5, 101.5, 0.25)
539 [100.5, 100.75, 101.0, 101.25]
540 >>> frange(5, 0)
541 []
542 >>> frange(5, 0, step=-1.25)
543 [5.0, 3.75, 2.5, 1.25]
544 """
545 if not step:
546 raise ValueError('step must be non-zero')
547 if start is None:
548 start, stop = 0.0, stop * 1.0
549 else:
550 # swap when all args are used
551 stop, start = start * 1.0, stop * 1.0
552 count = int(math.ceil((stop - start) / step))
553 ret = [None] * count
554 if not ret:
555 return ret
556 ret[0] = start
557 for i in range(1, count):
558 ret[i] = ret[i - 1] + step
559 return ret
560
561
562def backoff(start, stop, count=None, factor=2.0, jitter=False):
563 """Returns a list of geometrically-increasing floating-point numbers,
564 suitable for usage with `exponential backoff`_. Exactly like
565 :func:`backoff_iter`, but without the ``'repeat'`` option for
566 *count*. See :func:`backoff_iter` for more details.
567
568 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
569
570 >>> backoff(1, 10)
571 [1.0, 2.0, 4.0, 8.0, 10.0]
572 """
573 if count == 'repeat':
574 raise ValueError("'repeat' supported in backoff_iter, not backoff")
575 return list(backoff_iter(start, stop, count=count,
576 factor=factor, jitter=jitter))
577
578
579def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
580 """Generates a sequence of geometrically-increasing floats, suitable
581 for usage with `exponential backoff`_. Starts with *start*,
582 increasing by *factor* until *stop* is reached, optionally
583 stopping iteration once *count* numbers are yielded. *factor*
584 defaults to 2. In general retrying with properly-configured
585 backoff creates a better-behaved component for a larger service
586 ecosystem.
587
588 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff
589
590 >>> list(backoff_iter(1.0, 10.0, count=5))
591 [1.0, 2.0, 4.0, 8.0, 10.0]
592 >>> list(backoff_iter(1.0, 10.0, count=8))
593 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
594 >>> list(backoff_iter(0.25, 100.0, factor=10))
595 [0.25, 2.5, 25.0, 100.0]
596
597 A simplified usage example:
598
599 .. code-block:: python
600
601 for timeout in backoff_iter(0.25, 5.0):
602 try:
603 res = network_call()
604 break
605 except Exception as e:
606 log(e)
607 time.sleep(timeout)
608
609 An enhancement for large-scale systems would be to add variation,
610 or *jitter*, to timeout values. This is done to avoid a thundering
611 herd on the receiving end of the network call.
612
613 Finally, for *count*, the special value ``'repeat'`` can be passed to
614 continue yielding indefinitely.
615
616 Args:
617
618 start (float): Positive number for baseline.
619 stop (float): Positive number for maximum.
620 count (int): Number of steps before stopping
621 iteration. Defaults to the number of steps between *start* and
622 *stop*. Pass the string, `'repeat'`, to continue iteration
623 indefinitely.
624 factor (float): Rate of exponential increase. Defaults to `2.0`,
625 e.g., `[1, 2, 4, 8, 16]`.
626 jitter (float): A factor between `-1.0` and `1.0`, used to
627 uniformly randomize and thus spread out timeouts in a distributed
628 system, avoiding rhythm effects. Positive values use the base
629 backoff curve as a maximum, negative values use the curve as a
630 minimum. Set to 1.0 or `True` for a jitter approximating
631 Ethernet's time-tested backoff solution. Defaults to `False`.
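
    A sketch of adding jitter to the earlier example (the timeouts are
    randomized, so the exact values will vary):

    .. code-block:: python

        for timeout in backoff_iter(0.25, 5.0, jitter=True):
            try:
                res = network_call()
                break
            except Exception as e:
                log(e)
                time.sleep(timeout)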
632
633 """
634 start = float(start)
635 stop = float(stop)
636 factor = float(factor)
637 if start < 0.0:
638 raise ValueError('expected start >= 0, not %r' % start)
639 if factor < 1.0:
640 raise ValueError('expected factor >= 1.0, not %r' % factor)
641 if stop == 0.0:
        raise ValueError('expected stop > 0, not %r' % stop)
643 if stop < start:
644 raise ValueError('expected stop >= start, not %r' % stop)
645 if count is None:
646 denom = start if start else 1
647 count = 1 + math.ceil(math.log(stop/denom, factor))
648 count = count if start else count + 1
649 if count != 'repeat' and count < 0:
650 raise ValueError('count must be positive or "repeat", not %r' % count)
651 if jitter:
652 jitter = float(jitter)
653 if not (-1.0 <= jitter <= 1.0):
654 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)
655
656 cur, i = start, 0
657 while count == 'repeat' or i < count:
        if not jitter:
            cur_ret = cur
        else:
            cur_ret = cur - (cur * jitter * random.random())
662 yield cur_ret
663 i += 1
664 if cur == 0:
665 cur = 1
666 elif cur < stop:
667 cur *= factor
668 if cur > stop:
669 cur = stop
670 return
671
672
673def bucketize(src, key=bool, value_transform=None, key_filter=None):
674 """Group values in the *src* iterable by the value returned by *key*.
675
676 >>> bucketize(range(5))
677 {False: [0], True: [1, 2, 3, 4]}
678 >>> is_odd = lambda x: x % 2 == 1
679 >>> bucketize(range(5), is_odd)
680 {False: [0, 2, 4], True: [1, 3]}
681
    *key* is :class:`bool` by default, but can also be a callable, a
    string, or a list. If *key* is a string, it is the name of the
    attribute on which to bucketize objects:

    >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
    {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}

    If *key* is a list, it contains the bucket for each corresponding
    element of *src*:

    >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
    {0: [1, 4], 1: [2], 2: [365, 98]}

694 Value lists are not deduplicated:
695
696 >>> bucketize([None, None, None, 'hello'])
697 {False: [None, None, None], True: ['hello']}
698
    Bucketize into more than two groups:
700
701 >>> bucketize(range(10), lambda x: x % 3)
702 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}
703
704 ``bucketize`` has a couple of advanced options useful in certain
705 cases. *value_transform* can be used to modify values as they are
706 added to buckets, and *key_filter* will allow excluding certain
707 buckets from being collected.
708
709 >>> bucketize(range(5), value_transform=lambda x: x*x)
710 {False: [0], True: [1, 4, 9, 16]}
711
712 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
713 {0: [0, 3, 6, 9], 2: [2, 5, 8]}
714
    Note that in the boolean examples above there are at most two keys,
    ``True`` and ``False``, and each key present has a list with at
    least one item. See :func:`partition` for a version specialized for
    binary use cases.
719
720 """
721 if not is_iterable(src):
722 raise TypeError('expected an iterable')
723 elif isinstance(key, list):
724 if len(key) != len(src):
725 raise ValueError("key and src have to be the same length")
726 src = zip(key, src)
727
728 if isinstance(key, str):
729 key_func = lambda x: getattr(x, key, x)
730 elif callable(key):
731 key_func = key
732 elif isinstance(key, list):
733 key_func = lambda x: x[0]
734 else:
735 raise TypeError('expected key to be callable or a string or a list')
736
737 if value_transform is None:
738 value_transform = lambda x: x
739 if not callable(value_transform):
740 raise TypeError('expected callable value transform function')
    if isinstance(key, list):
        f = value_transform
        value_transform = lambda x: f(x[1])
744
745 ret = {}
746 for val in src:
747 key_of_val = key_func(val)
748 if key_filter is None or key_filter(key_of_val):
749 ret.setdefault(key_of_val, []).append(value_transform(val))
750 return ret
751
752
753def partition(src, key=bool):
754 """No relation to :meth:`str.partition`, ``partition`` is like
755 :func:`bucketize`, but for added convenience returns a tuple of
756 ``(truthy_values, falsy_values)``.
757
758 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
759 >>> nonempty
760 ['hi', 'bye']
761
    *key* defaults to :class:`bool`, but can be carefully overridden to
    use either a function that returns ``True`` or ``False``, or the
    string name of an attribute on which to partition objects.
765
766 >>> import string
767 >>> is_digit = lambda x: x in string.digits
768 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
769 >>> ''.join(decimal_digits), ''.join(hexletters)
770 ('0123456789', 'abcdefABCDEF')
771 """
772 bucketized = bucketize(src, key)
773 return bucketized.get(True, []), bucketized.get(False, [])
774
775
776def unique(src, key=None):
777 """``unique()`` returns a list of unique values, as determined by
778 *key*, in the order they first appeared in the input iterable,
779 *src*.
780
781 >>> ones_n_zeros = '11010110001010010101010'
782 >>> ''.join(unique(ones_n_zeros))
783 '10'
784
785 See :func:`unique_iter` docs for more details.
786 """
787 return list(unique_iter(src, key))
788
789
790def unique_iter(src, key=None):
791 """Yield unique elements from the iterable, *src*, based on *key*,
792 in the order in which they first appeared in *src*.
793
794 >>> repetitious = [1, 2, 3] * 10
795 >>> list(unique_iter(repetitious))
796 [1, 2, 3]
797
798 By default, *key* is the object itself, but *key* can either be a
799 callable or, for convenience, a string name of the attribute on
800 which to uniqueify objects, falling back on identity when the
801 attribute is not present.
802
803 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
804 >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
805 ['hi', 'hello', 'bye']
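
    A string *key* looks up an attribute of each element, here the
    ``real`` attribute of the complex numbers:

    >>> list(unique_iter([3+4j, 3-4j, 5j], key='real'))
    [(3+4j), 5j]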
806 """
807 if not is_iterable(src):
808 raise TypeError('expected an iterable, not %r' % type(src))
809 if key is None:
810 key_func = lambda x: x
811 elif callable(key):
812 key_func = key
813 elif isinstance(key, str):
814 key_func = lambda x: getattr(x, key, x)
815 else:
816 raise TypeError('"key" expected a string or callable, not %r' % key)
817 seen = set()
818 for i in src:
819 k = key_func(i)
820 if k not in seen:
821 seen.add(k)
822 yield i
823 return
824
825
826def redundant(src, key=None, groups=False):
827 """The complement of :func:`unique()`.
828
    By default, returns a list containing the first redundant (i.e.,
    duplicate) occurrence of each repeated value in *src*. Pass
    ``groups=True`` to get groups of all values with redundancies,
    ordered by position of the first redundant value. This is useful in
    conjunction with some normalizing *key* function.
834
835 >>> redundant([1, 2, 3, 4])
836 []
837 >>> redundant([1, 2, 3, 2, 3, 3, 4])
838 [2, 3]
839 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
840 [[2, 2], [3, 3, 3]]
841
842 An example using a *key* function to do case-insensitive
843 redundancy detection.
844
845 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
846 ['Hi']
847 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
848 [['hi', 'Hi', 'HI']]
849
850 *key* should also be used when the values in *src* are not hashable.
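
    For example, using ``tuple`` as the *key* to find duplicate lists,
    which are not hashable themselves:

    >>> redundant([[1], [2], [1]], key=tuple)
    [[1]]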
851
852 .. note::
853
        The output of this function is designed for reporting
        duplicates in contexts where a unique input is expected. Due to
        the grouped return type, there is no streaming equivalent of
        this function for the time being.
858
859 """
860 if key is None:
861 pass
862 elif callable(key):
863 key_func = key
864 elif isinstance(key, (str, bytes)):
865 key_func = lambda x: getattr(x, key, x)
866 else:
867 raise TypeError('"key" expected a string or callable, not %r' % key)
868 seen = {} # key to first seen item
869 redundant_order = []
870 redundant_groups = {}
871 for i in src:
872 k = key_func(i) if key else i
873 if k not in seen:
874 seen[k] = i
875 else:
876 if k in redundant_groups:
877 if groups:
878 redundant_groups[k].append(i)
879 else:
880 redundant_order.append(k)
881 redundant_groups[k] = [seen[k], i]
882 if not groups:
883 ret = [redundant_groups[k][1] for k in redundant_order]
884 else:
885 ret = [redundant_groups[k] for k in redundant_order]
886 return ret
887
888
889def one(src, default=None, key=None):
890 """Along the same lines as builtins, :func:`all` and :func:`any`, and
891 similar to :func:`first`, ``one()`` returns the single object in
892 the given iterable *src* that evaluates to ``True``, as determined
893 by callable *key*. If unset, *key* defaults to :class:`bool`. If
894 no such objects are found, *default* is returned. If *default* is
895 not passed, ``None`` is returned.
896
    If *src* has more than one object that evaluates to ``True``,
    *default* is likewise returned, making ``one()`` behave like an
    `XOR`_ over an iterable.
900
901 >>> one((True, False, False))
902 True
903 >>> one((True, False, True))
904 >>> one((0, 0, 'a'))
905 'a'
906 >>> one((0, False, None))
907 >>> one((True, True), default=False)
908 False
909 >>> bool(one(('', 1)))
910 True
911 >>> one((10, 20, 30, 42), key=lambda i: i > 40)
912 42
913
914 See `Martín Gaitán's original repo`_ for further use cases.
915
916 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
917 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or
918
919 """
920 ones = list(itertools.islice(filter(key, src), 2))
921 return ones[0] if len(ones) == 1 else default
922
923
924def first(iterable, default=None, key=None):
925 """Return first element of *iterable* that evaluates to ``True``, else
926 return ``None`` or optional *default*. Similar to :func:`one`.
927
928 >>> first([0, False, None, [], (), 42])
929 42
930 >>> first([0, False, None, [], ()]) is None
931 True
932 >>> first([0, False, None, [], ()], default='ohai')
933 'ohai'
934 >>> import re
935 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
936 >>> m.group(1)
937 'bc'
938
939 The optional *key* argument specifies a one-argument predicate function
940 like that used for *filter()*. The *key* argument, if supplied, should be
941 in keyword form. For example, finding the first even number in an iterable:
942
943 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
944 4
945
946 Contributed by Hynek Schlawack, author of `the original standalone module`_.
947
948 .. _the original standalone module: https://github.com/hynek/first
949 """
950 return next(filter(key, iterable), default)
951
952
953def flatten_iter(iterable):
954 """``flatten_iter()`` yields all the elements from *iterable* while
955 collapsing any nested iterables.
956
957 >>> nested = [[1, 2], [[3], [4, 5]]]
958 >>> list(flatten_iter(nested))
959 [1, 2, 3, 4, 5]
960 """
961 for item in iterable:
962 if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
963 yield from flatten_iter(item)
964 else:
965 yield item
966
967def flatten(iterable):
968 """``flatten()`` returns a collapsed list of all the elements from
969 *iterable* while collapsing any nested iterables.
970
971 >>> nested = [[1, 2], [[3], [4, 5]]]
972 >>> flatten(nested)
973 [1, 2, 3, 4, 5]
974 """
975 return list(flatten_iter(iterable))
976
977
978def same(iterable, ref=_UNSET):
979 """``same()`` returns ``True`` when all values in *iterable* are
980 equal to one another, or optionally a reference value,
981 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
982 an iterable and returns a :class:`bool`. ``same()`` returns
983 ``True`` for empty iterables.
984
985 >>> same([])
986 True
987 >>> same([1])
988 True
989 >>> same(['a', 'a', 'a'])
990 True
991 >>> same(range(20))
992 False
993 >>> same([[], []])
994 True
995 >>> same([[], []], ref='test')
996 False
997
998 """
999 iterator = iter(iterable)
1000 if ref is _UNSET:
1001 ref = next(iterator, ref)
1002 return all(val == ref for val in iterator)
1003
1004
1005def default_visit(path, key, value):
1006 # print('visit(%r, %r, %r)' % (path, key, value))
1007 return key, value
1008
1009# enable the extreme: monkeypatching iterutils with a different default_visit
1010_orig_default_visit = default_visit
1011
1012
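# Default *enter* behavior for remap(): returns a (blank new parent,
# iterator of child items) pair, or (value, False) for values that should
# not be traversed.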
1013def default_enter(path, key, value):
1014 # print('enter(%r, %r)' % (key, value))
1015 if isinstance(value, (str, bytes)):
1016 return value, False
1017 elif isinstance(value, Mapping):
1018 return value.__class__(), ItemsView(value)
1019 elif isinstance(value, Sequence):
1020 return value.__class__(), enumerate(value)
1021 elif isinstance(value, Set):
1022 return value.__class__(), enumerate(value)
1023 else:
1024 # files, strings, other iterables, and scalars are not
1025 # traversed
1026 return value, False
1027
1028
1029def default_exit(path, key, old_parent, new_parent, new_items):
1030 # print('exit(%r, %r, %r, %r, %r)'
1031 # % (path, key, old_parent, new_parent, new_items))
1032 ret = new_parent
1033 if isinstance(new_parent, Mapping):
1034 new_parent.update(new_items)
1035 elif isinstance(new_parent, Sequence):
1036 vals = [v for i, v in new_items]
1037 try:
1038 new_parent.extend(vals)
1039 except AttributeError:
1040 ret = new_parent.__class__(vals) # tuples
1041 elif isinstance(new_parent, Set):
1042 vals = [v for i, v in new_items]
1043 try:
1044 new_parent.update(vals)
1045 except AttributeError:
1046 ret = new_parent.__class__(vals) # frozensets
1047 else:
1048 raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
1049 return ret
1050
1051
1052def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
1053 **kwargs):
1054 """The remap ("recursive map") function is used to traverse and
1055 transform nested structures. Lists, tuples, sets, and dictionaries
1056 are just a few of the data structures nested into heterogeneous
1057 tree-like structures that are so common in programming.
1058 Unfortunately, Python's built-in ways to manipulate collections
1059 are almost all flat. List comprehensions may be fast and succinct,
1060 but they do not recurse, making it tedious to apply quick changes
1061 or complex transforms to real-world data.
1062
1063 remap goes where list comprehensions cannot.
1064
1065 Here's an example of removing all Nones from some data:
1066
1067 >>> from pprint import pprint
1068 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
1069 ... 'Babylon 5': 6, 'Dr. Who': None}
1070 >>> pprint(remap(reviews, lambda p, k, v: v is not None))
1071 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}
1072
1073 Notice how both Nones have been removed despite the nesting in the
1074 dictionary. Not bad for a one-liner, and that's just the beginning.
1075 See `this remap cookbook`_ for more delicious recipes.
1076
1077 .. _this remap cookbook: http://sedimental.org/remap.html
1078
1079 remap takes four main arguments: the object to traverse and three
1080 optional callables which determine how the remapped object will be
1081 created.
1082
1083 Args:
1084
1085 root: The target object to traverse. By default, remap
1086 supports iterables like :class:`list`, :class:`tuple`,
1087 :class:`dict`, and :class:`set`, but any object traversable by
1088 *enter* will work.
1089 visit (callable): This function is called on every item in
1090 *root*. It must accept three positional arguments, *path*,
1091 *key*, and *value*. *path* is simply a tuple of parents'
1092 keys. *visit* should return the new key-value pair. It may
1093 also return ``True`` as shorthand to keep the old item
1094 unmodified, or ``False`` to drop the item from the new
1095 structure. *visit* is called after *enter*, on the new parent.
1096
1097 The *visit* function is called for every item in root,
1098 including duplicate items. For traversable values, it is
1099 called on the new parent object, after all its children
1100 have been visited. The default visit behavior simply
1101 returns the key-value pair unmodified.
1102 enter (callable): This function controls which items in *root*
1103 are traversed. It accepts the same arguments as *visit*: the
1104 path, the key, and the value of the current item. It returns a
1105 pair of the blank new parent, and an iterator over the items
1106 which should be visited. If ``False`` is returned instead of
1107 an iterator, the value will not be traversed.
1108
            The *enter* function is only called once per unique value. The
            default enter behavior supports mappings, sequences, and
            sets. Strings and all other iterables will not be traversed.
1112 exit (callable): This function determines how to handle items
1113 once they have been visited. It gets the same three
1114 arguments as the other functions -- *path*, *key*, *value*
1115 -- plus two more: the blank new parent object returned
1116 from *enter*, and a list of the new items, as remapped by
1117 *visit*.
1118
1119 Like *enter*, the *exit* function is only called once per
1120 unique value. The default exit behavior is to simply add
1121 all new items to the new parent, e.g., using
1122 :meth:`list.extend` and :meth:`dict.update` to add to the
1123 new parent. Immutable objects, such as a :class:`tuple` or
1124 :class:`namedtuple`, must be recreated from scratch, but
1125 use the same type as the new parent passed back from the
1126 *enter* function.
1127 reraise_visit (bool): A pragmatic convenience for the *visit*
1128 callable. When set to ``False``, remap ignores any errors
1129 raised by the *visit* callback. Items causing exceptions
1130 are kept. See examples for more details.
1131
1132 remap is designed to cover the majority of cases with just the
1133 *visit* callable. While passing in multiple callables is very
1134 empowering, remap is designed so very few cases should require
1135 passing more than one function.
1136
1137 When passing *enter* and *exit*, it's common and easiest to build
1138 on the default behavior. Simply add ``from boltons.iterutils import
1139 default_enter`` (or ``default_exit``), and have your enter/exit
1140 function call the default behavior before or after your custom
1141 logic. See `this example`_.
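
    A minimal sketch of that pattern, wrapping the default *exit* to
    sort all lists in the result (``sorting_exit`` is just an
    illustrative name):

    >>> def sorting_exit(path, key, old, new_parent, new_items):
    ...     ret = default_exit(path, key, old, new_parent, new_items)
    ...     if isinstance(ret, list):
    ...         ret.sort()
    ...     return ret
    >>> remap({'a': [3, 1, 2]}, exit=sorting_exit)
    {'a': [1, 2, 3]}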
1142
1143 Duplicate and self-referential objects (aka reference loops) are
1144 automatically handled internally, `as shown here`_.
1145
1146 .. _this example: http://sedimental.org/remap.html#sort_all_lists
1147 .. _as shown here: http://sedimental.org/remap.html#corner_cases
1148
1149 """
1150 # TODO: improve argument formatting in sphinx doc
1151 # TODO: enter() return (False, items) to continue traverse but cancel copy?
1152 if not callable(visit):
1153 raise TypeError('visit expected callable, not: %r' % visit)
1154 if not callable(enter):
1155 raise TypeError('enter expected callable, not: %r' % enter)
1156 if not callable(exit):
1157 raise TypeError('exit expected callable, not: %r' % exit)
1158 reraise_visit = kwargs.pop('reraise_visit', True)
1159 if kwargs:
1160 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())
1161
1162 path, registry, stack = (), {}, [(None, root)]
1163 new_items_stack = []
1164 while stack:
1165 key, value = stack.pop()
1166 id_value = id(value)
1167 if key is _REMAP_EXIT:
1168 key, new_parent, old_parent = value
1169 id_value = id(old_parent)
1170 path, new_items = new_items_stack.pop()
1171 value = exit(path, key, old_parent, new_parent, new_items)
1172 registry[id_value] = value
1173 if not new_items_stack:
1174 continue
1175 elif id_value in registry:
1176 value = registry[id_value]
1177 else:
1178 res = enter(path, key, value)
1179 try:
1180 new_parent, new_items = res
1181 except TypeError:
1182 # TODO: handle False?
1183 raise TypeError('enter should return a tuple of (new_parent,'
1184 ' items_iterator), not: %r' % res)
1185 if new_items is not False:
1186 # traverse unless False is explicitly passed
1187 registry[id_value] = new_parent
1188 new_items_stack.append((path, []))
1189 if value is not root:
1190 path += (key,)
1191 stack.append((_REMAP_EXIT, (key, new_parent, value)))
1192 if new_items:
1193 stack.extend(reversed(list(new_items)))
1194 continue
1195 if visit is _orig_default_visit:
1196 # avoid function call overhead by inlining identity operation
1197 visited_item = (key, value)
1198 else:
1199 try:
1200 visited_item = visit(path, key, value)
1201 except Exception:
1202 if reraise_visit:
1203 raise
1204 visited_item = True
1205 if visited_item is False:
1206 continue # drop
1207 elif visited_item is True:
1208 visited_item = (key, value)
1209 # TODO: typecheck?
1210 # raise TypeError('expected (key, value) from visit(),'
1211 # ' not: %r' % visited_item)
1212 try:
1213 new_items_stack[-1][1].append(visited_item)
1214 except IndexError:
1215 raise TypeError('expected remappable root, not: %r' % root)
1216 return value
1217
1218
1219class PathAccessError(KeyError, IndexError, TypeError):
1220 """An amalgamation of KeyError, IndexError, and TypeError,
1221 representing what can occur when looking up a path in a nested
1222 object.
1223 """
1224 def __init__(self, exc, seg, path):
1225 self.exc = exc
1226 self.seg = seg
1227 self.path = path
1228
1229 def __repr__(self):
1230 cn = self.__class__.__name__
1231 return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})'
1232
1233 def __str__(self):
1234 return ('could not access %r from path %r, got error: %r'
1235 % (self.seg, self.path, self.exc))
1236
1237
1238def get_path(root, path, default=_UNSET):
1239 """Retrieve a value from a nested object via a tuple representing the
1240 lookup path.
1241
1242 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
1243 >>> get_path(root, ('a', 'b', 'c', 2, 0))
1244 3
1245
1246 The path tuple format is intentionally consistent with that of
1247 :func:`remap`, but a single dotted string can also be passed.
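
    The same lookup, using the dotted string form:

    >>> get_path(root, 'a.b.c.2.0')
    3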
1248
1249 One of get_path's chief aims is improved error messaging. EAFP is
1250 great, but the error messages are not.
1251
    For instance, ``root['a']['b']['c'][2][1]`` gives back
    ``IndexError: list index out of range``.

    What went out of range where? get_path currently raises
    ``PathAccessError: could not access 1 from path ('a', 'b', 'c', 2,
    1), got error: IndexError('list index out of range')``, a
    subclass of IndexError and KeyError.
1259
1260 You can also pass a default that covers the entire operation,
1261 should the lookup fail at any level.
1262
1263 Args:
1264 root: The target nesting of dictionaries, lists, or other
1265 objects supporting ``__getitem__``.
1266 path (tuple): A sequence of strings and integers to be successively
1267 looked up within *root*. A dot-separated (``a.b``) string may
1268 also be passed.
1269 default: The value to be returned should any
1270 ``PathAccessError`` exceptions be raised.
1271 """
1272 if isinstance(path, str):
1273 path = path.split('.')
1274 cur = root
1275 try:
1276 for seg in path:
1277 try:
1278 cur = cur[seg]
1279 except (KeyError, IndexError) as exc:
1280 raise PathAccessError(exc, seg, path)
1281 except TypeError as exc:
1282 # either string index in a list, or a parent that
1283 # doesn't support indexing
1284 try:
1285 seg = int(seg)
1286 cur = cur[seg]
1287 except (ValueError, KeyError, IndexError, TypeError):
1288 if not is_iterable(cur):
1289 exc = TypeError('%r object is not indexable'
1290 % type(cur).__name__)
1291 raise PathAccessError(exc, seg, path)
1292 except PathAccessError:
1293 if default is _UNSET:
1294 raise
1295 return default
1296 return cur
1297
1298
1299def research(root, query=lambda p, k, v: True, reraise=False):
1300 """The :func:`research` function uses :func:`remap` to recurse over
1301 any data nested in *root*, and find values which match a given
1302 criterion, specified by the *query* callable.
1303
1304 Results are returned as a list of ``(path, value)`` pairs. The
1305 paths are tuples in the same format accepted by
1306 :func:`get_path`. This can be useful for comparing values nested
1307 in two or more different structures.
1308
1309 Here's a simple example that finds all integers:
1310
1311 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
1312 >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
1313 >>> print(sorted(res))
1314 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]
1315
1316 Note how *query* follows the same, familiar ``path, key, value``
1317 signature as the ``visit`` and ``enter`` functions on
1318 :func:`remap`, and returns a :class:`bool`.
1319
1320 Args:
1321 root: The target object to search. Supports the same types of
1322 objects as :func:`remap`, including :class:`list`,
1323 :class:`tuple`, :class:`dict`, and :class:`set`.
1324 query (callable): The function called on every object to
1325 determine whether to include it in the search results. The
1326 callable must accept three arguments, *path*, *key*, and
1327 *value*, commonly abbreviated *p*, *k*, and *v*, same as
1328 *enter* and *visit* from :func:`remap`.
1329 reraise (bool): Whether to reraise exceptions raised by *query*
1330 or to simply drop the result that caused the error.
1331
1332
1333 With :func:`research` it's easy to inspect the details of a data
1334 structure, like finding values that are at a certain depth (using
1335 ``len(p)``) and much more. If more advanced functionality is
1336 needed, check out the code and make your own :func:`remap`
1337 wrapper, and consider `submitting a patch`_!
1338
1339 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
1340 """
1341 ret = []
1342
1343 if not callable(query):
1344 raise TypeError('query expected callable, not: %r' % query)
1345
1346 def enter(path, key, value):
1347 try:
1348 if query(path, key, value):
1349 ret.append((path + (key,), value))
1350 except Exception:
1351 if reraise:
1352 raise
1353 return default_enter(path, key, value)
1354
1355 remap(root, enter=enter)
1356 return ret
1357
1358
1359# TODO: recollect()
1360# TODO: refilter()
1361# TODO: reiter()
1362
1363
1364# GUID iterators: 10x faster and somewhat more compact than uuid.
1365
1366class GUIDerator:
1367 """The GUIDerator is an iterator that yields a globally-unique
1368 identifier (GUID) on every iteration. The GUIDs produced are
1369 hexadecimal strings.
1370
1371 Testing shows it to be around 12x faster than the uuid module. By
1372 default it is also more compact, partly due to its default 96-bit
1373 (24-hexdigit) length. 96 bits of randomness means that there is a
1374 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
1375 or less uniqueness is desired, the *size* argument can be adjusted
1376 accordingly.
1377
1378 Args:
1379 size (int): character length of the GUID, defaults to 24. Lengths
1380 between 20 and 36 are considered valid.
1381
1382 The GUIDerator has built-in fork protection that causes it to
1383 detect a fork on next iteration and reseed accordingly.
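
    A usage sketch (the values are random, so yours will differ):

    .. code-block:: python

        guids = GUIDerator()
        next(guids)  # -> a 24-character hex string, e.g. '3d4f...'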
1384
1385 """
1386 def __init__(self, size=24):
1387 self.size = size
1388 if size < 20 or size > 36:
            raise ValueError('expected 20 <= size <= 36')
1390 import hashlib
1391 self._sha1 = hashlib.sha1
1392 self.count = itertools.count()
1393 self.reseed()
1394
1395 def reseed(self):
1396 import socket
1397 self.pid = os.getpid()
1398 self.salt = '-'.join([str(self.pid),
1399 socket.gethostname() or '<nohostname>',
1400 str(time.time()),
1401 os.urandom(6).hex()])
1402 return
1403
1404 def __iter__(self):
1405 return self
1406
1407 def __next__(self):
1408 if os.getpid() != self.pid:
1409 self.reseed()
1410 target_bytes = (self.salt + str(next(self.count))).encode('utf8')
1411 hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
1412 return hash_text
1413
1414 next = __next__
1415
1416
1417class SequentialGUIDerator(GUIDerator):
1418 """Much like the standard GUIDerator, the SequentialGUIDerator is an
1419 iterator that yields a globally-unique identifier (GUID) on every
1420 iteration. The GUIDs produced are hexadecimal strings.
1421
1422 The SequentialGUIDerator differs in that it picks a starting GUID
1423 value and increments every iteration. This yields GUIDs which are
1424 of course unique, but also ordered and lexicographically sortable.
1425
1426 The SequentialGUIDerator is around 50% faster than the normal
1427 GUIDerator, making it almost 20x as fast as the built-in uuid
1428 module. By default it is also more compact, partly due to its
1429 96-bit (24-hexdigit) default length. 96 bits of randomness means that
1430 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
1431 iterations. If more or less uniqueness is desired, the *size*
1432 argument can be adjusted accordingly.
1433
1434 Args:
1435 size (int): character length of the GUID, defaults to 24.
1436
1437 Note that with SequentialGUIDerator there is a chance of GUIDs
1438 growing larger than the size configured. The SequentialGUIDerator
1439 has built-in fork protection that causes it to detect a fork on
1440 next iteration and reseed accordingly.
1441
1442 """
1443
1444 def reseed(self):
1445 super().reseed()
1446 start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
1447 self.start = int(start_str[:self.size], 16)
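        # set a high bit so that '%x' formatting always yields at least
        # *size* hex digits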
1448 self.start |= (1 << ((self.size * 4) - 2))
1449
1450 def __next__(self):
1451 if os.getpid() != self.pid:
1452 self.reseed()
1453 return '%x' % (next(self.count) + self.start)
1454
1455 next = __next__
1456
1457
1458guid_iter = GUIDerator()
1459seq_guid_iter = SequentialGUIDerator()
1460
1461
1462def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
1463 """For when you care about the order of some elements, but not about
1464 others.
1465
1466 Use this to float to the top and/or sink to the bottom a specific
1467 ordering, while sorting the rest of the elements according to
1468 normal :func:`sorted` rules.
1469
1470 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
1471 ['one', 'two', 'a', 'b']
1472 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
1473 [6, 5, 3, 1, 0, 2, 4]
1474 >>> import string
1475 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
1476 'aA1023456789cCdDeEfFbB'
1477
1478 Args:
1479 iterable (list): A list or other iterable to sort.
1480 first (list): A sequence to enforce for elements which should
1481 appear at the beginning of the returned list.
1482 last (list): A sequence to enforce for elements which should
1483 appear at the end of the returned list.
1484 key (callable): Callable used to generate a comparable key for
1485 each item to be sorted, same as the key in
1486 :func:`sorted`. Note that entries in *first* and *last*
1487 should be the keys for the items. Defaults to
1488 passthrough/the identity function.
        reverse (bool): Whether elements not explicitly ordered by
            *first* and *last* should be sorted in reverse order.
1491
1492 Returns a new list in sorted order.
1493 """
1494 first = first or []
1495 last = last or []
1496 key = key or (lambda x: x)
1497 seq = list(iterable)
1498 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))]
1499 other.sort(key=key, reverse=reverse)
1500
1501 if first:
1502 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x)))
1503 if last:
1504 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x)))
1505 return first + other + last
1506
1507
1508def untyped_sorted(iterable, key=None, reverse=False):
1509 """A version of :func:`sorted` which will happily sort an iterable of
1510 heterogeneous types and return a new list, similar to legacy Python's
1511 behavior.
1512
1513 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
1514 [1, 2.0, 2, 'abc', 'def']
1515
1516 Note how mutually orderable types are sorted as expected, as in
1517 the case of the integers and floats above.
1518
1519 .. note::
1520
1521 Results may vary across Python versions and builds, but the
1522 function will produce a sorted list, except in the case of
1523 explicitly unorderable objects.
1524
1525 """
1526 class _Wrapper:
        __slots__ = ('obj',)
1528
1529 def __init__(self, obj):
1530 self.obj = obj
1531
1532 def __lt__(self, other):
1533 obj = key(self.obj) if key is not None else self.obj
1534 other = key(other.obj) if key is not None else other.obj
1535 try:
1536 ret = obj < other
1537 except TypeError:
1538 ret = ((type(obj).__name__, id(type(obj)), obj)
1539 < (type(other).__name__, id(type(other)), other))
1540 return ret
1541
1542 if key is not None and not callable(key):
1543 raise TypeError('expected function or callable object for key, not: %r'
1544 % key)
1545
1546 return sorted(iterable, key=_Wrapper, reverse=reverse)
1547
1548"""
1549May actually be faster to do an isinstance check for a str path
1550
1551$ python -m timeit -s "x = [1]" "x[0]"
155210000000 loops, best of 3: 0.0207 usec per loop
1553$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
155410000000 loops, best of 3: 0.029 usec per loop
1555$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
15561000000 loops, best of 3: 0.315 usec per loop
1557# setting up try/except is fast, only around 0.01us
1558# actually triggering the exception takes almost 10x as long
1559
1560$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
156110000000 loops, best of 3: 0.141 usec per loop
1562$ python -m timeit -s "x = [1]" "isinstance(x, str)"
156310000000 loops, best of 3: 0.131 usec per loop
1564$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
15651000000 loops, best of 3: 0.443 usec per loop
1566$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
15671000000 loops, best of 3: 0.544 usec per loop
1568"""