# Copyright (c) 2013, Mahmoud Hashemi
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above
# copyright notice, this list of conditions and the following
# disclaimer in the documentation and/or other materials provided
# with the distribution.
#
# * The names of the contributors may not be used to endorse or
# promote products derived from this software without specific
# prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

""":mod:`itertools` is full of great examples of Python generator
usage. However, there are still some critical gaps. ``iterutils``
fills many of those gaps with featureful, tested, and Pythonic
solutions.

Many of the functions below have two versions: one that returns an
iterator (denoted by the ``*_iter`` naming pattern), and a
shorter-named convenience form that returns a list. Some of the
following are based on examples from the itertools docs.
"""

import os
import math
import time
import codecs
import random
import itertools
from itertools import zip_longest
from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable


try:
    from .typeutils import make_sentinel
    _UNSET = make_sentinel('_UNSET')
    _REMAP_EXIT = make_sentinel('_REMAP_EXIT')
except ImportError:
    _REMAP_EXIT = object()
    _UNSET = object()


def is_iterable(obj):
    """Similar in nature to :func:`callable`, ``is_iterable`` returns
    ``True`` if an object is `iterable`_, ``False`` if not.

    >>> is_iterable([])
    True
    >>> is_iterable(object())
    False

    .. _iterable: https://docs.python.org/3/glossary.html#term-iterable
    """
    try:
        iter(obj)
    except TypeError:
        return False
    return True


def is_scalar(obj):
    """A near-mirror of :func:`is_iterable`. Returns ``False`` if an
    object is an iterable container type. Strings are considered
    scalar as well, because strings are more often treated as whole
    values as opposed to iterables of 1-character substrings.

    >>> is_scalar(object())
    True
    >>> is_scalar(range(10))
    False
    >>> is_scalar('hello')
    True
    """
    return not is_iterable(obj) or isinstance(obj, (str, bytes))


def is_collection(obj):
    """The opposite of :func:`is_scalar`. Returns ``True`` if an object
    is an iterable other than a string.

    >>> is_collection(object())
    False
    >>> is_collection(range(10))
    True
    >>> is_collection('hello')
    False
    """
    return is_iterable(obj) and not isinstance(obj, (str, bytes))


def split(src, sep=None, maxsplit=None):
    """Splits an iterable based on a separator. Like :meth:`str.split`,
    but for all iterables. Returns a list of lists.

    >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None])
    [['hi', 'hello'], ['sup'], ['soap']]

    See :func:`split_iter` docs for more info.
    """
    return list(split_iter(src, sep, maxsplit))


def split_iter(src, sep=None, maxsplit=None):
    """Splits an iterable based on a separator, *sep*, a max of
    *maxsplit* times (no max by default). *sep* can be:

    * a single value
    * an iterable of separators
    * a single-argument callable that returns True when a separator is
      encountered

    ``split_iter()`` yields lists of non-separator values. A separator will
    never appear in the output.

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None]))
    [['hi', 'hello'], ['sup'], ['soap']]

    Note that ``split_iter`` is based on :meth:`str.split`, so if
    *sep* is ``None``, ``split_iter()`` **groups** separators. If empty lists
    are desired between two contiguous ``None`` values, simply use
    ``sep=[None]``:

    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None]))
    [['hi', 'hello'], ['sup']]
    >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None]))
    [['hi', 'hello'], [], ['sup'], []]

    Using a callable separator:

    >>> falsy_sep = lambda x: not x
    >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep))
    [['hi', 'hello'], [], ['sup'], []]

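    A quick *maxsplit* example; once the limit is reached, remaining
    separators are treated as ordinary values:

    >>> list(split_iter(['a', None, 'b', None, 'c'], maxsplit=1))
    [['a'], ['b', None, 'c']]
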
    See :func:`split` for a list-returning version.

    """
    if not is_iterable(src):
        raise TypeError('expected an iterable')

    if maxsplit is not None:
        maxsplit = int(maxsplit)
        if maxsplit == 0:
            # like str.split() with maxsplit=0: a single group with everything
            yield list(src)
            return

    if callable(sep):
        sep_func = sep
    elif not is_scalar(sep):
        sep = frozenset(sep)
        def sep_func(x): return x in sep
    else:
        def sep_func(x): return x == sep

    cur_group = []
    split_count = 0
    for s in src:
        if maxsplit is not None and split_count >= maxsplit:
            def sep_func(x): return False
        if sep_func(s):
            if sep is None and not cur_group:
                # If sep is None, str.split() "groups" separators
                # check the str.split() docs for more info
                continue
            split_count += 1
            yield cur_group
            cur_group = []
        else:
            cur_group.append(s)

    if cur_group or sep is not None:
        yield cur_group
    return


def lstrip(iterable, strip_value=None):
    """Strips values from the beginning of an iterable. Stripped items will
    match the value of the argument strip_value. Functionality is analogous
    to that of the method str.lstrip. Returns a list.

    >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo')
    ['Bar', 'Bam']

    """
    return list(lstrip_iter(iterable, strip_value))


def lstrip_iter(iterable, strip_value=None):
    """Strips values from the beginning of an iterable. Stripped items will
    match the value of the argument strip_value. Functionality is analogous
    to that of the method str.lstrip. Returns a generator.

    >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo'))
    ['Bar', 'Bam']

    """
    iterator = iter(iterable)
    for i in iterator:
        if i != strip_value:
            yield i
            break
    for i in iterator:
        yield i


def rstrip(iterable, strip_value=None):
    """Strips values from the end of an iterable. Stripped items will
    match the value of the argument strip_value. Functionality is analogous
    to that of the method str.rstrip. Returns a list.

    >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam')
    ['Foo', 'Bar']

    """
    return list(rstrip_iter(iterable, strip_value))


def rstrip_iter(iterable, strip_value=None):
    """Strips values from the end of an iterable. Stripped items will
    match the value of the argument strip_value. Functionality is analogous
    to that of the method str.rstrip. Returns a generator.

    >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam'))
    ['Foo', 'Bar']

    """
    iterator = iter(iterable)
    for i in iterator:
        if i == strip_value:
            cache = list()
            cache.append(i)
            broken = False
            for i in iterator:
                if i == strip_value:
                    cache.append(i)
                else:
                    broken = True
                    break
            if not broken:  # Return to caller here because the end of the
                return      # iterator has been reached
            yield from cache
        yield i


def strip(iterable, strip_value=None):
    """Strips values from the beginning and end of an iterable. Stripped items
    will match the value of the argument strip_value. Functionality is
    analogous to that of the method str.strip. Returns a list.

    >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')
    ['Foo', 'Bar', 'Bam']

    """
    return list(strip_iter(iterable, strip_value))


def strip_iter(iterable, strip_value=None):
    """Strips values from the beginning and end of an iterable. Stripped items
    will match the value of the argument strip_value. Functionality is
    analogous to that of the method str.strip. Returns a generator.

    >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu'))
    ['Foo', 'Bar', 'Bam']

    """
    return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)


def chunked(src, size, count=None, **kw):
    """Returns a list of *count* chunks, each with *size* elements,
    generated from iterable *src*. If *src* is not evenly divisible by
    *size*, the final chunk will have fewer than *size* elements.
    Pass the *fill* keyword argument to supply a pad value and
    enable padding; otherwise, no padding will take place.

    >>> chunked(range(10), 3)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> chunked(range(10), 3, fill=None)
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]
    >>> chunked(range(10), 3, count=2)
    [[0, 1, 2], [3, 4, 5]]

    See :func:`chunked_iter` for more info.
    """
    chunk_iter = chunked_iter(src, size, **kw)
    if count is None:
        return list(chunk_iter)
    else:
        return list(itertools.islice(chunk_iter, count))


def _validate_positive_int(value, name, strictly_positive=True):
    value = int(value)
    if value < 0 or (strictly_positive and value == 0):
        raise ValueError('expected a positive integer ' + name)
    return value


def chunked_iter(src, size, **kw):
    """Generates *size*-sized chunks from *src* iterable. Unless the
    optional *fill* keyword argument is provided, iterables not evenly
    divisible by *size* will have a final chunk that is smaller than
    *size*.

    >>> list(chunked_iter(range(10), 3))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]]
    >>> list(chunked_iter(range(10), 3, fill=None))
    [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]]

    Note that ``fill=None`` in fact uses ``None`` as the fill value.
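
    If *src* is a string or bytes object, chunks are joined back into
    the same type before being yielded:

    >>> list(chunked_iter('abcdefg', 3))
    ['abc', 'def', 'g']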
    """
    # TODO: add count kwarg?
    if not is_iterable(src):
        raise TypeError('expected an iterable')
    size = _validate_positive_int(size, 'chunk size')
    do_fill = True
    try:
        fill_val = kw.pop('fill')
    except KeyError:
        do_fill = False
        fill_val = None
    if kw:
        raise ValueError('got unexpected keyword arguments: %r' % kw.keys())
    if not src:
        return

    def postprocess(chk): return chk
    if isinstance(src, (str, bytes)):
        def postprocess(chk, _sep=type(src)()): return _sep.join(chk)
        if isinstance(src, bytes):
            def postprocess(chk): return bytes(chk)
    src_iter = iter(src)
    while True:
        cur_chunk = list(itertools.islice(src_iter, size))
        if not cur_chunk:
            break
        lc = len(cur_chunk)
        if lc < size and do_fill:
            cur_chunk[lc:] = [fill_val] * (size - lc)
        yield postprocess(cur_chunk)
    return


def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False):
    """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*.
    Optionally, a start of the input can be set via *input_offset*, and
    an overlap between the chunks may be specified via *overlap_size*.
    Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0*
    are always at the beginning of the chunk.

    Returns an iterator of (start, end) tuples, one tuple per chunk.

    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5))
    [(10, 15), (15, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1))
    [(10, 15), (14, 19), (18, 20)]
    >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2))
    [(10, 15), (13, 18), (16, 20)]

    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False))
    [(4, 9), (9, 14), (14, 19)]
    >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True))
    [(4, 5), (5, 10), (10, 15), (15, 19)]

    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False))
    [(2, 7), (6, 11), (10, 15), (14, 17)]
    >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(2, 5), (4, 9), (8, 13), (12, 17)]
    >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True))
    [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)]
    """
    input_size = _validate_positive_int(
        input_size, 'input_size', strictly_positive=False)
    chunk_size = _validate_positive_int(chunk_size, 'chunk_size')
    input_offset = _validate_positive_int(
        input_offset, 'input_offset', strictly_positive=False)
    overlap_size = _validate_positive_int(
        overlap_size, 'overlap_size', strictly_positive=False)

    input_stop = input_offset + input_size

    if align:
        initial_chunk_len = chunk_size - \
            input_offset % (chunk_size - overlap_size)
        if initial_chunk_len != overlap_size:
            yield (input_offset, min(input_offset + initial_chunk_len, input_stop))
        if input_offset + initial_chunk_len >= input_stop:
            return
        input_offset = input_offset + initial_chunk_len - overlap_size

    for i in range(input_offset, input_stop, chunk_size - overlap_size):
        yield (i, min(i + chunk_size, input_stop))

        if i + chunk_size >= input_stop:
            return


def pairwise(src, end=_UNSET):
    """Convenience function for calling :func:`windowed` on *src*, with
    *size* set to 2.

    >>> pairwise(range(5))
    [(0, 1), (1, 2), (2, 3), (3, 4)]
    >>> pairwise([])
    []

    Unless *end* is set, the number of pairs is always one less than
    the number of elements in the iterable passed in, except on an empty input,
    which will return an empty list.

    With *end* set, a number of pairs equal to the length of *src* is returned,
    with the last item of the last pair being equal to *end*.

    >>> list(pairwise(range(3), end=None))
    [(0, 1), (1, 2), (2, None)]

    This way, *end* values can be useful as sentinels to signal the end of the iterable.
    """
    return windowed(src, 2, fill=end)


def pairwise_iter(src, end=_UNSET):
    """Convenience function for calling :func:`windowed_iter` on *src*,
    with *size* set to 2.

    >>> list(pairwise_iter(range(5)))
    [(0, 1), (1, 2), (2, 3), (3, 4)]
    >>> list(pairwise_iter([]))
    []

    Unless *end* is set, the number of pairs is always one less
    than the number of elements in the iterable passed in,
    or zero, when *src* is empty.

    With *end* set, a number of pairs equal to the length of *src* is returned,
    with the last item of the last pair being equal to *end*.

    >>> list(pairwise_iter(range(3), end=None))
    [(0, 1), (1, 2), (2, None)]

    This way, *end* values can be useful as sentinels to signal the end
    of the iterable. For infinite iterators, setting *end* has no effect.
    """
    return windowed_iter(src, 2, fill=end)


def windowed(src, size, fill=_UNSET):
    """Returns tuples with exactly length *size*. If *fill* is unset
    and the iterable is too short to make a window of length *size*,
    no tuples are returned. See :func:`windowed_iter` for more.
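
    >>> windowed(range(5), 3)
    [(0, 1, 2), (1, 2, 3), (2, 3, 4)]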
    """
    return list(windowed_iter(src, size, fill=fill))


def windowed_iter(src, size, fill=_UNSET):
    """Returns tuples with length *size* which represent a sliding
    window over iterable *src*.

    >>> list(windowed_iter(range(7), 3))
    [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)]

    If *fill* is unset, and the iterable is too short to make a window
    of length *size*, then no window tuples are returned.

    >>> list(windowed_iter(range(3), 5))
    []

    With *fill* set, the iterator always yields a number of windows
    equal to the length of the *src* iterable.

    >>> windowed(range(4), 3, fill=None)
    [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)]

    This way, *fill* values can be useful to signal the end of the iterable.
    For infinite iterators, setting *fill* has no effect.
    """
    tees = itertools.tee(src, size)
    if fill is _UNSET:
        try:
            for i, t in enumerate(tees):
                for _ in range(i):
                    next(t)
        except StopIteration:
            return zip([])
        return zip(*tees)

    for i, t in enumerate(tees):
        for _ in range(i):
            try:
                next(t)
            except StopIteration:
                continue
    return zip_longest(*tees, fillvalue=fill)


def xfrange(stop, start=None, step=1.0):
    """Same as :func:`frange`, but generator-based instead of returning a
    list.

    >>> tuple(xfrange(1, 3, step=0.75))
    (1.0, 1.75, 2.5)

    See :func:`frange` for more details.
    """
    if not step:
        raise ValueError('step must be non-zero')
    if start is None:
        start, stop = 0.0, stop * 1.0
    else:
        # swap when all args are used
        stop, start = start * 1.0, stop * 1.0
    cur = start
    while cur < stop:
        yield cur
        cur += step


def frange(stop, start=None, step=1.0):
    """A :func:`range` clone for float-based ranges.

    >>> frange(5)
    [0.0, 1.0, 2.0, 3.0, 4.0]
    >>> frange(6, step=1.25)
    [0.0, 1.25, 2.5, 3.75, 5.0]
    >>> frange(100.5, 101.5, 0.25)
    [100.5, 100.75, 101.0, 101.25]
    >>> frange(5, 0)
    []
    >>> frange(5, 0, step=-1.25)
    [5.0, 3.75, 2.5, 1.25]
    """
    if not step:
        raise ValueError('step must be non-zero')
    if start is None:
        start, stop = 0.0, stop * 1.0
    else:
        # swap when all args are used
        stop, start = start * 1.0, stop * 1.0
    count = int(math.ceil((stop - start) / step))
    ret = [None] * count
    if not ret:
        return ret
    ret[0] = start
    for i in range(1, count):
        ret[i] = ret[i - 1] + step
    return ret


def backoff(start, stop, count=None, factor=2.0, jitter=False):
    """Returns a list of geometrically-increasing floating-point numbers,
    suitable for usage with `exponential backoff`_. Exactly like
    :func:`backoff_iter`, but without the ``'repeat'`` option for
    *count*. See :func:`backoff_iter` for more details.

    .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff

    >>> backoff(1, 10)
    [1.0, 2.0, 4.0, 8.0, 10.0]
    """
    if count == 'repeat':
        raise ValueError("'repeat' supported in backoff_iter, not backoff")
    return list(backoff_iter(start, stop, count=count,
                             factor=factor, jitter=jitter))


def backoff_iter(start, stop, count=None, factor=2.0, jitter=False):
    """Generates a sequence of geometrically-increasing floats, suitable
    for usage with `exponential backoff`_. Starts with *start*,
    increasing by *factor* until *stop* is reached, optionally
    stopping iteration once *count* numbers are yielded. *factor*
    defaults to 2. In general, retrying with properly-configured
    backoff creates a better-behaved component for a larger service
    ecosystem.

    .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff

    >>> list(backoff_iter(1.0, 10.0, count=5))
    [1.0, 2.0, 4.0, 8.0, 10.0]
    >>> list(backoff_iter(1.0, 10.0, count=8))
    [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0]
    >>> list(backoff_iter(0.25, 100.0, factor=10))
    [0.25, 2.5, 25.0, 100.0]

    A simplified usage example:

    .. code-block:: python

        for timeout in backoff_iter(0.25, 5.0):
            try:
                res = network_call()
                break
            except Exception as e:
                log(e)
                time.sleep(timeout)

    An enhancement for large-scale systems would be to add variation,
    or *jitter*, to timeout values. This is done to avoid a thundering
    herd on the receiving end of the network call.

    Finally, for *count*, the special value ``'repeat'`` can be passed to
    continue yielding indefinitely.

    Args:

        start (float): Positive number for baseline.
        stop (float): Positive number for maximum.
        count (int): Number of steps before stopping
            iteration. Defaults to the number of steps between *start* and
            *stop*. Pass the string `'repeat'` to continue iteration
            indefinitely.
        factor (float): Rate of exponential increase. Defaults to `2.0`,
            e.g., `[1, 2, 4, 8, 16]`.
        jitter (float): A factor between `-1.0` and `1.0`, used to
            uniformly randomize and thus spread out timeouts in a distributed
            system, avoiding rhythm effects. Positive values use the base
            backoff curve as a maximum, negative values use the curve as a
            minimum. Set to 1.0 or `True` for a jitter approximating
            Ethernet's time-tested backoff solution. Defaults to `False`.

    """
    start = float(start)
    stop = float(stop)
    factor = float(factor)
    if start < 0.0:
        raise ValueError('expected start >= 0, not %r' % start)
    if factor < 1.0:
        raise ValueError('expected factor >= 1.0, not %r' % factor)
    if stop == 0.0:
        raise ValueError('expected stop greater than zero, not %r' % stop)
    if stop < start:
        raise ValueError('expected stop >= start, not %r' % stop)
    if count is None:
        denom = start if start else 1
        count = 1 + math.ceil(math.log(stop/denom, factor))
        count = count if start else count + 1
    if count != 'repeat' and count < 0:
        raise ValueError('count must be positive or "repeat", not %r' % count)
    if jitter:
        jitter = float(jitter)
        if not (-1.0 <= jitter <= 1.0):
            raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter)

    cur, i = start, 0
    while count == 'repeat' or i < count:
        if not jitter:
            cur_ret = cur
        else:
            cur_ret = cur - (cur * jitter * random.random())
        yield cur_ret
        i += 1
        if cur == 0:
            cur = 1
        elif cur < stop:
            cur *= factor
        if cur > stop:
            cur = stop
    return


def bucketize(src, key=bool, value_transform=None, key_filter=None):
    """Group values in the *src* iterable by the value returned by *key*.

    >>> bucketize(range(5))
    {False: [0], True: [1, 2, 3, 4]}
    >>> is_odd = lambda x: x % 2 == 1
    >>> bucketize(range(5), is_odd)
    {False: [0, 2, 4], True: [1, 3]}

    *key* is :class:`bool` by default, but can be a callable, a string,
    or a list. If it is a string, it is the name of the attribute on
    which to bucketize objects.

    >>> bucketize([1+1j, 2+2j, 1, 2], key='real')
    {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]}

    If *key* is a list, it contains the bucket for each corresponding
    object:

    >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])
    {0: [1, 4], 1: [2], 2: [365, 98]}

    Value lists are not deduplicated:

    >>> bucketize([None, None, None, 'hello'])
    {False: [None, None, None], True: ['hello']}

    Bucketize into more than two groups:

    >>> bucketize(range(10), lambda x: x % 3)
    {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]}

    ``bucketize`` has a couple of advanced options useful in certain
    cases. *value_transform* can be used to modify values as they are
    added to buckets, and *key_filter* will allow excluding certain
    buckets from being collected.

    >>> bucketize(range(5), value_transform=lambda x: x*x)
    {False: [0], True: [1, 4, 9, 16]}

    >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1)
    {0: [0, 3, 6, 9], 2: [2, 5, 8]}

    Note that in some of these examples there were at most two keys, ``True`` and
    ``False``, and each key present has a list with at least one
    item. See :func:`partition` for a version specialized for binary
    use cases.

    """
    if not is_iterable(src):
        raise TypeError('expected an iterable')
    elif isinstance(key, list):
        if len(key) != len(src):
            raise ValueError("key and src have to be the same length")
        src = zip(key, src)

    if isinstance(key, str):
        def key_func(x): return getattr(x, key, x)
    elif callable(key):
        key_func = key
    elif isinstance(key, list):
        def key_func(x): return x[0]
    else:
        raise TypeError('expected key to be callable or a string or a list')

    if value_transform is None:
        def value_transform(x): return x
    if not callable(value_transform):
        raise TypeError('expected callable value transform function')
    if isinstance(key, list):
        f = value_transform
        def value_transform(x): return f(x[1])

    ret = {}
    for val in src:
        key_of_val = key_func(val)
        if key_filter is None or key_filter(key_of_val):
            ret.setdefault(key_of_val, []).append(value_transform(val))
    return ret


def partition(src, key=bool):
    """No relation to :meth:`str.partition`, ``partition`` is like
    :func:`bucketize`, but for added convenience returns a tuple of
    ``(truthy_values, falsy_values)``.

    >>> nonempty, empty = partition(['', '', 'hi', '', 'bye'])
    >>> nonempty
    ['hi', 'bye']

    *key* defaults to :class:`bool`, but can be carefully overridden to
    use either a function that returns ``True`` or ``False``, or a
    string name of the attribute on which to partition objects.

    >>> import string
    >>> is_digit = lambda x: x in string.digits
    >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit)
    >>> ''.join(decimal_digits), ''.join(hexletters)
    ('0123456789', 'abcdefABCDEF')
    """
    bucketized = bucketize(src, key)
    return bucketized.get(True, []), bucketized.get(False, [])


def unique(src, key=None):
    """``unique()`` returns a list of unique values, as determined by
    *key*, in the order they first appeared in the input iterable,
    *src*.

    >>> ones_n_zeros = '11010110001010010101010'
    >>> ''.join(unique(ones_n_zeros))
    '10'

    See :func:`unique_iter` docs for more details.
    """
    return list(unique_iter(src, key))


def unique_iter(src, key=None):
    """Yield unique elements from the iterable, *src*, based on *key*,
    in the order in which they first appeared in *src*.

    >>> repetitious = [1, 2, 3] * 10
    >>> list(unique_iter(repetitious))
    [1, 2, 3]

    By default, *key* is the object itself, but *key* can either be a
    callable or, for convenience, a string name of the attribute on
    which to uniqueify objects, falling back on identity when the
    attribute is not present.

    >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes']
    >>> list(unique_iter(pleasantries, key=lambda x: len(x)))
    ['hi', 'hello', 'bye']
    """
    if not is_iterable(src):
        raise TypeError('expected an iterable, not %r' % type(src))
    if key is None:
        def key_func(x): return x
    elif callable(key):
        key_func = key
    elif isinstance(key, str):
        def key_func(x): return getattr(x, key, x)
    else:
        raise TypeError('"key" expected a string or callable, not %r' % key)
    seen = set()
    for i in src:
        k = key_func(i)
        if k not in seen:
            seen.add(k)
            yield i
    return


def redundant(src, key=None, groups=False):
    """The complement of :func:`unique()`.

    By default returns non-unique/duplicate values as a list of the
    *first* redundant value in *src*. Pass ``groups=True`` to get
    groups of all values with redundancies, ordered by position of the
    first redundant value. This is useful in conjunction with some
    normalizing *key* function.

    >>> redundant([1, 2, 3, 4])
    []
    >>> redundant([1, 2, 3, 2, 3, 3, 4])
    [2, 3]
    >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True)
    [[2, 2], [3, 3, 3]]

    An example using a *key* function to do case-insensitive
    redundancy detection.

    >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower)
    ['Hi']
    >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower)
    [['hi', 'Hi', 'HI']]

    *key* should also be used when the values in *src* are not hashable.

    .. note::

        The output of this function is designed for reporting
        duplicates in contexts when a unique input is desired. Due to
        the grouped return type, there is no streaming equivalent of
        this function for the time being.

    """
    if key is None:
        pass
    elif callable(key):
        key_func = key
    elif isinstance(key, (str, bytes)):
        def key_func(x): return getattr(x, key, x)
    else:
        raise TypeError('"key" expected a string or callable, not %r' % key)
    seen = {}  # key to first seen item
    redundant_order = []
    redundant_groups = {}
    for i in src:
        k = key_func(i) if key else i
        if k not in seen:
            seen[k] = i
        else:
            if k in redundant_groups:
                if groups:
                    redundant_groups[k].append(i)
            else:
                redundant_order.append(k)
                redundant_groups[k] = [seen[k], i]
    if not groups:
        ret = [redundant_groups[k][1] for k in redundant_order]
    else:
        ret = [redundant_groups[k] for k in redundant_order]
    return ret


def one(src, default=None, key=None):
    """Along the same lines as builtins, :func:`all` and :func:`any`, and
    similar to :func:`first`, ``one()`` returns the single object in
    the given iterable *src* that evaluates to ``True``, as determined
    by callable *key*. If unset, *key* defaults to :class:`bool`. If
    no such objects are found, *default* is returned. If *default* is
    not passed, ``None`` is returned.

    If *src* has more than one object that evaluates to ``True``, or
    if there is no object that fulfills the condition, *default* is
    returned. It's like an `XOR`_ over an iterable.

    >>> one((True, False, False))
    True
    >>> one((True, False, True))
    >>> one((0, 0, 'a'))
    'a'
    >>> one((0, False, None))
    >>> one((True, True), default=False)
    False
    >>> bool(one(('', 1)))
    True
    >>> one((10, 20, 30, 42), key=lambda i: i > 40)
    42

    See `Martín Gaitán's original repo`_ for further use cases.

    .. _Martín Gaitán's original repo: https://github.com/mgaitan/one
    .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or

    """
    ones = list(itertools.islice(filter(key, src), 2))
    return ones[0] if len(ones) == 1 else default


def first(iterable, default=None, key=None):
    """Return first element of *iterable* that evaluates to ``True``, else
    return ``None`` or optional *default*. Similar to :func:`one`.

    >>> first([0, False, None, [], (), 42])
    42
    >>> first([0, False, None, [], ()]) is None
    True
    >>> first([0, False, None, [], ()], default='ohai')
    'ohai'
    >>> import re
    >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)'])
    >>> m.group(1)
    'bc'

    The optional *key* argument specifies a one-argument predicate function
    like that used for *filter()*. The *key* argument, if supplied, should be
    in keyword form. For example, finding the first even number in an iterable:

    >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0)
    4

    Contributed by Hynek Schlawack, author of `the original standalone module`_.

    .. _the original standalone module: https://github.com/hynek/first
    """
    return next(filter(key, iterable), default)


def flatten_iter(iterable):
    """``flatten_iter()`` yields all the elements from *iterable* while
    collapsing any nested iterables.

    >>> nested = [[1, 2], [[3], [4, 5]]]
    >>> list(flatten_iter(nested))
    [1, 2, 3, 4, 5]
    """
    for item in iterable:
        if isinstance(item, Iterable) and not isinstance(item, (str, bytes)):
            yield from flatten_iter(item)
        else:
            yield item


def flatten(iterable):
    """``flatten()`` returns a collapsed list of all the elements from
    *iterable* while collapsing any nested iterables.

    >>> nested = [[1, 2], [[3], [4, 5]]]
    >>> flatten(nested)
    [1, 2, 3, 4, 5]
    """
    return list(flatten_iter(iterable))


def same(iterable, ref=_UNSET):
    """``same()`` returns ``True`` when all values in *iterable* are
    equal to one another, or optionally a reference value,
    *ref*. Similar to :func:`all` and :func:`any` in that it evaluates
    an iterable and returns a :class:`bool`. ``same()`` returns
    ``True`` for empty iterables.

    >>> same([])
    True
    >>> same([1])
    True
    >>> same(['a', 'a', 'a'])
    True
    >>> same(range(20))
    False
    >>> same([[], []])
    True
    >>> same([[], []], ref='test')
    False

    """
    iterator = iter(iterable)
    if ref is _UNSET:
        ref = next(iterator, ref)
    return all(val == ref for val in iterator)


def default_visit(path, key, value):
    # print('visit(%r, %r, %r)' % (path, key, value))
    return key, value


# enable the extreme: monkeypatching iterutils with a different default_visit
_orig_default_visit = default_visit


def default_enter(path, key, value):
    # print('enter(%r, %r)' % (key, value))
    if isinstance(value, (str, bytes)):
        return value, False
    elif isinstance(value, Mapping):
        return value.__class__(), ItemsView(value)
    elif isinstance(value, Sequence):
        return value.__class__(), enumerate(value)
    elif isinstance(value, Set):
        return value.__class__(), enumerate(value)
    else:
        # files, strings, other iterables, and scalars are not
        # traversed
        return value, False


def default_exit(path, key, old_parent, new_parent, new_items):
    # print('exit(%r, %r, %r, %r, %r)'
    #       % (path, key, old_parent, new_parent, new_items))
    ret = new_parent
    if isinstance(new_parent, Mapping):
        new_parent.update(new_items)
    elif isinstance(new_parent, Sequence):
        vals = [v for i, v in new_items]
        try:
            new_parent.extend(vals)
        except AttributeError:
            ret = new_parent.__class__(vals)  # tuples
    elif isinstance(new_parent, Set):
        vals = [v for i, v in new_items]
        try:
            new_parent.update(vals)
        except AttributeError:
            ret = new_parent.__class__(vals)  # frozensets
    else:
        raise RuntimeError('unexpected iterable type: %r' % type(new_parent))
    return ret


def remap(root, visit=default_visit, enter=default_enter, exit=default_exit,
          **kwargs):
    """The remap ("recursive map") function is used to traverse and
    transform nested structures. Lists, tuples, sets, and dictionaries
    are just a few of the data structures nested into heterogeneous
    tree-like structures that are so common in programming.
    Unfortunately, Python's built-in ways to manipulate collections
    are almost all flat. List comprehensions may be fast and succinct,
    but they do not recurse, making it tedious to apply quick changes
    or complex transforms to real-world data.

    remap goes where list comprehensions cannot.

    Here's an example of removing all Nones from some data:

    >>> from pprint import pprint
    >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None},
    ...            'Babylon 5': 6, 'Dr. Who': None}
    >>> pprint(remap(reviews, lambda p, k, v: v is not None))
    {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}}

    Notice how both Nones have been removed despite the nesting in the
    dictionary. Not bad for a one-liner, and that's just the beginning.
    See `this remap cookbook`_ for more delicious recipes.

    .. _this remap cookbook: http://sedimental.org/remap.html

    remap takes four main arguments: the object to traverse and three
    optional callables which determine how the remapped object will be
    created.

    Args:

        root: The target object to traverse. By default, remap
            supports iterables like :class:`list`, :class:`tuple`,
            :class:`dict`, and :class:`set`, but any object traversable by
            *enter* will work.
        visit (callable): This function is called on every item in
            *root*. It must accept three positional arguments, *path*,
            *key*, and *value*. *path* is simply a tuple of parents'
            keys. *visit* should return the new key-value pair. It may
            also return ``True`` as shorthand to keep the old item
            unmodified, or ``False`` to drop the item from the new
            structure. *visit* is called after *enter*, on the new parent.

            The *visit* function is called for every item in root,
            including duplicate items. For traversable values, it is
            called on the new parent object, after all its children
            have been visited. The default visit behavior simply
            returns the key-value pair unmodified.
        enter (callable): This function controls which items in *root*
            are traversed. It accepts the same arguments as *visit*: the
            path, the key, and the value of the current item. It returns a
            pair of the blank new parent, and an iterator over the items
            which should be visited. If ``False`` is returned instead of
            an iterator, the value will not be traversed.

            The *enter* function is only called once per unique value. The
            default enter behavior supports mappings, sequences, and
            sets. Strings and all other iterables will not be traversed.
        exit (callable): This function determines how to handle items
            once they have been visited. It gets the same three
            arguments as the other functions -- *path*, *key*, *value*
            -- plus two more: the blank new parent object returned
            from *enter*, and a list of the new items, as remapped by
            *visit*.

            Like *enter*, the *exit* function is only called once per
            unique value. The default exit behavior is to simply add
            all new items to the new parent, e.g., using
            :meth:`list.extend` and :meth:`dict.update` to add to the
            new parent. Immutable objects, such as a :class:`tuple` or
            :class:`namedtuple`, must be recreated from scratch, but
            use the same type as the new parent passed back from the
            *enter* function.
        reraise_visit (bool): A pragmatic convenience for the *visit*
            callable. When set to ``False``, remap ignores any errors
            raised by the *visit* callback. Items causing exceptions
            are kept. See examples for more details.
        trace (bool): Pass ``trace=True`` to print out the entire
            traversal. Or pass a tuple of ``'visit'``, ``'enter'``,
            or ``'exit'`` to print only the selected events.

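    As a quick illustration of the ``True`` shorthand, here is a *visit*
    callable that rounds floats and returns ``True`` to keep everything
    else as-is:

    >>> round_floats = lambda p, k, v: (k, round(v, 2)) if isinstance(v, float) else True
    >>> remap({'a': 1.23456, 'b': {'c': 2.34567}}, visit=round_floats)
    {'a': 1.23, 'b': {'c': 2.35}}
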
    remap is designed to cover the majority of cases with just the
    *visit* callable. While passing in multiple callables is very
    empowering, remap is designed so very few cases should require
    passing more than one function.

    When passing *enter* and *exit*, it's common and easiest to build
    on the default behavior. Simply add ``from boltons.iterutils import
    default_enter`` (or ``default_exit``), and have your enter/exit
    function call the default behavior before or after your custom
    logic. See `this example`_.

    Duplicate and self-referential objects (aka reference loops) are
    automatically handled internally, `as shown here`_.

    .. _this example: http://sedimental.org/remap.html#sort_all_lists
    .. _as shown here: http://sedimental.org/remap.html#corner_cases

    """
    # TODO: improve argument formatting in sphinx doc
    # TODO: enter() return (False, items) to continue traverse but cancel copy?
    if not callable(visit):
        raise TypeError('visit expected callable, not: %r' % visit)
    if not callable(enter):
        raise TypeError('enter expected callable, not: %r' % enter)
    if not callable(exit):
        raise TypeError('exit expected callable, not: %r' % exit)
    reraise_visit = kwargs.pop('reraise_visit', True)
    trace = kwargs.pop('trace', ())
    if trace is True:
        trace = ('visit', 'enter', 'exit')
    elif isinstance(trace, str):
        trace = (trace,)
    if not isinstance(trace, (tuple, list, set)):
        raise TypeError('trace expected tuple of event names, not: %r' % trace)
    trace_enter, trace_exit, trace_visit = 'enter' in trace, 'exit' in trace, 'visit' in trace

    if kwargs:
        raise TypeError('unexpected keyword arguments: %r' % kwargs.keys())

    path, registry, stack = (), {}, [(None, root)]
    new_items_stack = []
    while stack:
        key, value = stack.pop()
        id_value = id(value)
        if key is _REMAP_EXIT:
            key, new_parent, old_parent = value
            id_value = id(old_parent)
            path, new_items = new_items_stack.pop()
            if trace_exit:
                print(' .. remap exit:', path, '-', key, '-',
                      old_parent, '-', new_parent, '-', new_items)
            value = exit(path, key, old_parent, new_parent, new_items)
            if trace_exit:
                print(' .. remap exit result:', value)
            registry[id_value] = value
            if not new_items_stack:
                continue
        elif id_value in registry:
            value = registry[id_value]
        else:
            if trace_enter:
                print(' .. remap enter:', path, '-', key, '-', value)
            res = enter(path, key, value)
            if trace_enter:
                print(' .. remap enter result:', res)
            try:
                new_parent, new_items = res
            except TypeError:
                # TODO: handle False?
                raise TypeError('enter should return a tuple of (new_parent,'
                                ' items_iterator), not: %r' % res)
            if new_items is not False:
                # traverse unless False is explicitly passed
                registry[id_value] = new_parent
                new_items_stack.append((path, []))
                if value is not root:
                    path += (key,)
                stack.append((_REMAP_EXIT, (key, new_parent, value)))
                if new_items:
                    stack.extend(reversed(list(new_items)))
                if trace_enter:
                    print(' .. remap stack size now:', len(stack))
                continue
        if visit is _orig_default_visit:
            # avoid function call overhead by inlining identity operation
            visited_item = (key, value)
        else:
            try:
                if trace_visit:
                    print(' .. remap visit:', path, '-', key, '-', value)
                visited_item = visit(path, key, value)
            except Exception:
                if reraise_visit:
                    raise
                visited_item = True
            if visited_item is False:
                if trace_visit:
                    print(' .. remap visit result: <drop>')
                continue  # drop
            elif visited_item is True:
                visited_item = (key, value)
            if trace_visit:
                print(' .. remap visit result:', visited_item)
            # TODO: typecheck?
            #     raise TypeError('expected (key, value) from visit(),'
            #                     ' not: %r' % visited_item)
        try:
            new_items_stack[-1][1].append(visited_item)
        except IndexError:
            raise TypeError('expected remappable root, not: %r' % root)
    return value


class PathAccessError(KeyError, IndexError, TypeError):
    """An amalgamation of KeyError, IndexError, and TypeError,
    representing what can occur when looking up a path in a nested
    object.
    """

    def __init__(self, exc, seg, path):
        self.exc = exc
        self.seg = seg
        self.path = path

    def __repr__(self):
        cn = self.__class__.__name__
        return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})'

    def __str__(self):
        return ('could not access %r from path %r, got error: %r'
                % (self.seg, self.path, self.exc))


def get_path(root, path, default=_UNSET):
    """Retrieve a value from a nested object via a tuple representing the
    lookup path.

    >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
    >>> get_path(root, ('a', 'b', 'c', 2, 0))
    3

    The path tuple format is intentionally consistent with that of
    :func:`remap`, but a single dotted string can also be passed.

    One of get_path's chief aims is improved error messaging. EAFP is
    great, but the error messages are not.

    For instance, ``root['a']['b']['c'][2][1]`` gives back
    ``IndexError: list index out of range``

    What went out of range where? get_path currently raises
    ``PathAccessError: could not access 1 from path ('a', 'b', 'c', 2,
    1), got error: IndexError('list index out of range')``, a
    subclass of IndexError and KeyError.

    You can also pass a default that covers the entire operation,
    should the lookup fail at any level.
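
    For example, reusing *root* from above:

    >>> get_path(root, ('a', 'b', 'z'), default='missing')
    'missing'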

    Args:
        root: The target nesting of dictionaries, lists, or other
            objects supporting ``__getitem__``.
        path (tuple): A sequence of strings and integers to be successively
            looked up within *root*. A dot-separated (``a.b``) string may
            also be passed.
        default: The value to be returned should any
            ``PathAccessError`` exceptions be raised.
    """
    if isinstance(path, str):
        path = path.split('.')
    cur = root
    try:
        for seg in path:
            try:
                cur = cur[seg]
            except (KeyError, IndexError) as exc:
                raise PathAccessError(exc, seg, path)
            except TypeError as exc:
                # either string index in a list, or a parent that
                # doesn't support indexing
                try:
                    seg = int(seg)
                    cur = cur[seg]
                except (ValueError, KeyError, IndexError, TypeError):
                    if not is_iterable(cur):
                        exc = TypeError('%r object is not indexable'
                                        % type(cur).__name__)
                    raise PathAccessError(exc, seg, path)
    except PathAccessError:
        if default is _UNSET:
            raise
        return default
    return cur


def research(root, query=lambda p, k, v: True, reraise=False, enter=default_enter):
    """The :func:`research` function uses :func:`remap` to recurse over
    any data nested in *root*, and find values which match a given
    criterion, specified by the *query* callable.

    Results are returned as a list of ``(path, value)`` pairs. The
    paths are tuples in the same format accepted by
    :func:`get_path`. This can be useful for comparing values nested
    in two or more different structures.

    Here's a simple example that finds all integers:

    >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
    >>> res = research(root, query=lambda p, k, v: isinstance(v, int))
    >>> print(sorted(res))
    [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)]

    Note how *query* follows the same, familiar ``path, key, value``
    signature as the ``visit`` and ``enter`` functions on
    :func:`remap`, and returns a :class:`bool`.

    Args:
        root: The target object to search. Supports the same types of
            objects as :func:`remap`, including :class:`list`,
            :class:`tuple`, :class:`dict`, and :class:`set`.
        query (callable): The function called on every object to
            determine whether to include it in the search results. The
            callable must accept three arguments, *path*, *key*, and
            *value*, commonly abbreviated *p*, *k*, and *v*, same as
            *enter* and *visit* from :func:`remap`.
        reraise (bool): Whether to reraise exceptions raised by *query*
            or to simply drop the result that caused the error.


    With :func:`research` it's easy to inspect the details of a data
    structure, like finding values that are at a certain depth (using
    ``len(p)``) and much more. If more advanced functionality is
    needed, check out the code and make your own :func:`remap`
    wrapper, and consider `submitting a patch`_!
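
    For instance, a depth-based query, reusing *root* from the example above:

    >>> sorted(research(root, query=lambda p, k, v: len(p) == 1))
    [(('a', 'b'), 1), (('a', 'c'), (2, 'd', 3))]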

    .. _submitting a patch: https://github.com/mahmoud/boltons/pulls
    """
    ret = []

    if not callable(query):
        raise TypeError('query expected callable, not: %r' % query)

    def _enter(path, key, value):
        try:
            if query(path, key, value):
                ret.append((path + (key,), value))
        except Exception:
            if reraise:
                raise
        return enter(path, key, value)

    remap(root, enter=_enter)
    return ret


# TODO: recollect()
# TODO: refilter()
# TODO: reiter()


# GUID iterators: 10x faster and somewhat more compact than uuid.

class GUIDerator:
    """The GUIDerator is an iterator that yields a globally-unique
    identifier (GUID) on every iteration. The GUIDs produced are
    hexadecimal strings.

    Testing shows it to be around 12x faster than the uuid module. By
    default it is also more compact, partly due to its default 96-bit
    (24-hexdigit) length. 96 bits of randomness means that there is a
    1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more
    or less uniqueness is desired, the *size* argument can be adjusted
    accordingly.

    Args:
        size (int): character length of the GUID, defaults to 24. Lengths
            between 20 and 36 are considered valid.

    The GUIDerator has built-in fork protection that causes it to
    detect a fork on next iteration and reseed accordingly.
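
    A small usage example; values vary from run to run, but the default
    length is fixed:

    >>> len(next(GUIDerator()))
    24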

    """

    def __init__(self, size=24):
        self.size = size
        if size < 20 or size > 36:
            raise ValueError('expected 20 <= size <= 36')
        import hashlib
        self._sha1 = hashlib.sha1
        self.count = itertools.count()
        self.reseed()

    def reseed(self):
        import socket
        self.pid = os.getpid()
        self.salt = '-'.join([str(self.pid),
                              socket.gethostname() or '<nohostname>',
                              str(time.time()),
                              os.urandom(6).hex()])
        return

    def __iter__(self):
        return self

    def __next__(self):
        if os.getpid() != self.pid:
            self.reseed()
        target_bytes = (self.salt + str(next(self.count))).encode('utf8')
        hash_text = self._sha1(target_bytes).hexdigest()[:self.size]
        return hash_text

    next = __next__


class SequentialGUIDerator(GUIDerator):
    """Much like the standard GUIDerator, the SequentialGUIDerator is an
    iterator that yields a globally-unique identifier (GUID) on every
    iteration. The GUIDs produced are hexadecimal strings.

    The SequentialGUIDerator differs in that it picks a starting GUID
    value and increments every iteration. This yields GUIDs which are
    of course unique, but also ordered and lexicographically sortable.

    The SequentialGUIDerator is around 50% faster than the normal
    GUIDerator, making it almost 20x as fast as the built-in uuid
    module. By default it is also more compact, partly due to its
    96-bit (24-hexdigit) default length. 96 bits of randomness means that
    there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64
    iterations. If more or less uniqueness is desired, the *size*
    argument can be adjusted accordingly.

    Args:
        size (int): character length of the GUID, defaults to 24.

    Note that with SequentialGUIDerator there is a chance of GUIDs
    growing larger than the size configured. The SequentialGUIDerator
    has built-in fork protection that causes it to detect a fork on
    next iteration and reseed accordingly.

    """

    def reseed(self):
        super().reseed()
        start_str = self._sha1(self.salt.encode('utf8')).hexdigest()
        self.start = int(start_str[:self.size], 16)
        self.start |= (1 << ((self.size * 4) - 2))

    def __next__(self):
        if os.getpid() != self.pid:
            self.reseed()
        return '%x' % (next(self.count) + self.start)

    next = __next__


guid_iter = GUIDerator()
seq_guid_iter = SequentialGUIDerator()


def soft_sorted(iterable, first=None, last=None, key=None, reverse=False):
    """For when you care about the order of some elements, but not about
    others.

    Use this to float to the top and/or sink to the bottom a specific
    ordering, while sorting the rest of the elements according to
    normal :func:`sorted` rules.

    >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two'])
    ['one', 'two', 'a', 'b']
    >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True)
    [6, 5, 3, 1, 0, 2, 4]
    >>> import string
    >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower))
    'aA1023456789cCdDeEfFbB'

    Args:
        iterable (list): A list or other iterable to sort.
        first (list): A sequence to enforce for elements which should
            appear at the beginning of the returned list.
        last (list): A sequence to enforce for elements which should
            appear at the end of the returned list.
        key (callable): Callable used to generate a comparable key for
            each item to be sorted, same as the key in
            :func:`sorted`. Note that entries in *first* and *last*
            should be the keys for the items. Defaults to
            passthrough/the identity function.
        reverse (bool): Whether or not elements not explicitly ordered
            by *first* and *last* should be in reverse order or not.

    Returns a new list in sorted order.
    """
    first = first or []
    last = last or []
    key = key or (lambda x: x)
    seq = list(iterable)
    other = [x for x in seq if not (
        (first and key(x) in first) or (last and key(x) in last))]
    other.sort(key=key, reverse=reverse)

    if first:
        first = sorted([x for x in seq if key(x) in first],
                       key=lambda x: first.index(key(x)))
    if last:
        last = sorted([x for x in seq if key(x) in last],
                      key=lambda x: last.index(key(x)))
    return first + other + last


def untyped_sorted(iterable, key=None, reverse=False):
    """A version of :func:`sorted` which will happily sort an iterable of
    heterogeneous types and return a new list, similar to legacy Python's
    behavior.

    >>> untyped_sorted(['abc', 2.0, 1, 2, 'def'])
    [1, 2.0, 2, 'abc', 'def']

    Note how mutually orderable types are sorted as expected, as in
    the case of the integers and floats above.

    .. note::

        Results may vary across Python versions and builds, but the
        function will produce a sorted list, except in the case of
        explicitly unorderable objects.

    """
    class _Wrapper:
        __slots__ = ('obj',)

        def __init__(self, obj):
            self.obj = obj

        def __lt__(self, other):
            obj = key(self.obj) if key is not None else self.obj
            other = key(other.obj) if key is not None else other.obj
            try:
                ret = obj < other
            except TypeError:
                ret = ((type(obj).__name__, id(type(obj)), obj)
                       < (type(other).__name__, id(type(other)), other))
            return ret

    if key is not None and not callable(key):
        raise TypeError('expected function or callable object for key, not: %r'
                        % key)

    return sorted(iterable, key=_Wrapper, reverse=reverse)


"""
May actually be faster to do an isinstance check for a str path

$ python -m timeit -s "x = [1]" "x[0]"
10000000 loops, best of 3: 0.0207 usec per loop
$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass"
10000000 loops, best of 3: 0.029 usec per loop
$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass"
1000000 loops, best of 3: 0.315 usec per loop
# setting up try/except is fast, only around 0.01us
# actually triggering the exception takes almost 10x as long

$ python -m timeit -s "x = [1]" "isinstance(x, basestring)"
10000000 loops, best of 3: 0.141 usec per loop
$ python -m timeit -s "x = [1]" "isinstance(x, str)"
10000000 loops, best of 3: 0.131 usec per loop
$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass"
1000000 loops, best of 3: 0.443 usec per loop
$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass"
1000000 loops, best of 3: 0.544 usec per loop
"""