Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 17%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

521 statements  

1# Copyright (c) 2013, Mahmoud Hashemi 

2# 

3# Redistribution and use in source and binary forms, with or without 

4# modification, are permitted provided that the following conditions are 

5# met: 

6# 

7# * Redistributions of source code must retain the above copyright 

8# notice, this list of conditions and the following disclaimer. 

9# 

10# * Redistributions in binary form must reproduce the above 

11# copyright notice, this list of conditions and the following 

12# disclaimer in the documentation and/or other materials provided 

13# with the distribution. 

14# 

15# * The names of the contributors may not be used to endorse or 

16# promote products derived from this software without specific 

17# prior written permission. 

18# 

19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

30 

31""":mod:`itertools` is full of great examples of Python generator 

32usage. However, there are still some critical gaps. ``iterutils`` 

33fills many of those gaps with featureful, tested, and Pythonic 

34solutions. 

35 

36Many of the functions below have two versions, one which 

37returns an iterator (denoted by the ``*_iter`` naming pattern), and a 

38shorter-named convenience form that returns a list. Some of the 

39following are based on examples in itertools docs. 

40""" 

41 

42import os 

43import math 

44import time 

45import codecs 

46import random 

47import itertools 

48from itertools import zip_longest 

49from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable 

50 

51 

52try: 

53 from .typeutils import make_sentinel 

54 _UNSET = make_sentinel('_UNSET') 

55 _REMAP_EXIT = make_sentinel('_REMAP_EXIT') 

56except ImportError: 

57 _REMAP_EXIT = object() 

58 _UNSET = object() 

59 

60 

61def is_iterable(obj): 

62 """Similar in nature to :func:`callable`, ``is_iterable`` returns 

63 ``True`` if an object is `iterable`_, ``False`` if not. 

64 

65 >>> is_iterable([]) 

66 True 

67 >>> is_iterable(object()) 

68 False 

69 

70 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable 

71 """ 

72 try: 

73 iter(obj) 

74 except TypeError: 

75 return False 

76 return True 

77 

78 

79def is_scalar(obj): 

80 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an 

81 object is an iterable container type. Strings are considered 

82 scalar as well, because strings are more often treated as whole 

83 values as opposed to iterables of 1-character substrings. 

84 

85 >>> is_scalar(object()) 

86 True 

87 >>> is_scalar(range(10)) 

88 False 

89 >>> is_scalar('hello') 

90 True 

91 """ 

92 return not is_iterable(obj) or isinstance(obj, (str, bytes)) 

93 

94 

95def is_collection(obj): 

96 """The opposite of :func:`is_scalar`. Returns ``True`` if an object 

97 is an iterable other than a string. 

98 

99 >>> is_collection(object()) 

100 False 

101 >>> is_collection(range(10)) 

102 True 

103 >>> is_collection('hello') 

104 False 

105 """ 

106 return is_iterable(obj) and not isinstance(obj, (str, bytes)) 

107 

108 

109def split(src, sep=None, maxsplit=None): 

110 """Splits an iterable based on a separator. Like :meth:`str.split`, 

111 but for all iterables. Returns a list of lists. 

112 

113 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None]) 

114 [['hi', 'hello'], ['sup'], ['soap']] 

115 

116 See :func:`split_iter` docs for more info. 

117 """ 

118 return list(split_iter(src, sep, maxsplit)) 

119 

120 

121def split_iter(src, sep=None, maxsplit=None): 

122 """Splits an iterable based on a separator, *sep*, a max of 

123 *maxsplit* times (no max by default). *sep* can be: 

124 

125 * a single value 

126 * an iterable of separators 

127 * a single-argument callable that returns True when a separator is 

128 encountered 

129 

130 ``split_iter()`` yields lists of non-separator values. A separator will 

131 never appear in the output. 

132 

133 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])) 

134 [['hi', 'hello'], ['sup'], ['soap']] 

135 

136 Note that ``split_iter`` is based on :func:`str.split`, so if 

137 *sep* is ``None``, ``split()`` **groups** separators. If empty lists 

138 are desired between two contiguous ``None`` values, simply use 

139 ``sep=[None]``: 

140 

141 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None])) 

142 [['hi', 'hello'], ['sup']] 

143 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None])) 

144 [['hi', 'hello'], [], ['sup'], []] 

145 

146 Using a callable separator: 

147 

148 >>> falsy_sep = lambda x: not x 

149 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep)) 

150 [['hi', 'hello'], [], ['sup'], []] 

151 

152 See :func:`split` for a list-returning version. 

153 

154 """ 

155 if not is_iterable(src): 

156 raise TypeError('expected an iterable') 

157 

158 if maxsplit is not None: 

159 maxsplit = int(maxsplit) 

160 if maxsplit == 0: 

161 yield [src] 

162 return 

163 

164 if callable(sep): 

165 sep_func = sep 

166 elif not is_scalar(sep): 

167 sep = frozenset(sep) 

168 sep_func = lambda x: x in sep 

169 else: 

170 sep_func = lambda x: x == sep 

171 

172 cur_group = [] 

173 split_count = 0 

174 for s in src: 

175 if maxsplit is not None and split_count >= maxsplit: 

176 sep_func = lambda x: False 

177 if sep_func(s): 

178 if sep is None and not cur_group: 

179 # If sep is none, str.split() "groups" separators 

180 # check the str.split() docs for more info 

181 continue 

182 split_count += 1 

183 yield cur_group 

184 cur_group = [] 

185 else: 

186 cur_group.append(s) 

187 

188 if cur_group or sep is not None: 

189 yield cur_group 

190 return 

191 

192 

193def lstrip(iterable, strip_value=None): 

194 """Strips values from the beginning of an iterable. Stripped items will 

195 match the value of the argument strip_value. Functionality is analogous 

196 to that of the method str.lstrip. Returns a list. 

197 

198 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo') 

199 ['Bar', 'Bam'] 

200 

201 """ 

202 return list(lstrip_iter(iterable, strip_value)) 

203 

204 

205def lstrip_iter(iterable, strip_value=None): 

206 """Strips values from the beginning of an iterable. Stripped items will 

207 match the value of the argument strip_value. Functionality is analogous 

208 to that of the method str.lstrip. Returns a generator. 

209 

210 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo')) 

211 ['Bar', 'Bam'] 

212 

213 """ 

214 iterator = iter(iterable) 

215 for i in iterator: 

216 if i != strip_value: 

217 yield i 

218 break 

219 for i in iterator: 

220 yield i 

221 

222 

223def rstrip(iterable, strip_value=None): 

224 """Strips values from the end of an iterable. Stripped items will 

225 match the value of the argument strip_value. Functionality is analogous 

226 to that of the method str.rstrip. Returns a list. 

227 

228 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam') 

229 ['Foo', 'Bar'] 

230 

231 """ 

232 return list(rstrip_iter(iterable,strip_value)) 

233 

234 

235def rstrip_iter(iterable, strip_value=None): 

236 """Strips values from the end of an iterable. Stripped items will 

237 match the value of the argument strip_value. Functionality is analogous 

238 to that of the method str.rstrip. Returns a generator. 

239 

240 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam')) 

241 ['Foo', 'Bar'] 

242 

243 """ 

244 iterator = iter(iterable) 

245 for i in iterator: 

246 if i == strip_value: 

247 cache = list() 

248 cache.append(i) 

249 broken = False 

250 for i in iterator: 

251 if i == strip_value: 

252 cache.append(i) 

253 else: 

254 broken = True 

255 break 

256 if not broken: # Return to caller here because the end of the 

257 return # iterator has been reached 

258 yield from cache 

259 yield i 

260 

261 

262def strip(iterable, strip_value=None): 

263 """Strips values from the beginning and end of an iterable. Stripped items 

264 will match the value of the argument strip_value. Functionality is 

265 analogous to that of the method str.strip. Returns a list. 

266 

267 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu') 

268 ['Foo', 'Bar', 'Bam'] 

269 

270 """ 

271 return list(strip_iter(iterable,strip_value)) 

272 

273 

274def strip_iter(iterable,strip_value=None): 

275 """Strips values from the beginning and end of an iterable. Stripped items 

276 will match the value of the argument strip_value. Functionality is 

277 analogous to that of the method str.strip. Returns a generator. 

278 

279 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')) 

280 ['Foo', 'Bar', 'Bam'] 

281 

282 """ 

283 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value) 

284 

285 

286def chunked(src, size, count=None, **kw): 

287 """Returns a list of *count* chunks, each with *size* elements, 

288 generated from iterable *src*. If *src* is not evenly divisible by 

289 *size*, the final chunk will have fewer than *size* elements. 

290 Provide the *fill* keyword argument to provide a pad value and 

291 enable padding, otherwise no padding will take place. 

292 

293 >>> chunked(range(10), 3) 

294 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

295 >>> chunked(range(10), 3, fill=None) 

296 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

297 >>> chunked(range(10), 3, count=2) 

298 [[0, 1, 2], [3, 4, 5]] 

299 

300 See :func:`chunked_iter` for more info. 

301 """ 

302 chunk_iter = chunked_iter(src, size, **kw) 

303 if count is None: 

304 return list(chunk_iter) 

305 else: 

306 return list(itertools.islice(chunk_iter, count)) 

307 

308 

309def _validate_positive_int(value, name, strictly_positive=True): 

310 value = int(value) 

311 if value < 0 or (strictly_positive and value == 0): 

312 raise ValueError('expected a positive integer ' + name) 

313 return value 

314 

315 

316def chunked_iter(src, size, **kw): 

317 """Generates *size*-sized chunks from *src* iterable. Unless the 

318 optional *fill* keyword argument is provided, iterables not evenly 

319 divisible by *size* will have a final chunk that is smaller than 

320 *size*. 

321 

322 >>> list(chunked_iter(range(10), 3)) 

323 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

324 >>> list(chunked_iter(range(10), 3, fill=None)) 

325 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

326 

327 Note that ``fill=None`` in fact uses ``None`` as the fill value. 

328 """ 

329 # TODO: add count kwarg? 

330 if not is_iterable(src): 

331 raise TypeError('expected an iterable') 

332 size = _validate_positive_int(size, 'chunk size') 

333 do_fill = True 

334 try: 

335 fill_val = kw.pop('fill') 

336 except KeyError: 

337 do_fill = False 

338 fill_val = None 

339 if kw: 

340 raise ValueError('got unexpected keyword arguments: %r' % kw.keys()) 

341 if not src: 

342 return 

343 postprocess = lambda chk: chk 

344 if isinstance(src, (str, bytes)): 

345 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk) 

346 if isinstance(src, bytes): 

347 postprocess = lambda chk: bytes(chk) 

348 src_iter = iter(src) 

349 while True: 

350 cur_chunk = list(itertools.islice(src_iter, size)) 

351 if not cur_chunk: 

352 break 

353 lc = len(cur_chunk) 

354 if lc < size and do_fill: 

355 cur_chunk[lc:] = [fill_val] * (size - lc) 

356 yield postprocess(cur_chunk) 

357 return 

358 

359 

360def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False): 

361 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*. 

362 Optionally, a start of the input can be set via *input_offset*, and 

363 and overlap between the chunks may be specified via *overlap_size*. 

364 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0* 

365 are always at the beginning of the chunk. 

366 

367 Returns an iterator of (start, end) tuples, one tuple per chunk. 

368 

369 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) 

370 [(10, 15), (15, 20)] 

371 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) 

372 [(10, 15), (14, 19), (18, 20)] 

373 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) 

374 [(10, 15), (13, 18), (16, 20)] 

375 

376 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) 

377 [(4, 9), (9, 14), (14, 19)] 

378 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) 

379 [(4, 5), (5, 10), (10, 15), (15, 19)] 

380 

381 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) 

382 [(2, 7), (6, 11), (10, 15), (14, 17)] 

383 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

384 [(2, 5), (4, 9), (8, 13), (12, 17)] 

385 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

386 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)] 

387 """ 

388 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False) 

389 chunk_size = _validate_positive_int(chunk_size, 'chunk_size') 

390 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False) 

391 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False) 

392 

393 input_stop = input_offset + input_size 

394 

395 if align: 

396 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size) 

397 if initial_chunk_len != overlap_size: 

398 yield (input_offset, min(input_offset + initial_chunk_len, input_stop)) 

399 if input_offset + initial_chunk_len >= input_stop: 

400 return 

401 input_offset = input_offset + initial_chunk_len - overlap_size 

402 

403 for i in range(input_offset, input_stop, chunk_size - overlap_size): 

404 yield (i, min(i + chunk_size, input_stop)) 

405 

406 if i + chunk_size >= input_stop: 

407 return 

408 

409 

410def pairwise(src, end=_UNSET): 

411 """Convenience function for calling :func:`windowed` on *src*, with 

412 *size* set to 2. 

413 

414 >>> pairwise(range(5)) 

415 [(0, 1), (1, 2), (2, 3), (3, 4)] 

416 >>> pairwise([]) 

417 [] 

418 

419 Unless *end* is set, the number of pairs is always one less than  

420 the number of elements in the iterable passed in, except on an empty input,  

421 which will return an empty list. 

422 

423 With *end* set, a number of pairs equal to the length of *src* is returned, 

424 with the last item of the last pair being equal to *end*. 

425 

426 >>> list(pairwise(range(3), end=None)) 

427 [(0, 1), (1, 2), (2, None)] 

428 

429 This way, *end* values can be useful as sentinels to signal the end of the iterable. 

430 """ 

431 return windowed(src, 2, fill=end) 

432 

433 

434def pairwise_iter(src, end=_UNSET): 

435 """Convenience function for calling :func:`windowed_iter` on *src*, 

436 with *size* set to 2. 

437 

438 >>> list(pairwise_iter(range(5))) 

439 [(0, 1), (1, 2), (2, 3), (3, 4)] 

440 >>> list(pairwise_iter([])) 

441 [] 

442 

443 Unless *end* is set, the number of pairs is always one less  

444 than the number of elements in the iterable passed in,  

445 or zero, when *src* is empty. 

446 

447 With *end* set, a number of pairs equal to the length of *src* is returned, 

448 with the last item of the last pair being equal to *end*.  

449 

450 >>> list(pairwise_iter(range(3), end=None)) 

451 [(0, 1), (1, 2), (2, None)]  

452 

453 This way, *end* values can be useful as sentinels to signal the end 

454 of the iterable. For infinite iterators, setting *end* has no effect. 

455 """ 

456 return windowed_iter(src, 2, fill=end) 

457 

458 

459def windowed(src, size, fill=_UNSET): 

460 """Returns tuples with exactly length *size*. If *fill* is unset  

461 and the iterable is too short to make a window of length *size*,  

462 no tuples are returned. See :func:`windowed_iter` for more. 

463 """ 

464 return list(windowed_iter(src, size, fill=fill)) 

465 

466 

467def windowed_iter(src, size, fill=_UNSET): 

468 """Returns tuples with length *size* which represent a sliding 

469 window over iterable *src*. 

470 

471 >>> list(windowed_iter(range(7), 3)) 

472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] 

473 

474 If *fill* is unset, and the iterable is too short to make a window  

475 of length *size*, then no window tuples are returned. 

476 

477 >>> list(windowed_iter(range(3), 5)) 

478 [] 

479 

480 With *fill* set, the iterator always yields a number of windows 

481 equal to the length of the *src* iterable. 

482  

483 >>> windowed(range(4), 3, fill=None) 

484 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] 

485 

486 This way, *fill* values can be useful to signal the end of the iterable. 

487 For infinite iterators, setting *fill* has no effect. 

488 """ 

489 tees = itertools.tee(src, size) 

490 if fill is _UNSET: 

491 try: 

492 for i, t in enumerate(tees): 

493 for _ in range(i): 

494 next(t) 

495 except StopIteration: 

496 return zip([]) 

497 return zip(*tees) 

498 

499 for i, t in enumerate(tees): 

500 for _ in range(i): 

501 try: 

502 next(t) 

503 except StopIteration: 

504 continue 

505 return zip_longest(*tees, fillvalue=fill) 

506 

507 

508 

509def xfrange(stop, start=None, step=1.0): 

510 """Same as :func:`frange`, but generator-based instead of returning a 

511 list. 

512 

513 >>> tuple(xfrange(1, 3, step=0.75)) 

514 (1.0, 1.75, 2.5) 

515 

516 See :func:`frange` for more details. 

517 """ 

518 if not step: 

519 raise ValueError('step must be non-zero') 

520 if start is None: 

521 start, stop = 0.0, stop * 1.0 

522 else: 

523 # swap when all args are used 

524 stop, start = start * 1.0, stop * 1.0 

525 cur = start 

526 while cur < stop: 

527 yield cur 

528 cur += step 

529 

530 

531def frange(stop, start=None, step=1.0): 

532 """A :func:`range` clone for float-based ranges. 

533 

534 >>> frange(5) 

535 [0.0, 1.0, 2.0, 3.0, 4.0] 

536 >>> frange(6, step=1.25) 

537 [0.0, 1.25, 2.5, 3.75, 5.0] 

538 >>> frange(100.5, 101.5, 0.25) 

539 [100.5, 100.75, 101.0, 101.25] 

540 >>> frange(5, 0) 

541 [] 

542 >>> frange(5, 0, step=-1.25) 

543 [5.0, 3.75, 2.5, 1.25] 

544 """ 

545 if not step: 

546 raise ValueError('step must be non-zero') 

547 if start is None: 

548 start, stop = 0.0, stop * 1.0 

549 else: 

550 # swap when all args are used 

551 stop, start = start * 1.0, stop * 1.0 

552 count = int(math.ceil((stop - start) / step)) 

553 ret = [None] * count 

554 if not ret: 

555 return ret 

556 ret[0] = start 

557 for i in range(1, count): 

558 ret[i] = ret[i - 1] + step 

559 return ret 

560 

561 

562def backoff(start, stop, count=None, factor=2.0, jitter=False): 

563 """Returns a list of geometrically-increasing floating-point numbers, 

564 suitable for usage with `exponential backoff`_. Exactly like 

565 :func:`backoff_iter`, but without the ``'repeat'`` option for 

566 *count*. See :func:`backoff_iter` for more details. 

567 

568 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

569 

570 >>> backoff(1, 10) 

571 [1.0, 2.0, 4.0, 8.0, 10.0] 

572 """ 

573 if count == 'repeat': 

574 raise ValueError("'repeat' supported in backoff_iter, not backoff") 

575 return list(backoff_iter(start, stop, count=count, 

576 factor=factor, jitter=jitter)) 

577 

578 

579def backoff_iter(start, stop, count=None, factor=2.0, jitter=False): 

580 """Generates a sequence of geometrically-increasing floats, suitable 

581 for usage with `exponential backoff`_. Starts with *start*, 

582 increasing by *factor* until *stop* is reached, optionally 

583 stopping iteration once *count* numbers are yielded. *factor* 

584 defaults to 2. In general retrying with properly-configured 

585 backoff creates a better-behaved component for a larger service 

586 ecosystem. 

587 

588 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

589 

590 >>> list(backoff_iter(1.0, 10.0, count=5)) 

591 [1.0, 2.0, 4.0, 8.0, 10.0] 

592 >>> list(backoff_iter(1.0, 10.0, count=8)) 

593 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0] 

594 >>> list(backoff_iter(0.25, 100.0, factor=10)) 

595 [0.25, 2.5, 25.0, 100.0] 

596 

597 A simplified usage example: 

598 

599 .. code-block:: python 

600 

601 for timeout in backoff_iter(0.25, 5.0): 

602 try: 

603 res = network_call() 

604 break 

605 except Exception as e: 

606 log(e) 

607 time.sleep(timeout) 

608 

609 An enhancement for large-scale systems would be to add variation, 

610 or *jitter*, to timeout values. This is done to avoid a thundering 

611 herd on the receiving end of the network call. 

612 

613 Finally, for *count*, the special value ``'repeat'`` can be passed to 

614 continue yielding indefinitely. 

615 

616 Args: 

617 

618 start (float): Positive number for baseline. 

619 stop (float): Positive number for maximum. 

620 count (int): Number of steps before stopping 

621 iteration. Defaults to the number of steps between *start* and 

622 *stop*. Pass the string, `'repeat'`, to continue iteration 

623 indefinitely. 

624 factor (float): Rate of exponential increase. Defaults to `2.0`, 

625 e.g., `[1, 2, 4, 8, 16]`. 

626 jitter (float): A factor between `-1.0` and `1.0`, used to 

627 uniformly randomize and thus spread out timeouts in a distributed 

628 system, avoiding rhythm effects. Positive values use the base 

629 backoff curve as a maximum, negative values use the curve as a 

630 minimum. Set to 1.0 or `True` for a jitter approximating 

631 Ethernet's time-tested backoff solution. Defaults to `False`. 

632 

633 """ 

634 start = float(start) 

635 stop = float(stop) 

636 factor = float(factor) 

637 if start < 0.0: 

638 raise ValueError('expected start >= 0, not %r' % start) 

639 if factor < 1.0: 

640 raise ValueError('expected factor >= 1.0, not %r' % factor) 

641 if stop == 0.0: 

642 raise ValueError('expected stop >= 0') 

643 if stop < start: 

644 raise ValueError('expected stop >= start, not %r' % stop) 

645 if count is None: 

646 denom = start if start else 1 

647 count = 1 + math.ceil(math.log(stop/denom, factor)) 

648 count = count if start else count + 1 

649 if count != 'repeat' and count < 0: 

650 raise ValueError('count must be positive or "repeat", not %r' % count) 

651 if jitter: 

652 jitter = float(jitter) 

653 if not (-1.0 <= jitter <= 1.0): 

654 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter) 

655 

656 cur, i = start, 0 

657 while count == 'repeat' or i < count: 

658 if not jitter: 

659 cur_ret = cur 

660 elif jitter: 

661 cur_ret = cur - (cur * jitter * random.random()) 

662 yield cur_ret 

663 i += 1 

664 if cur == 0: 

665 cur = 1 

666 elif cur < stop: 

667 cur *= factor 

668 if cur > stop: 

669 cur = stop 

670 return 

671 

672 

673def bucketize(src, key=bool, value_transform=None, key_filter=None): 

674 """Group values in the *src* iterable by the value returned by *key*. 

675 

676 >>> bucketize(range(5)) 

677 {False: [0], True: [1, 2, 3, 4]} 

678 >>> is_odd = lambda x: x % 2 == 1 

679 >>> bucketize(range(5), is_odd) 

680 {False: [0, 2, 4], True: [1, 3]} 

681 

682 *key* is :class:`bool` by default, but can either be a callable or a string or a list 

683 if it is a string, it is the name of the attribute on which to bucketize objects. 

684 

685 >>> bucketize([1+1j, 2+2j, 1, 2], key='real') 

686 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]} 

687 

688 if *key* is a list, it contains the buckets where to put each object 

689 

690 >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2]) 

691 {0: [1, 4], 1: [2], 2: [365, 98]} 

692 

693 

694 Value lists are not deduplicated: 

695 

696 >>> bucketize([None, None, None, 'hello']) 

697 {False: [None, None, None], True: ['hello']} 

698 

699 Bucketize into more than 3 groups 

700 

701 >>> bucketize(range(10), lambda x: x % 3) 

702 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]} 

703 

704 ``bucketize`` has a couple of advanced options useful in certain 

705 cases. *value_transform* can be used to modify values as they are 

706 added to buckets, and *key_filter* will allow excluding certain 

707 buckets from being collected. 

708 

709 >>> bucketize(range(5), value_transform=lambda x: x*x) 

710 {False: [0], True: [1, 4, 9, 16]} 

711 

712 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1) 

713 {0: [0, 3, 6, 9], 2: [2, 5, 8]} 

714 

715 Note in some of these examples there were at most two keys, ``True`` and 

716 ``False``, and each key present has a list with at least one 

717 item. See :func:`partition` for a version specialized for binary 

718 use cases. 

719 

720 """ 

721 if not is_iterable(src): 

722 raise TypeError('expected an iterable') 

723 elif isinstance(key, list): 

724 if len(key) != len(src): 

725 raise ValueError("key and src have to be the same length") 

726 src = zip(key, src) 

727 

728 if isinstance(key, str): 

729 key_func = lambda x: getattr(x, key, x) 

730 elif callable(key): 

731 key_func = key 

732 elif isinstance(key, list): 

733 key_func = lambda x: x[0] 

734 else: 

735 raise TypeError('expected key to be callable or a string or a list') 

736 

737 if value_transform is None: 

738 value_transform = lambda x: x 

739 if not callable(value_transform): 

740 raise TypeError('expected callable value transform function') 

741 if isinstance(key, list): 

742 f = value_transform 

743 value_transform=lambda x: f(x[1]) 

744 

745 ret = {} 

746 for val in src: 

747 key_of_val = key_func(val) 

748 if key_filter is None or key_filter(key_of_val): 

749 ret.setdefault(key_of_val, []).append(value_transform(val)) 

750 return ret 

751 

752 

753def partition(src, key=bool): 

754 """No relation to :meth:`str.partition`, ``partition`` is like 

755 :func:`bucketize`, but for added convenience returns a tuple of 

756 ``(truthy_values, falsy_values)``. 

757 

758 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye']) 

759 >>> nonempty 

760 ['hi', 'bye'] 

761 

762 *key* defaults to :class:`bool`, but can be carefully overridden to 

763 use either a function that returns either ``True`` or ``False`` or 

764 a string name of the attribute on which to partition objects. 

765 

766 >>> import string 

767 >>> is_digit = lambda x: x in string.digits 

768 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit) 

769 >>> ''.join(decimal_digits), ''.join(hexletters) 

770 ('0123456789', 'abcdefABCDEF') 

771 """ 

772 bucketized = bucketize(src, key) 

773 return bucketized.get(True, []), bucketized.get(False, []) 

774 

775 

776def unique(src, key=None): 

777 """``unique()`` returns a list of unique values, as determined by 

778 *key*, in the order they first appeared in the input iterable, 

779 *src*. 

780 

781 >>> ones_n_zeros = '11010110001010010101010' 

782 >>> ''.join(unique(ones_n_zeros)) 

783 '10' 

784 

785 See :func:`unique_iter` docs for more details. 

786 """ 

787 return list(unique_iter(src, key)) 

788 

789 

790def unique_iter(src, key=None): 

791 """Yield unique elements from the iterable, *src*, based on *key*, 

792 in the order in which they first appeared in *src*. 

793 

794 >>> repetitious = [1, 2, 3] * 10 

795 >>> list(unique_iter(repetitious)) 

796 [1, 2, 3] 

797 

798 By default, *key* is the object itself, but *key* can either be a 

799 callable or, for convenience, a string name of the attribute on 

800 which to uniqueify objects, falling back on identity when the 

801 attribute is not present. 

802 

803 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes'] 

804 >>> list(unique_iter(pleasantries, key=lambda x: len(x))) 

805 ['hi', 'hello', 'bye'] 

806 """ 

807 if not is_iterable(src): 

808 raise TypeError('expected an iterable, not %r' % type(src)) 

809 if key is None: 

810 key_func = lambda x: x 

811 elif callable(key): 

812 key_func = key 

813 elif isinstance(key, str): 

814 key_func = lambda x: getattr(x, key, x) 

815 else: 

816 raise TypeError('"key" expected a string or callable, not %r' % key) 

817 seen = set() 

818 for i in src: 

819 k = key_func(i) 

820 if k not in seen: 

821 seen.add(k) 

822 yield i 

823 return 

824 

825 

826def redundant(src, key=None, groups=False): 

827 """The complement of :func:`unique()`. 

828 

829 By default returns non-unique/duplicate values as a list of the 

830 *first* redundant value in *src*. Pass ``groups=True`` to get 

831 groups of all values with redundancies, ordered by position of the 

832 first redundant value. This is useful in conjunction with some 

833 normalizing *key* function. 

834 

835 >>> redundant([1, 2, 3, 4]) 

836 [] 

837 >>> redundant([1, 2, 3, 2, 3, 3, 4]) 

838 [2, 3] 

839 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True) 

840 [[2, 2], [3, 3, 3]] 

841 

842 An example using a *key* function to do case-insensitive 

843 redundancy detection. 

844 

845 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower) 

846 ['Hi'] 

847 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower) 

848 [['hi', 'Hi', 'HI']] 

849 

850 *key* should also be used when the values in *src* are not hashable. 

851 

852 .. note:: 

853 

854 This output of this function is designed for reporting 

855 duplicates in contexts when a unique input is desired. Due to 

856 the grouped return type, there is no streaming equivalent of 

857 this function for the time being. 

858 

859 """ 

860 if key is None: 

861 pass 

862 elif callable(key): 

863 key_func = key 

864 elif isinstance(key, (str, bytes)): 

865 key_func = lambda x: getattr(x, key, x) 

866 else: 

867 raise TypeError('"key" expected a string or callable, not %r' % key) 

868 seen = {} # key to first seen item 

869 redundant_order = [] 

870 redundant_groups = {} 

871 for i in src: 

872 k = key_func(i) if key else i 

873 if k not in seen: 

874 seen[k] = i 

875 else: 

876 if k in redundant_groups: 

877 if groups: 

878 redundant_groups[k].append(i) 

879 else: 

880 redundant_order.append(k) 

881 redundant_groups[k] = [seen[k], i] 

882 if not groups: 

883 ret = [redundant_groups[k][1] for k in redundant_order] 

884 else: 

885 ret = [redundant_groups[k] for k in redundant_order] 

886 return ret 

887 

888 

889def one(src, default=None, key=None): 

890 """Along the same lines as builtins, :func:`all` and :func:`any`, and 

891 similar to :func:`first`, ``one()`` returns the single object in 

892 the given iterable *src* that evaluates to ``True``, as determined 

893 by callable *key*. If unset, *key* defaults to :class:`bool`. If 

894 no such objects are found, *default* is returned. If *default* is 

895 not passed, ``None`` is returned. 

896 

897 If *src* has more than one object that evaluates to ``True``, or 

898 if there is no object that fulfills such condition, return 

899 *default*. It's like an `XOR`_ over an iterable. 

900 

901 >>> one((True, False, False)) 

902 True 

903 >>> one((True, False, True)) 

904 >>> one((0, 0, 'a')) 

905 'a' 

906 >>> one((0, False, None)) 

907 >>> one((True, True), default=False) 

908 False 

909 >>> bool(one(('', 1))) 

910 True 

911 >>> one((10, 20, 30, 42), key=lambda i: i > 40) 

912 42 

913 

914 See `Martín Gaitán's original repo`_ for further use cases. 

915 

916 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one 

917 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or 

918 

919 """ 

920 ones = list(itertools.islice(filter(key, src), 2)) 

921 return ones[0] if len(ones) == 1 else default 

922 

923 

924def first(iterable, default=None, key=None): 

925 """Return first element of *iterable* that evaluates to ``True``, else 

926 return ``None`` or optional *default*. Similar to :func:`one`. 

927 

928 >>> first([0, False, None, [], (), 42]) 

929 42 

930 >>> first([0, False, None, [], ()]) is None 

931 True 

932 >>> first([0, False, None, [], ()], default='ohai') 

933 'ohai' 

934 >>> import re 

935 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)']) 

936 >>> m.group(1) 

937 'bc' 

938 

939 The optional *key* argument specifies a one-argument predicate function 

940 like that used for *filter()*. The *key* argument, if supplied, should be 

941 in keyword form. For example, finding the first even number in an iterable: 

942 

943 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0) 

944 4 

945 

946 Contributed by Hynek Schlawack, author of `the original standalone module`_. 

947 

948 .. _the original standalone module: https://github.com/hynek/first 

949 """ 

950 return next(filter(key, iterable), default) 

951 

952 

953def flatten_iter(iterable): 

954 """``flatten_iter()`` yields all the elements from *iterable* while 

955 collapsing any nested iterables. 

956 

957 >>> nested = [[1, 2], [[3], [4, 5]]] 

958 >>> list(flatten_iter(nested)) 

959 [1, 2, 3, 4, 5] 

960 """ 

961 for item in iterable: 

962 if isinstance(item, Iterable) and not isinstance(item, (str, bytes)): 

963 yield from flatten_iter(item) 

964 else: 

965 yield item 

966 

967def flatten(iterable): 

968 """``flatten()`` returns a collapsed list of all the elements from 

969 *iterable* while collapsing any nested iterables. 

970 

971 >>> nested = [[1, 2], [[3], [4, 5]]] 

972 >>> flatten(nested) 

973 [1, 2, 3, 4, 5] 

974 """ 

975 return list(flatten_iter(iterable)) 

976 

977 

978def same(iterable, ref=_UNSET): 

979 """``same()`` returns ``True`` when all values in *iterable* are 

980 equal to one another, or optionally a reference value, 

981 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates 

982 an iterable and returns a :class:`bool`. ``same()`` returns 

983 ``True`` for empty iterables. 

984 

985 >>> same([]) 

986 True 

987 >>> same([1]) 

988 True 

989 >>> same(['a', 'a', 'a']) 

990 True 

991 >>> same(range(20)) 

992 False 

993 >>> same([[], []]) 

994 True 

995 >>> same([[], []], ref='test') 

996 False 

997 

998 """ 

999 iterator = iter(iterable) 

1000 if ref is _UNSET: 

1001 ref = next(iterator, ref) 

1002 return all(val == ref for val in iterator) 

1003 

1004 

1005def default_visit(path, key, value): 

1006 # print('visit(%r, %r, %r)' % (path, key, value)) 

1007 return key, value 

1008 

1009# enable the extreme: monkeypatching iterutils with a different default_visit 

1010_orig_default_visit = default_visit 

1011 

1012 

1013def default_enter(path, key, value): 

1014 # print('enter(%r, %r)' % (key, value)) 

1015 if isinstance(value, (str, bytes)): 

1016 return value, False 

1017 elif isinstance(value, Mapping): 

1018 return value.__class__(), ItemsView(value) 

1019 elif isinstance(value, Sequence): 

1020 return value.__class__(), enumerate(value) 

1021 elif isinstance(value, Set): 

1022 return value.__class__(), enumerate(value) 

1023 else: 

1024 # files, strings, other iterables, and scalars are not 

1025 # traversed 

1026 return value, False 

1027 

1028 

1029def default_exit(path, key, old_parent, new_parent, new_items): 

1030 # print('exit(%r, %r, %r, %r, %r)' 

1031 # % (path, key, old_parent, new_parent, new_items)) 

1032 ret = new_parent 

1033 if isinstance(new_parent, Mapping): 

1034 new_parent.update(new_items) 

1035 elif isinstance(new_parent, Sequence): 

1036 vals = [v for i, v in new_items] 

1037 try: 

1038 new_parent.extend(vals) 

1039 except AttributeError: 

1040 ret = new_parent.__class__(vals) # tuples 

1041 elif isinstance(new_parent, Set): 

1042 vals = [v for i, v in new_items] 

1043 try: 

1044 new_parent.update(vals) 

1045 except AttributeError: 

1046 ret = new_parent.__class__(vals) # frozensets 

1047 else: 

1048 raise RuntimeError('unexpected iterable type: %r' % type(new_parent)) 

1049 return ret 

1050 

1051 

1052def remap(root, visit=default_visit, enter=default_enter, exit=default_exit, 

1053 **kwargs): 

1054 """The remap ("recursive map") function is used to traverse and 

1055 transform nested structures. Lists, tuples, sets, and dictionaries 

1056 are just a few of the data structures nested into heterogeneous 

1057 tree-like structures that are so common in programming. 

1058 Unfortunately, Python's built-in ways to manipulate collections 

1059 are almost all flat. List comprehensions may be fast and succinct, 

1060 but they do not recurse, making it tedious to apply quick changes 

1061 or complex transforms to real-world data. 

1062 

1063 remap goes where list comprehensions cannot. 

1064 

1065 Here's an example of removing all Nones from some data: 

1066 

1067 >>> from pprint import pprint 

1068 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None}, 

1069 ... 'Babylon 5': 6, 'Dr. Who': None} 

1070 >>> pprint(remap(reviews, lambda p, k, v: v is not None)) 

1071 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}} 

1072 

1073 Notice how both Nones have been removed despite the nesting in the 

1074 dictionary. Not bad for a one-liner, and that's just the beginning. 

1075 See `this remap cookbook`_ for more delicious recipes. 

1076 

1077 .. _this remap cookbook: http://sedimental.org/remap.html 

1078 

1079 remap takes four main arguments: the object to traverse and three 

1080 optional callables which determine how the remapped object will be 

1081 created. 

1082 

1083 Args: 

1084 

1085 root: The target object to traverse. By default, remap 

1086 supports iterables like :class:`list`, :class:`tuple`, 

1087 :class:`dict`, and :class:`set`, but any object traversable by 

1088 *enter* will work. 

1089 visit (callable): This function is called on every item in 

1090 *root*. It must accept three positional arguments, *path*, 

1091 *key*, and *value*. *path* is simply a tuple of parents' 

1092 keys. *visit* should return the new key-value pair. It may 

1093 also return ``True`` as shorthand to keep the old item 

1094 unmodified, or ``False`` to drop the item from the new 

1095 structure. *visit* is called after *enter*, on the new parent. 

1096 

1097 The *visit* function is called for every item in root, 

1098 including duplicate items. For traversable values, it is 

1099 called on the new parent object, after all its children 

1100 have been visited. The default visit behavior simply 

1101 returns the key-value pair unmodified. 

1102 enter (callable): This function controls which items in *root* 

1103 are traversed. It accepts the same arguments as *visit*: the 

1104 path, the key, and the value of the current item. It returns a 

1105 pair of the blank new parent, and an iterator over the items 

1106 which should be visited. If ``False`` is returned instead of 

1107 an iterator, the value will not be traversed. 

1108 

1109 The *enter* function is only called once per unique value. The 

1110 default enter behavior support mappings, sequences, and 

1111 sets. Strings and all other iterables will not be traversed. 

1112 exit (callable): This function determines how to handle items 

1113 once they have been visited. It gets the same three 

1114 arguments as the other functions -- *path*, *key*, *value* 

1115 -- plus two more: the blank new parent object returned 

1116 from *enter*, and a list of the new items, as remapped by 

1117 *visit*. 

1118 

1119 Like *enter*, the *exit* function is only called once per 

1120 unique value. The default exit behavior is to simply add 

1121 all new items to the new parent, e.g., using 

1122 :meth:`list.extend` and :meth:`dict.update` to add to the 

1123 new parent. Immutable objects, such as a :class:`tuple` or 

1124 :class:`namedtuple`, must be recreated from scratch, but 

1125 use the same type as the new parent passed back from the 

1126 *enter* function. 

1127 reraise_visit (bool): A pragmatic convenience for the *visit* 

1128 callable. When set to ``False``, remap ignores any errors 

1129 raised by the *visit* callback. Items causing exceptions 

1130 are kept. See examples for more details. 

1131 

1132 remap is designed to cover the majority of cases with just the 

1133 *visit* callable. While passing in multiple callables is very 

1134 empowering, remap is designed so very few cases should require 

1135 passing more than one function. 

1136 

1137 When passing *enter* and *exit*, it's common and easiest to build 

1138 on the default behavior. Simply add ``from boltons.iterutils import 

1139 default_enter`` (or ``default_exit``), and have your enter/exit 

1140 function call the default behavior before or after your custom 

1141 logic. See `this example`_. 

1142 

1143 Duplicate and self-referential objects (aka reference loops) are 

1144 automatically handled internally, `as shown here`_. 

1145 

1146 .. _this example: http://sedimental.org/remap.html#sort_all_lists 

1147 .. _as shown here: http://sedimental.org/remap.html#corner_cases 

1148 

1149 """ 

1150 # TODO: improve argument formatting in sphinx doc 

1151 # TODO: enter() return (False, items) to continue traverse but cancel copy? 

1152 if not callable(visit): 

1153 raise TypeError('visit expected callable, not: %r' % visit) 

1154 if not callable(enter): 

1155 raise TypeError('enter expected callable, not: %r' % enter) 

1156 if not callable(exit): 

1157 raise TypeError('exit expected callable, not: %r' % exit) 

1158 reraise_visit = kwargs.pop('reraise_visit', True) 

1159 if kwargs: 

1160 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys()) 

1161 

1162 path, registry, stack = (), {}, [(None, root)] 

1163 new_items_stack = [] 

1164 while stack: 

1165 key, value = stack.pop() 

1166 id_value = id(value) 

1167 if key is _REMAP_EXIT: 

1168 key, new_parent, old_parent = value 

1169 id_value = id(old_parent) 

1170 path, new_items = new_items_stack.pop() 

1171 value = exit(path, key, old_parent, new_parent, new_items) 

1172 registry[id_value] = value 

1173 if not new_items_stack: 

1174 continue 

1175 elif id_value in registry: 

1176 value = registry[id_value] 

1177 else: 

1178 res = enter(path, key, value) 

1179 try: 

1180 new_parent, new_items = res 

1181 except TypeError: 

1182 # TODO: handle False? 

1183 raise TypeError('enter should return a tuple of (new_parent,' 

1184 ' items_iterator), not: %r' % res) 

1185 if new_items is not False: 

1186 # traverse unless False is explicitly passed 

1187 registry[id_value] = new_parent 

1188 new_items_stack.append((path, [])) 

1189 if value is not root: 

1190 path += (key,) 

1191 stack.append((_REMAP_EXIT, (key, new_parent, value))) 

1192 if new_items: 

1193 stack.extend(reversed(list(new_items))) 

1194 continue 

1195 if visit is _orig_default_visit: 

1196 # avoid function call overhead by inlining identity operation 

1197 visited_item = (key, value) 

1198 else: 

1199 try: 

1200 visited_item = visit(path, key, value) 

1201 except Exception: 

1202 if reraise_visit: 

1203 raise 

1204 visited_item = True 

1205 if visited_item is False: 

1206 continue # drop 

1207 elif visited_item is True: 

1208 visited_item = (key, value) 

1209 # TODO: typecheck? 

1210 # raise TypeError('expected (key, value) from visit(),' 

1211 # ' not: %r' % visited_item) 

1212 try: 

1213 new_items_stack[-1][1].append(visited_item) 

1214 except IndexError: 

1215 raise TypeError('expected remappable root, not: %r' % root) 

1216 return value 

1217 

1218 

1219class PathAccessError(KeyError, IndexError, TypeError): 

1220 """An amalgamation of KeyError, IndexError, and TypeError, 

1221 representing what can occur when looking up a path in a nested 

1222 object. 

1223 """ 

1224 def __init__(self, exc, seg, path): 

1225 self.exc = exc 

1226 self.seg = seg 

1227 self.path = path 

1228 

1229 def __repr__(self): 

1230 cn = self.__class__.__name__ 

1231 return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})' 

1232 

1233 def __str__(self): 

1234 return ('could not access %r from path %r, got error: %r' 

1235 % (self.seg, self.path, self.exc)) 

1236 

1237 

1238def get_path(root, path, default=_UNSET): 

1239 """Retrieve a value from a nested object via a tuple representing the 

1240 lookup path. 

1241 

1242 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}} 

1243 >>> get_path(root, ('a', 'b', 'c', 2, 0)) 

1244 3 

1245 

1246 The path tuple format is intentionally consistent with that of 

1247 :func:`remap`, but a single dotted string can also be passed. 

1248 

1249 One of get_path's chief aims is improved error messaging. EAFP is 

1250 great, but the error messages are not. 

1251 

1252 For instance, ``root['a']['b']['c'][2][1]`` gives back 

1253 ``IndexError: list index out of range`` 

1254 

1255 What went out of range where? get_path currently raises 

1256 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2, 

1257 1), got error: IndexError('list index out of range',)``, a 

1258 subclass of IndexError and KeyError. 

1259 

1260 You can also pass a default that covers the entire operation, 

1261 should the lookup fail at any level. 

1262 

1263 Args: 

1264 root: The target nesting of dictionaries, lists, or other 

1265 objects supporting ``__getitem__``. 

1266 path (tuple): A sequence of strings and integers to be successively 

1267 looked up within *root*. A dot-separated (``a.b``) string may  

1268 also be passed. 

1269 default: The value to be returned should any 

1270 ``PathAccessError`` exceptions be raised. 

1271 """ 

1272 if isinstance(path, str): 

1273 path = path.split('.') 

1274 cur = root 

1275 try: 

1276 for seg in path: 

1277 try: 

1278 cur = cur[seg] 

1279 except (KeyError, IndexError) as exc: 

1280 raise PathAccessError(exc, seg, path) 

1281 except TypeError as exc: 

1282 # either string index in a list, or a parent that 

1283 # doesn't support indexing 

1284 try: 

1285 seg = int(seg) 

1286 cur = cur[seg] 

1287 except (ValueError, KeyError, IndexError, TypeError): 

1288 if not is_iterable(cur): 

1289 exc = TypeError('%r object is not indexable' 

1290 % type(cur).__name__) 

1291 raise PathAccessError(exc, seg, path) 

1292 except PathAccessError: 

1293 if default is _UNSET: 

1294 raise 

1295 return default 

1296 return cur 

1297 

1298 

1299def research(root, query=lambda p, k, v: True, reraise=False): 

1300 """The :func:`research` function uses :func:`remap` to recurse over 

1301 any data nested in *root*, and find values which match a given 

1302 criterion, specified by the *query* callable. 

1303 

1304 Results are returned as a list of ``(path, value)`` pairs. The 

1305 paths are tuples in the same format accepted by 

1306 :func:`get_path`. This can be useful for comparing values nested 

1307 in two or more different structures. 

1308 

1309 Here's a simple example that finds all integers: 

1310 

1311 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None} 

1312 >>> res = research(root, query=lambda p, k, v: isinstance(v, int)) 

1313 >>> print(sorted(res)) 

1314 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)] 

1315 

1316 Note how *query* follows the same, familiar ``path, key, value`` 

1317 signature as the ``visit`` and ``enter`` functions on 

1318 :func:`remap`, and returns a :class:`bool`. 

1319 

1320 Args: 

1321 root: The target object to search. Supports the same types of 

1322 objects as :func:`remap`, including :class:`list`, 

1323 :class:`tuple`, :class:`dict`, and :class:`set`. 

1324 query (callable): The function called on every object to 

1325 determine whether to include it in the search results. The 

1326 callable must accept three arguments, *path*, *key*, and 

1327 *value*, commonly abbreviated *p*, *k*, and *v*, same as 

1328 *enter* and *visit* from :func:`remap`. 

1329 reraise (bool): Whether to reraise exceptions raised by *query* 

1330 or to simply drop the result that caused the error. 

1331 

1332 

1333 With :func:`research` it's easy to inspect the details of a data 

1334 structure, like finding values that are at a certain depth (using 

1335 ``len(p)``) and much more. If more advanced functionality is 

1336 needed, check out the code and make your own :func:`remap` 

1337 wrapper, and consider `submitting a patch`_! 

1338 

1339 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls 

1340 """ 

1341 ret = [] 

1342 

1343 if not callable(query): 

1344 raise TypeError('query expected callable, not: %r' % query) 

1345 

1346 def enter(path, key, value): 

1347 try: 

1348 if query(path, key, value): 

1349 ret.append((path + (key,), value)) 

1350 except Exception: 

1351 if reraise: 

1352 raise 

1353 return default_enter(path, key, value) 

1354 

1355 remap(root, enter=enter) 

1356 return ret 

1357 

1358 

1359# TODO: recollect() 

1360# TODO: refilter() 

1361# TODO: reiter() 

1362 

1363 

1364# GUID iterators: 10x faster and somewhat more compact than uuid. 

1365 

1366class GUIDerator: 

1367 """The GUIDerator is an iterator that yields a globally-unique 

1368 identifier (GUID) on every iteration. The GUIDs produced are 

1369 hexadecimal strings. 

1370 

1371 Testing shows it to be around 12x faster than the uuid module. By 

1372 default it is also more compact, partly due to its default 96-bit 

1373 (24-hexdigit) length. 96 bits of randomness means that there is a 

1374 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more 

1375 or less uniqueness is desired, the *size* argument can be adjusted 

1376 accordingly. 

1377 

1378 Args: 

1379 size (int): character length of the GUID, defaults to 24. Lengths 

1380 between 20 and 36 are considered valid. 

1381 

1382 The GUIDerator has built-in fork protection that causes it to 

1383 detect a fork on next iteration and reseed accordingly. 

1384 

1385 """ 

1386 def __init__(self, size=24): 

1387 self.size = size 

1388 if size < 20 or size > 36: 

1389 raise ValueError('expected 20 < size <= 36') 

1390 import hashlib 

1391 self._sha1 = hashlib.sha1 

1392 self.count = itertools.count() 

1393 self.reseed() 

1394 

1395 def reseed(self): 

1396 import socket 

1397 self.pid = os.getpid() 

1398 self.salt = '-'.join([str(self.pid), 

1399 socket.gethostname() or '<nohostname>', 

1400 str(time.time()), 

1401 os.urandom(6).hex()]) 

1402 return 

1403 

1404 def __iter__(self): 

1405 return self 

1406 

1407 def __next__(self): 

1408 if os.getpid() != self.pid: 

1409 self.reseed() 

1410 target_bytes = (self.salt + str(next(self.count))).encode('utf8') 

1411 hash_text = self._sha1(target_bytes).hexdigest()[:self.size] 

1412 return hash_text 

1413 

1414 next = __next__ 

1415 

1416 

1417class SequentialGUIDerator(GUIDerator): 

1418 """Much like the standard GUIDerator, the SequentialGUIDerator is an 

1419 iterator that yields a globally-unique identifier (GUID) on every 

1420 iteration. The GUIDs produced are hexadecimal strings. 

1421 

1422 The SequentialGUIDerator differs in that it picks a starting GUID 

1423 value and increments every iteration. This yields GUIDs which are 

1424 of course unique, but also ordered and lexicographically sortable. 

1425 

1426 The SequentialGUIDerator is around 50% faster than the normal 

1427 GUIDerator, making it almost 20x as fast as the built-in uuid 

1428 module. By default it is also more compact, partly due to its 

1429 96-bit (24-hexdigit) default length. 96 bits of randomness means that 

1430 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 

1431 iterations. If more or less uniqueness is desired, the *size* 

1432 argument can be adjusted accordingly. 

1433 

1434 Args: 

1435 size (int): character length of the GUID, defaults to 24. 

1436 

1437 Note that with SequentialGUIDerator there is a chance of GUIDs 

1438 growing larger than the size configured. The SequentialGUIDerator 

1439 has built-in fork protection that causes it to detect a fork on 

1440 next iteration and reseed accordingly. 

1441 

1442 """ 

1443 

1444 def reseed(self): 

1445 super().reseed() 

1446 start_str = self._sha1(self.salt.encode('utf8')).hexdigest() 

1447 self.start = int(start_str[:self.size], 16) 

1448 self.start |= (1 << ((self.size * 4) - 2)) 

1449 

1450 def __next__(self): 

1451 if os.getpid() != self.pid: 

1452 self.reseed() 

1453 return '%x' % (next(self.count) + self.start) 

1454 

1455 next = __next__ 

1456 

1457 

1458guid_iter = GUIDerator() 

1459seq_guid_iter = SequentialGUIDerator() 

1460 

1461 

1462def soft_sorted(iterable, first=None, last=None, key=None, reverse=False): 

1463 """For when you care about the order of some elements, but not about 

1464 others. 

1465 

1466 Use this to float to the top and/or sink to the bottom a specific 

1467 ordering, while sorting the rest of the elements according to 

1468 normal :func:`sorted` rules. 

1469 

1470 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two']) 

1471 ['one', 'two', 'a', 'b'] 

1472 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True) 

1473 [6, 5, 3, 1, 0, 2, 4] 

1474 >>> import string 

1475 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower)) 

1476 'aA1023456789cCdDeEfFbB' 

1477 

1478 Args: 

1479 iterable (list): A list or other iterable to sort. 

1480 first (list): A sequence to enforce for elements which should 

1481 appear at the beginning of the returned list. 

1482 last (list): A sequence to enforce for elements which should 

1483 appear at the end of the returned list. 

1484 key (callable): Callable used to generate a comparable key for 

1485 each item to be sorted, same as the key in 

1486 :func:`sorted`. Note that entries in *first* and *last* 

1487 should be the keys for the items. Defaults to 

1488 passthrough/the identity function. 

1489 reverse (bool): Whether or not elements not explicitly ordered 

1490 by *first* and *last* should be in reverse order or not. 

1491 

1492 Returns a new list in sorted order. 

1493 """ 

1494 first = first or [] 

1495 last = last or [] 

1496 key = key or (lambda x: x) 

1497 seq = list(iterable) 

1498 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))] 

1499 other.sort(key=key, reverse=reverse) 

1500 

1501 if first: 

1502 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x))) 

1503 if last: 

1504 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x))) 

1505 return first + other + last 

1506 

1507 

1508def untyped_sorted(iterable, key=None, reverse=False): 

1509 """A version of :func:`sorted` which will happily sort an iterable of 

1510 heterogeneous types and return a new list, similar to legacy Python's 

1511 behavior. 

1512 

1513 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def']) 

1514 [1, 2.0, 2, 'abc', 'def'] 

1515 

1516 Note how mutually orderable types are sorted as expected, as in 

1517 the case of the integers and floats above. 

1518 

1519 .. note:: 

1520 

1521 Results may vary across Python versions and builds, but the 

1522 function will produce a sorted list, except in the case of 

1523 explicitly unorderable objects. 

1524 

1525 """ 

1526 class _Wrapper: 

1527 slots = ('obj',) 

1528 

1529 def __init__(self, obj): 

1530 self.obj = obj 

1531 

1532 def __lt__(self, other): 

1533 obj = key(self.obj) if key is not None else self.obj 

1534 other = key(other.obj) if key is not None else other.obj 

1535 try: 

1536 ret = obj < other 

1537 except TypeError: 

1538 ret = ((type(obj).__name__, id(type(obj)), obj) 

1539 < (type(other).__name__, id(type(other)), other)) 

1540 return ret 

1541 

1542 if key is not None and not callable(key): 

1543 raise TypeError('expected function or callable object for key, not: %r' 

1544 % key) 

1545 

1546 return sorted(iterable, key=_Wrapper, reverse=reverse) 

1547 

1548""" 

1549May actually be faster to do an isinstance check for a str path 

1550 

1551$ python -m timeit -s "x = [1]" "x[0]" 

155210000000 loops, best of 3: 0.0207 usec per loop 

1553$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass" 

155410000000 loops, best of 3: 0.029 usec per loop 

1555$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass" 

15561000000 loops, best of 3: 0.315 usec per loop 

1557# setting up try/except is fast, only around 0.01us 

1558# actually triggering the exception takes almost 10x as long 

1559 

1560$ python -m timeit -s "x = [1]" "isinstance(x, basestring)" 

156110000000 loops, best of 3: 0.141 usec per loop 

1562$ python -m timeit -s "x = [1]" "isinstance(x, str)" 

156310000000 loops, best of 3: 0.131 usec per loop 

1564$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass" 

15651000000 loops, best of 3: 0.443 usec per loop 

1566$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass" 

15671000000 loops, best of 3: 0.544 usec per loop 

1568"""