Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/boltons/iterutils.py: 16% (547 statements)

1# Copyright (c) 2013, Mahmoud Hashemi 

2# 

3# Redistribution and use in source and binary forms, with or without 

4# modification, are permitted provided that the following conditions are 

5# met: 

6# 

7# * Redistributions of source code must retain the above copyright 

8# notice, this list of conditions and the following disclaimer. 

9# 

10# * Redistributions in binary form must reproduce the above 

11# copyright notice, this list of conditions and the following 

12# disclaimer in the documentation and/or other materials provided 

13# with the distribution. 

14# 

15# * The names of the contributors may not be used to endorse or 

16# promote products derived from this software without specific 

17# prior written permission. 

18# 

19# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

20# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

21# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

22# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

23# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

24# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

25# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

26# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

27# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

28# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

29# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

30 

31""":mod:`itertools` is full of great examples of Python generator 

32usage. However, there are still some critical gaps. ``iterutils`` 

33fills many of those gaps with featureful, tested, and Pythonic 

34solutions. 

35 

36Many of the functions below have two versions, one which 

37returns an iterator (denoted by the ``*_iter`` naming pattern), and a 

38shorter-named convenience form that returns a list. Some of the 

39 following are based on examples in the itertools docs. 

40""" 

41 

42import os 

43import math 

44import time 

45import codecs 

46import random 

47import itertools 

48from itertools import zip_longest 

49from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable 

50 

51 

52try: 

53 from .typeutils import make_sentinel 

54 _UNSET = make_sentinel('_UNSET') 

55 _REMAP_EXIT = make_sentinel('_REMAP_EXIT') 

56except ImportError: 

57 _REMAP_EXIT = object() 

58 _UNSET = object() 

59 

60 

61def is_iterable(obj): 

62 """Similar in nature to :func:`callable`, ``is_iterable`` returns 

63 ``True`` if an object is `iterable`_, ``False`` if not. 

64 

65 >>> is_iterable([]) 

66 True 

67 >>> is_iterable(object()) 

68 False 

69 

70 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable 

71 """ 

72 try: 

73 iter(obj) 

74 except TypeError: 

75 return False 

76 return True 

77 

78 

79def is_scalar(obj): 

80 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an 

81 object is an iterable container type. Strings are considered 

82 scalar as well, because strings are more often treated as whole 

83 values as opposed to iterables of 1-character substrings. 

84 

85 >>> is_scalar(object()) 

86 True 

87 >>> is_scalar(range(10)) 

88 False 

89 >>> is_scalar('hello') 

90 True 

91 """ 

92 return not is_iterable(obj) or isinstance(obj, (str, bytes)) 

93 

94 

95def is_collection(obj): 

96 """The opposite of :func:`is_scalar`. Returns ``True`` if an object 

97 is an iterable other than a string. 

98 

99 >>> is_collection(object()) 

100 False 

101 >>> is_collection(range(10)) 

102 True 

103 >>> is_collection('hello') 

104 False 

105 """ 

106 return is_iterable(obj) and not isinstance(obj, (str, bytes)) 

107 

108 

109def split(src, sep=None, maxsplit=None): 

110 """Splits an iterable based on a separator. Like :meth:`str.split`, 

111 but for all iterables. Returns a list of lists. 

112 

113 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None]) 

114 [['hi', 'hello'], ['sup'], ['soap']] 

115 

116 See :func:`split_iter` docs for more info. 

117 """ 

118 return list(split_iter(src, sep, maxsplit)) 

119 

120 

121def split_iter(src, sep=None, maxsplit=None): 

122 """Splits an iterable based on a separator, *sep*, a max of 

123 *maxsplit* times (no max by default). *sep* can be: 

124 

125 * a single value 

126 * an iterable of separators 

127 * a single-argument callable that returns True when a separator is 

128 encountered 

129 

130 ``split_iter()`` yields lists of non-separator values. A separator will 

131 never appear in the output. 

132 

133 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])) 

134 [['hi', 'hello'], ['sup'], ['soap']] 

135 

136 Note that ``split_iter`` is based on :func:`str.split`, so if 

137 *sep* is ``None``, ``split()`` **groups** separators. If empty lists 

138 are desired between two contiguous ``None`` values, simply use 

139 ``sep=[None]``: 

140 

141 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None])) 

142 [['hi', 'hello'], ['sup']] 

143 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None])) 

144 [['hi', 'hello'], [], ['sup'], []] 

145 

146 Using a callable separator: 

147 

148 >>> falsy_sep = lambda x: not x 

149 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep)) 

150 [['hi', 'hello'], [], ['sup'], []] 

151 

152 See :func:`split` for a list-returning version. 

153 

154 """ 

155 if not is_iterable(src): 

156 raise TypeError('expected an iterable') 

157 

158 if maxsplit is not None: 

159 maxsplit = int(maxsplit) 

160 if maxsplit == 0: 

161 yield [src] 

162 return 

163 

164 if callable(sep): 

165 sep_func = sep 

166 elif not is_scalar(sep): 

167 sep = frozenset(sep) 

168 def sep_func(x): return x in sep 

169 else: 

170 def sep_func(x): return x == sep 

171 

172 cur_group = [] 

173 split_count = 0 

174 for s in src: 

175 if maxsplit is not None and split_count >= maxsplit: 

176 def sep_func(x): return False 

177 if sep_func(s): 

178 if sep is None and not cur_group: 

179 # If sep is none, str.split() "groups" separators 

180 # check the str.split() docs for more info 

181 continue 

182 split_count += 1 

183 yield cur_group 

184 cur_group = [] 

185 else: 

186 cur_group.append(s) 

187 

188 if cur_group or sep is not None: 

189 yield cur_group 

190 return 

191 

192 
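# A small, hedged illustration (added here, not part of the original module) of
# the *maxsplit* parameter documented above: once *maxsplit* splits have been
# made, the remaining items, separators included, are collected into the final
# group.
#
#   >>> list(split_iter([1, 0, 2, 0, 3, 0, 4], sep=0, maxsplit=2))
#   [[1], [2], [3, 0, 4]]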

193def lstrip(iterable, strip_value=None): 

194 """Strips values from the beginning of an iterable. Stripped items will 

195 match the value of the argument strip_value. Functionality is analogous 

196 to that of the method str.lstrip. Returns a list. 

197 

198 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo') 

199 ['Bar', 'Bam'] 

200 

201 """ 

202 return list(lstrip_iter(iterable, strip_value)) 

203 

204 

205def lstrip_iter(iterable, strip_value=None): 

206 """Strips values from the beginning of an iterable. Stripped items will 

207 match the value of the argument strip_value. Functionality is analogous 

208 to that of the method str.lstrip. Returns a generator. 

209 

210 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo')) 

211 ['Bar', 'Bam'] 

212 

213 """ 

214 iterator = iter(iterable) 

215 for i in iterator: 

216 if i != strip_value: 

217 yield i 

218 break 

219 for i in iterator: 

220 yield i 

221 

222 

223def rstrip(iterable, strip_value=None): 

224 """Strips values from the end of an iterable. Stripped items will 

225 match the value of the argument strip_value. Functionality is analogous 

226 to that of the method str.rstrip. Returns a list. 

227 

228 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam') 

229 ['Foo', 'Bar'] 

230 

231 """ 

232 return list(rstrip_iter(iterable, strip_value)) 

233 

234 

235def rstrip_iter(iterable, strip_value=None): 

236 """Strips values from the end of an iterable. Stripped items will 

237 match the value of the argument strip_value. Functionality is analogous 

238 to that of the method str.rstrip. Returns a generator. 

239 

240 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam')) 

241 ['Foo', 'Bar'] 

242 

243 """ 

244 iterator = iter(iterable) 

245 for i in iterator: 

246 if i == strip_value: 

247 cache = list() 

248 cache.append(i) 

249 broken = False 

250 for i in iterator: 

251 if i == strip_value: 

252 cache.append(i) 

253 else: 

254 broken = True 

255 break 

256 if not broken: # Return to caller here because the end of the 

257 return # iterator has been reached 

258 yield from cache 

259 yield i 

260 

261 

262def strip(iterable, strip_value=None): 

263 """Strips values from the beginning and end of an iterable. Stripped items 

264 will match the value of the argument strip_value. Functionality is 

265 analogous to that of the method str.strip. Returns a list. 

266 

267 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu') 

268 ['Foo', 'Bar', 'Bam'] 

269 

270 """ 

271 return list(strip_iter(iterable, strip_value)) 

272 

273 

274def strip_iter(iterable, strip_value=None): 

275 """Strips values from the beginning and end of an iterable. Stripped items 

276 will match the value of the argument strip_value. Functionality is 

277 analogous to that of the method str.strip. Returns a generator. 

278 

279 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')) 

280 ['Foo', 'Bar', 'Bam'] 

281 

282 """ 

283 return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value) 

284 

285 

286def chunked(src, size, count=None, **kw): 

287 """Returns a list of *count* chunks, each with *size* elements, 

288 generated from iterable *src*. If *src* is not evenly divisible by 

289 *size*, the final chunk will have fewer than *size* elements. 

290 Provide the *fill* keyword argument to provide a pad value and 

291 enable padding, otherwise no padding will take place. 

292 

293 >>> chunked(range(10), 3) 

294 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

295 >>> chunked(range(10), 3, fill=None) 

296 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

297 >>> chunked(range(10), 3, count=2) 

298 [[0, 1, 2], [3, 4, 5]] 

299 

300 See :func:`chunked_iter` for more info. 

301 """ 

302 chunk_iter = chunked_iter(src, size, **kw) 

303 if count is None: 

304 return list(chunk_iter) 

305 else: 

306 return list(itertools.islice(chunk_iter, count)) 

307 

308 

309def _validate_positive_int(value, name, strictly_positive=True): 

310 value = int(value) 

311 if value < 0 or (strictly_positive and value == 0): 

312 raise ValueError('expected a positive integer ' + name) 

313 return value 

314 

315 

316def chunked_iter(src, size, **kw): 

317 """Generates *size*-sized chunks from *src* iterable. Unless the 

318 optional *fill* keyword argument is provided, iterables not evenly 

319 divisible by *size* will have a final chunk that is smaller than 

320 *size*. 

321 

322 >>> list(chunked_iter(range(10), 3)) 

323 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

324 >>> list(chunked_iter(range(10), 3, fill=None)) 

325 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

326 

327 Note that ``fill=None`` in fact uses ``None`` as the fill value. 

328 """ 

329 # TODO: add count kwarg? 

330 if not is_iterable(src): 

331 raise TypeError('expected an iterable') 

332 size = _validate_positive_int(size, 'chunk size') 

333 do_fill = True 

334 try: 

335 fill_val = kw.pop('fill') 

336 except KeyError: 

337 do_fill = False 

338 fill_val = None 

339 if kw: 

340 raise ValueError('got unexpected keyword arguments: %r' % kw.keys()) 

341 if not src: 

342 return 

343 

344 def postprocess(chk): return chk 

345 if isinstance(src, (str, bytes)): 

346 def postprocess(chk, _sep=type(src)()): return _sep.join(chk) 

347 if isinstance(src, bytes): 

348 def postprocess(chk): return bytes(chk) 

349 src_iter = iter(src) 

350 while True: 

351 cur_chunk = list(itertools.islice(src_iter, size)) 

352 if not cur_chunk: 

353 break 

354 lc = len(cur_chunk) 

355 if lc < size and do_fill: 

356 cur_chunk[lc:] = [fill_val] * (size - lc) 

357 yield postprocess(cur_chunk) 

358 return 

359 

360 
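# Illustrative sketch (added, not from the original source): because of the
# postprocess step above, chunking a string or bytes object yields joined
# chunks of the same type rather than lists of characters.
#
#   >>> chunked('abcdefg', 3)
#   ['abc', 'def', 'g']
#   >>> chunked(b'abcdefg', 3)
#   [b'abc', b'def', b'g']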

361def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False): 

362 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*. 

363 Optionally, a start of the input can be set via *input_offset*, and 

364 an overlap between the chunks may be specified via *overlap_size*. 

365 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0* 

366 are always at the beginning of the chunk. 

367 

368 Returns an iterator of (start, end) tuples, one tuple per chunk. 

369 

370 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) 

371 [(10, 15), (15, 20)] 

372 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) 

373 [(10, 15), (14, 19), (18, 20)] 

374 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) 

375 [(10, 15), (13, 18), (16, 20)] 

376 

377 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) 

378 [(4, 9), (9, 14), (14, 19)] 

379 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) 

380 [(4, 5), (5, 10), (10, 15), (15, 19)] 

381 

382 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) 

383 [(2, 7), (6, 11), (10, 15), (14, 17)] 

384 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

385 [(2, 5), (4, 9), (8, 13), (12, 17)] 

386 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

387 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)] 

388 """ 

389 input_size = _validate_positive_int( 

390 input_size, 'input_size', strictly_positive=False) 

391 chunk_size = _validate_positive_int(chunk_size, 'chunk_size') 

392 input_offset = _validate_positive_int( 

393 input_offset, 'input_offset', strictly_positive=False) 

394 overlap_size = _validate_positive_int( 

395 overlap_size, 'overlap_size', strictly_positive=False) 

396 

397 input_stop = input_offset + input_size 

398 

399 if align: 

400 initial_chunk_len = chunk_size - \ 

401 input_offset % (chunk_size - overlap_size) 

402 if initial_chunk_len != overlap_size: 

403 yield (input_offset, min(input_offset + initial_chunk_len, input_stop)) 

404 if input_offset + initial_chunk_len >= input_stop: 

405 return 

406 input_offset = input_offset + initial_chunk_len - overlap_size 

407 

408 for i in range(input_offset, input_stop, chunk_size - overlap_size): 

409 yield (i, min(i + chunk_size, input_stop)) 

410 

411 if i + chunk_size >= input_stop: 

412 return 

413 

414 

415def pairwise(src, end=_UNSET): 

416 """Convenience function for calling :func:`windowed` on *src*, with 

417 *size* set to 2. 

418 

419 >>> pairwise(range(5)) 

420 [(0, 1), (1, 2), (2, 3), (3, 4)] 

421 >>> pairwise([]) 

422 [] 

423 

424 Unless *end* is set, the number of pairs is always one less than  

425 the number of elements in the iterable passed in, except on an empty input,  

426 which will return an empty list. 

427 

428 With *end* set, a number of pairs equal to the length of *src* is returned, 

429 with the last item of the last pair being equal to *end*. 

430 

431 >>> list(pairwise(range(3), end=None)) 

432 [(0, 1), (1, 2), (2, None)] 

433 

434 This way, *end* values can be useful as sentinels to signal the end of the iterable. 

435 """ 

436 return windowed(src, 2, fill=end) 

437 

438 

439def pairwise_iter(src, end=_UNSET): 

440 """Convenience function for calling :func:`windowed_iter` on *src*, 

441 with *size* set to 2. 

442 

443 >>> list(pairwise_iter(range(5))) 

444 [(0, 1), (1, 2), (2, 3), (3, 4)] 

445 >>> list(pairwise_iter([])) 

446 [] 

447 

448 Unless *end* is set, the number of pairs is always one less  

449 than the number of elements in the iterable passed in,  

450 or zero when *src* is empty. 

451 

452 With *end* set, a number of pairs equal to the length of *src* is returned, 

453 with the last item of the last pair being equal to *end*.  

454 

455 >>> list(pairwise_iter(range(3), end=None)) 

456 [(0, 1), (1, 2), (2, None)]  

457 

458 This way, *end* values can be useful as sentinels to signal the end 

459 of the iterable. For infinite iterators, setting *end* has no effect. 

460 """ 

461 return windowed_iter(src, 2, fill=end) 

462 

463 

464def windowed(src, size, fill=_UNSET): 

465 """Returns tuples with exactly length *size*. If *fill* is unset  

466 and the iterable is too short to make a window of length *size*,  

467 no tuples are returned. See :func:`windowed_iter` for more. 

468 """ 

469 return list(windowed_iter(src, size, fill=fill)) 

470 

471 

472def windowed_iter(src, size, fill=_UNSET): 

473 """Returns tuples with length *size* which represent a sliding 

474 window over iterable *src*. 

475 

476 >>> list(windowed_iter(range(7), 3)) 

477 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] 

478 

479 If *fill* is unset, and the iterable is too short to make a window  

480 of length *size*, then no window tuples are returned. 

481 

482 >>> list(windowed_iter(range(3), 5)) 

483 [] 

484 

485 With *fill* set, the iterator always yields a number of windows 

486 equal to the length of the *src* iterable. 

487 

488 >>> windowed(range(4), 3, fill=None) 

489 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] 

490 

491 This way, *fill* values can be useful to signal the end of the iterable. 

492 For infinite iterators, setting *fill* has no effect. 

493 """ 

494 tees = itertools.tee(src, size) 

495 if fill is _UNSET: 

496 try: 

497 for i, t in enumerate(tees): 

498 for _ in range(i): 

499 next(t) 

500 except StopIteration: 

501 return zip([]) 

502 return zip(*tees) 

503 

504 for i, t in enumerate(tees): 

505 for _ in range(i): 

506 try: 

507 next(t) 

508 except StopIteration: 

509 continue 

510 return zip_longest(*tees, fillvalue=fill) 

511 

512 
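# A hedged usage sketch (not in the original module): the windowing is lazy, so
# it also works on infinite iterators.
#
#   >>> list(itertools.islice(windowed_iter(itertools.count(), 3), 4))
#   [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5)]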

513def xfrange(stop, start=None, step=1.0): 

514 """Same as :func:`frange`, but generator-based instead of returning a 

515 list. 

516 

517 >>> tuple(xfrange(1, 3, step=0.75)) 

518 (1.0, 1.75, 2.5) 

519 

520 See :func:`frange` for more details. 

521 """ 

522 if not step: 

523 raise ValueError('step must be non-zero') 

524 if start is None: 

525 start, stop = 0.0, stop * 1.0 

526 else: 

527 # swap when all args are used 

528 stop, start = start * 1.0, stop * 1.0 

529 cur = start 

530 while cur < stop: 

531 yield cur 

532 cur += step 

533 

534 

535def frange(stop, start=None, step=1.0): 

536 """A :func:`range` clone for float-based ranges. 

537 

538 >>> frange(5) 

539 [0.0, 1.0, 2.0, 3.0, 4.0] 

540 >>> frange(6, step=1.25) 

541 [0.0, 1.25, 2.5, 3.75, 5.0] 

542 >>> frange(100.5, 101.5, 0.25) 

543 [100.5, 100.75, 101.0, 101.25] 

544 >>> frange(5, 0) 

545 [] 

546 >>> frange(5, 0, step=-1.25) 

547 [5.0, 3.75, 2.5, 1.25] 

548 """ 

549 if not step: 

550 raise ValueError('step must be non-zero') 

551 if start is None: 

552 start, stop = 0.0, stop * 1.0 

553 else: 

554 # swap when all args are used 

555 stop, start = start * 1.0, stop * 1.0 

556 count = int(math.ceil((stop - start) / step)) 

557 ret = [None] * count 

558 if not ret: 

559 return ret 

560 ret[0] = start 

561 for i in range(1, count): 

562 ret[i] = ret[i - 1] + step 

563 return ret 

564 

565 

566def backoff(start, stop, count=None, factor=2.0, jitter=False): 

567 """Returns a list of geometrically-increasing floating-point numbers, 

568 suitable for usage with `exponential backoff`_. Exactly like 

569 :func:`backoff_iter`, but without the ``'repeat'`` option for 

570 *count*. See :func:`backoff_iter` for more details. 

571 

572 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

573 

574 >>> backoff(1, 10) 

575 [1.0, 2.0, 4.0, 8.0, 10.0] 

576 """ 

577 if count == 'repeat': 

578 raise ValueError("'repeat' supported in backoff_iter, not backoff") 

579 return list(backoff_iter(start, stop, count=count, 

580 factor=factor, jitter=jitter)) 

581 

582 

583def backoff_iter(start, stop, count=None, factor=2.0, jitter=False): 

584 """Generates a sequence of geometrically-increasing floats, suitable 

585 for usage with `exponential backoff`_. Starts with *start*, 

586 increasing by *factor* until *stop* is reached, optionally 

587 stopping iteration once *count* numbers are yielded. *factor* 

588 defaults to 2. In general retrying with properly-configured 

589 backoff creates a better-behaved component for a larger service 

590 ecosystem. 

591 

592 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

593 

594 >>> list(backoff_iter(1.0, 10.0, count=5)) 

595 [1.0, 2.0, 4.0, 8.0, 10.0] 

596 >>> list(backoff_iter(1.0, 10.0, count=8)) 

597 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0] 

598 >>> list(backoff_iter(0.25, 100.0, factor=10)) 

599 [0.25, 2.5, 25.0, 100.0] 

600 

601 A simplified usage example: 

602 

603 .. code-block:: python 

604 

605 for timeout in backoff_iter(0.25, 5.0): 

606 try: 

607 res = network_call() 

608 break 

609 except Exception as e: 

610 log(e) 

611 time.sleep(timeout) 

612 

613 An enhancement for large-scale systems would be to add variation, 

614 or *jitter*, to timeout values. This is done to avoid a thundering 

615 herd on the receiving end of the network call. 

616 

617 Finally, for *count*, the special value ``'repeat'`` can be passed to 

618 continue yielding indefinitely. 

619 

620 Args: 

621 

622 start (float): Positive number for baseline. 

623 stop (float): Positive number for maximum. 

624 count (int): Number of steps before stopping 

625 iteration. Defaults to the number of steps between *start* and 

626 *stop*. Pass the string, `'repeat'`, to continue iteration 

627 indefinitely. 

628 factor (float): Rate of exponential increase. Defaults to `2.0`, 

629 e.g., `[1, 2, 4, 8, 16]`. 

630 jitter (float): A factor between `-1.0` and `1.0`, used to 

631 uniformly randomize and thus spread out timeouts in a distributed 

632 system, avoiding rhythm effects. Positive values use the base 

633 backoff curve as a maximum, negative values use the curve as a 

634 minimum. Set to 1.0 or `True` for a jitter approximating 

635 Ethernet's time-tested backoff solution. Defaults to `False`. 

636 

637 """ 

638 start = float(start) 

639 stop = float(stop) 

640 factor = float(factor) 

641 if start < 0.0: 

642 raise ValueError('expected start >= 0, not %r' % start) 

643 if factor < 1.0: 

644 raise ValueError('expected factor >= 1.0, not %r' % factor) 

645 if stop == 0.0: 

646 raise ValueError('expected stop > 0') 

647 if stop < start: 

648 raise ValueError('expected stop >= start, not %r' % stop) 

649 if count is None: 

650 denom = start if start else 1 

651 count = 1 + math.ceil(math.log(stop/denom, factor)) 

652 count = count if start else count + 1 

653 if count != 'repeat' and count < 0: 

654 raise ValueError('count must be positive or "repeat", not %r' % count) 

655 if jitter: 

656 jitter = float(jitter) 

657 if not (-1.0 <= jitter <= 1.0): 

658 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter) 

659 

660 cur, i = start, 0 

661 while count == 'repeat' or i < count: 

662 if not jitter: 

663 cur_ret = cur 

664 elif jitter: 

665 cur_ret = cur - (cur * jitter * random.random()) 

666 yield cur_ret 

667 i += 1 

668 if cur == 0: 

669 cur = 1 

670 elif cur < stop: 

671 cur *= factor 

672 if cur > stop: 

673 cur = stop 

674 return 

675 

676 
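# A minimal retry-loop sketch (added for illustration, not from the original
# source), combining the 'repeat' count with jitter=True so each timeout is
# drawn uniformly between 0 and the base curve's value. The network call name
# is hypothetical.
#
#   for timeout in backoff_iter(0.25, 8.0, count='repeat', jitter=True):
#       try:
#           response = make_request()  # hypothetical network call
#           break
#       except Exception:
#           time.sleep(timeout)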

677def bucketize(src, key=bool, value_transform=None, key_filter=None): 

678 """Group values in the *src* iterable by the value returned by *key*. 

679 

680 >>> bucketize(range(5)) 

681 {False: [0], True: [1, 2, 3, 4]} 

682 >>> is_odd = lambda x: x % 2 == 1 

683 >>> bucketize(range(5), is_odd) 

684 {False: [0, 2, 4], True: [1, 3]} 

685 

686 *key* is :class:`bool` by default, but can also be a callable, a string, or a list. 

687 If it is a string, it is the name of the attribute on which to bucketize objects. 

688 

689 >>> bucketize([1+1j, 2+2j, 1, 2], key='real') 

690 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]} 

691 

692 If *key* is a list, it gives the bucket for each corresponding object in *src*: 

693 

694 >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2]) 

695 {0: [1, 4], 1: [2], 2: [365, 98]} 

696 

697 

698 Value lists are not deduplicated: 

699 

700 >>> bucketize([None, None, None, 'hello']) 

701 {False: [None, None, None], True: ['hello']} 

702 

703 Bucketize into more than two groups: 

704 

705 >>> bucketize(range(10), lambda x: x % 3) 

706 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]} 

707 

708 ``bucketize`` has a couple of advanced options useful in certain 

709 cases. *value_transform* can be used to modify values as they are 

710 added to buckets, and *key_filter* will allow excluding certain 

711 buckets from being collected. 

712 

713 >>> bucketize(range(5), value_transform=lambda x: x*x) 

714 {False: [0], True: [1, 4, 9, 16]} 

715 

716 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1) 

717 {0: [0, 3, 6, 9], 2: [2, 5, 8]} 

718 

719 Note in some of these examples there were at most two keys, ``True`` and 

720 ``False``, and each key present has a list with at least one 

721 item. See :func:`partition` for a version specialized for binary 

722 use cases. 

723 

724 """ 

725 if not is_iterable(src): 

726 raise TypeError('expected an iterable') 

727 elif isinstance(key, list): 

728 if len(key) != len(src): 

729 raise ValueError("key and src have to be the same length") 

730 src = zip(key, src) 

731 

732 if isinstance(key, str): 

733 def key_func(x): return getattr(x, key, x) 

734 elif callable(key): 

735 key_func = key 

736 elif isinstance(key, list): 

737 def key_func(x): return x[0] 

738 else: 

739 raise TypeError('expected key to be callable or a string or a list') 

740 

741 if value_transform is None: 

742 def value_transform(x): return x 

743 if not callable(value_transform): 

744 raise TypeError('expected callable value transform function') 

745 if isinstance(key, list): 

746 f = value_transform 

747 def value_transform(x): return f(x[1]) 

748 

749 ret = {} 

750 for val in src: 

751 key_of_val = key_func(val) 

752 if key_filter is None or key_filter(key_of_val): 

753 ret.setdefault(key_of_val, []).append(value_transform(val)) 

754 return ret 

755 

756 

757def partition(src, key=bool): 

758 """No relation to :meth:`str.partition`, ``partition`` is like 

759 :func:`bucketize`, but for added convenience returns a tuple of 

760 ``(truthy_values, falsy_values)``. 

761 

762 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye']) 

763 >>> nonempty 

764 ['hi', 'bye'] 

765 

766 *key* defaults to :class:`bool`, but can be carefully overridden to 

767 use either a function that returns ``True`` or ``False``, or 

768 a string name of the attribute on which to partition objects. 

769 

770 >>> import string 

771 >>> is_digit = lambda x: x in string.digits 

772 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit) 

773 >>> ''.join(decimal_digits), ''.join(hexletters) 

774 ('0123456789', 'abcdefABCDEF') 

775 """ 

776 bucketized = bucketize(src, key) 

777 return bucketized.get(True, []), bucketized.get(False, []) 

778 

779 

780def unique(src, key=None): 

781 """``unique()`` returns a list of unique values, as determined by 

782 *key*, in the order they first appeared in the input iterable, 

783 *src*. 

784 

785 >>> ones_n_zeros = '11010110001010010101010' 

786 >>> ''.join(unique(ones_n_zeros)) 

787 '10' 

788 

789 See :func:`unique_iter` docs for more details. 

790 """ 

791 return list(unique_iter(src, key)) 

792 

793 

794def unique_iter(src, key=None): 

795 """Yield unique elements from the iterable, *src*, based on *key*, 

796 in the order in which they first appeared in *src*. 

797 

798 >>> repetitious = [1, 2, 3] * 10 

799 >>> list(unique_iter(repetitious)) 

800 [1, 2, 3] 

801 

802 By default, *key* is the object itself, but *key* can either be a 

803 callable or, for convenience, a string name of the attribute on 

804 which to uniqueify objects, falling back on identity when the 

805 attribute is not present. 

806 

807 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes'] 

808 >>> list(unique_iter(pleasantries, key=lambda x: len(x))) 

809 ['hi', 'hello', 'bye'] 

810 """ 

811 if not is_iterable(src): 

812 raise TypeError('expected an iterable, not %r' % type(src)) 

813 if key is None: 

814 def key_func(x): return x 

815 elif callable(key): 

816 key_func = key 

817 elif isinstance(key, str): 

818 def key_func(x): return getattr(x, key, x) 

819 else: 

820 raise TypeError('"key" expected a string or callable, not %r' % key) 

821 seen = set() 

822 for i in src: 

823 k = key_func(i) 

824 if k not in seen: 

825 seen.add(k) 

826 yield i 

827 return 

828 

829 
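# Hedged example (added, not part of the original module) of the string *key*
# form described above: the named attribute is looked up on each element,
# falling back to the element itself when the attribute is missing.
#
#   >>> list(unique_iter([1+1j, 1+2j, 2+3j], key='real'))
#   [(1+1j), (2+3j)]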

830def redundant(src, key=None, groups=False): 

831 """The complement of :func:`unique()`. 

832 

833 By default returns non-unique/duplicate values as a list of the 

834 *first* redundant value in *src*. Pass ``groups=True`` to get 

835 groups of all values with redundancies, ordered by position of the 

836 first redundant value. This is useful in conjunction with some 

837 normalizing *key* function. 

838 

839 >>> redundant([1, 2, 3, 4]) 

840 [] 

841 >>> redundant([1, 2, 3, 2, 3, 3, 4]) 

842 [2, 3] 

843 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True) 

844 [[2, 2], [3, 3, 3]] 

845 

846 An example using a *key* function to do case-insensitive 

847 redundancy detection. 

848 

849 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower) 

850 ['Hi'] 

851 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower) 

852 [['hi', 'Hi', 'HI']] 

853 

854 *key* should also be used when the values in *src* are not hashable. 

855 

856 .. note:: 

857 

858 The output of this function is designed for reporting 

859 duplicates in contexts where a unique input is desired. Due to 

860 the grouped return type, there is no streaming equivalent of 

861 this function for the time being. 

862 

863 """ 

864 if key is None: 

865 pass 

866 elif callable(key): 

867 key_func = key 

868 elif isinstance(key, (str, bytes)): 

869 def key_func(x): return getattr(x, key, x) 

870 else: 

871 raise TypeError('"key" expected a string or callable, not %r' % key) 

872 seen = {} # key to first seen item 

873 redundant_order = [] 

874 redundant_groups = {} 

875 for i in src: 

876 k = key_func(i) if key else i 

877 if k not in seen: 

878 seen[k] = i 

879 else: 

880 if k in redundant_groups: 

881 if groups: 

882 redundant_groups[k].append(i) 

883 else: 

884 redundant_order.append(k) 

885 redundant_groups[k] = [seen[k], i] 

886 if not groups: 

887 ret = [redundant_groups[k][1] for k in redundant_order] 

888 else: 

889 ret = [redundant_groups[k] for k in redundant_order] 

890 return ret 

891 

892 
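# Illustrative note (added, not original): as mentioned in the docstring, a
# *key* function makes redundancy detection work for unhashable values such as
# lists, by hashing a derived representation instead.
#
#   >>> redundant([[1], [2], [1]], key=repr)
#   [[1]]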

893def one(src, default=None, key=None): 

894 """Along the same lines as builtins, :func:`all` and :func:`any`, and 

895 similar to :func:`first`, ``one()`` returns the single object in 

896 the given iterable *src* that evaluates to ``True``, as determined 

897 by callable *key*. If unset, *key* defaults to :class:`bool`. If 

898 no such objects are found, *default* is returned. If *default* is 

899 not passed, ``None`` is returned. 

900 

901 If *src* has more than one object that evaluates to ``True``, or 

902 if there is no object that fulfills that condition, return 

903 *default*. It's like an `XOR`_ over an iterable. 

904 

905 >>> one((True, False, False)) 

906 True 

907 >>> one((True, False, True)) 

908 >>> one((0, 0, 'a')) 

909 'a' 

910 >>> one((0, False, None)) 

911 >>> one((True, True), default=False) 

912 False 

913 >>> bool(one(('', 1))) 

914 True 

915 >>> one((10, 20, 30, 42), key=lambda i: i > 40) 

916 42 

917 

918 See `Martín Gaitán's original repo`_ for further use cases. 

919 

920 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one 

921 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or 

922 

923 """ 

924 ones = list(itertools.islice(filter(key, src), 2)) 

925 return ones[0] if len(ones) == 1 else default 

926 

927 

928def first(iterable, default=None, key=None): 

929 """Return first element of *iterable* that evaluates to ``True``, else 

930 return ``None`` or optional *default*. Similar to :func:`one`. 

931 

932 >>> first([0, False, None, [], (), 42]) 

933 42 

934 >>> first([0, False, None, [], ()]) is None 

935 True 

936 >>> first([0, False, None, [], ()], default='ohai') 

937 'ohai' 

938 >>> import re 

939 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)']) 

940 >>> m.group(1) 

941 'bc' 

942 

943 The optional *key* argument specifies a one-argument predicate function 

944 like that used for *filter()*. The *key* argument, if supplied, should be 

945 in keyword form. For example, finding the first even number in an iterable: 

946 

947 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0) 

948 4 

949 

950 Contributed by Hynek Schlawack, author of `the original standalone module`_. 

951 

952 .. _the original standalone module: https://github.com/hynek/first 

953 """ 

954 return next(filter(key, iterable), default) 

955 

956 

957def flatten_iter(iterable): 

958 """``flatten_iter()`` yields all the elements from *iterable* while 

959 collapsing any nested iterables. 

960 

961 >>> nested = [[1, 2], [[3], [4, 5]]] 

962 >>> list(flatten_iter(nested)) 

963 [1, 2, 3, 4, 5] 

964 """ 

965 for item in iterable: 

966 if isinstance(item, Iterable) and not isinstance(item, (str, bytes)): 

967 yield from flatten_iter(item) 

968 else: 

969 yield item 

970 

971 

972def flatten(iterable): 

973 """``flatten()`` returns a collapsed list of all the elements from 

974 *iterable* while collapsing any nested iterables. 

975 

976 >>> nested = [[1, 2], [[3], [4, 5]]] 

977 >>> flatten(nested) 

978 [1, 2, 3, 4, 5] 

979 """ 

980 return list(flatten_iter(iterable)) 

981 

982 

983def same(iterable, ref=_UNSET): 

984 """``same()`` returns ``True`` when all values in *iterable* are 

985 equal to one another, or optionally a reference value, 

986 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates 

987 an iterable and returns a :class:`bool`. ``same()`` returns 

988 ``True`` for empty iterables. 

989 

990 >>> same([]) 

991 True 

992 >>> same([1]) 

993 True 

994 >>> same(['a', 'a', 'a']) 

995 True 

996 >>> same(range(20)) 

997 False 

998 >>> same([[], []]) 

999 True 

1000 >>> same([[], []], ref='test') 

1001 False 

1002 

1003 """ 

1004 iterator = iter(iterable) 

1005 if ref is _UNSET: 

1006 ref = next(iterator, ref) 

1007 return all(val == ref for val in iterator) 

1008 

1009 

1010def default_visit(path, key, value): 

1011 # print('visit(%r, %r, %r)' % (path, key, value)) 

1012 return key, value 

1013 

1014 

1015# enable the extreme: monkeypatching iterutils with a different default_visit 

1016_orig_default_visit = default_visit 

1017 

1018 

1019def default_enter(path, key, value): 

1020 # print('enter(%r, %r)' % (key, value)) 

1021 if isinstance(value, (str, bytes)): 

1022 return value, False 

1023 elif isinstance(value, Mapping): 

1024 return value.__class__(), ItemsView(value) 

1025 elif isinstance(value, Sequence): 

1026 return value.__class__(), enumerate(value) 

1027 elif isinstance(value, Set): 

1028 return value.__class__(), enumerate(value) 

1029 else: 

1030 # files, strings, other iterables, and scalars are not 

1031 # traversed 

1032 return value, False 

1033 

1034 

1035def default_exit(path, key, old_parent, new_parent, new_items): 

1036 # print('exit(%r, %r, %r, %r, %r)' 

1037 # % (path, key, old_parent, new_parent, new_items)) 

1038 ret = new_parent 

1039 if isinstance(new_parent, Mapping): 

1040 new_parent.update(new_items) 

1041 elif isinstance(new_parent, Sequence): 

1042 vals = [v for i, v in new_items] 

1043 try: 

1044 new_parent.extend(vals) 

1045 except AttributeError: 

1046 ret = new_parent.__class__(vals) # tuples 

1047 elif isinstance(new_parent, Set): 

1048 vals = [v for i, v in new_items] 

1049 try: 

1050 new_parent.update(vals) 

1051 except AttributeError: 

1052 ret = new_parent.__class__(vals) # frozensets 

1053 else: 

1054 raise RuntimeError('unexpected iterable type: %r' % type(new_parent)) 

1055 return ret 

1056 

1057 

1058def remap(root, visit=default_visit, enter=default_enter, exit=default_exit, 

1059 **kwargs): 

1060 """The remap ("recursive map") function is used to traverse and 

1061 transform nested structures. Lists, tuples, sets, and dictionaries 

1062 are just a few of the data structures nested into heterogeneous 

1063 tree-like structures that are so common in programming. 

1064 Unfortunately, Python's built-in ways to manipulate collections 

1065 are almost all flat. List comprehensions may be fast and succinct, 

1066 but they do not recurse, making it tedious to apply quick changes 

1067 or complex transforms to real-world data. 

1068 

1069 remap goes where list comprehensions cannot. 

1070 

1071 Here's an example of removing all Nones from some data: 

1072 

1073 >>> from pprint import pprint 

1074 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None}, 

1075 ... 'Babylon 5': 6, 'Dr. Who': None} 

1076 >>> pprint(remap(reviews, lambda p, k, v: v is not None)) 

1077 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}} 

1078 

1079 Notice how both Nones have been removed despite the nesting in the 

1080 dictionary. Not bad for a one-liner, and that's just the beginning. 

1081 See `this remap cookbook`_ for more delicious recipes. 

1082 

1083 .. _this remap cookbook: http://sedimental.org/remap.html 

1084 

1085 remap takes four main arguments: the object to traverse and three 

1086 optional callables which determine how the remapped object will be 

1087 created. 

1088 

1089 Args: 

1090 

1091 root: The target object to traverse. By default, remap 

1092 supports iterables like :class:`list`, :class:`tuple`, 

1093 :class:`dict`, and :class:`set`, but any object traversable by 

1094 *enter* will work. 

1095 visit (callable): This function is called on every item in 

1096 *root*. It must accept three positional arguments, *path*, 

1097 *key*, and *value*. *path* is simply a tuple of parents' 

1098 keys. *visit* should return the new key-value pair. It may 

1099 also return ``True`` as shorthand to keep the old item 

1100 unmodified, or ``False`` to drop the item from the new 

1101 structure. *visit* is called after *enter*, on the new parent. 

1102 

1103 The *visit* function is called for every item in root, 

1104 including duplicate items. For traversable values, it is 

1105 called on the new parent object, after all its children 

1106 have been visited. The default visit behavior simply 

1107 returns the key-value pair unmodified. 

1108 enter (callable): This function controls which items in *root* 

1109 are traversed. It accepts the same arguments as *visit*: the 

1110 path, the key, and the value of the current item. It returns a 

1111 pair of the blank new parent, and an iterator over the items 

1112 which should be visited. If ``False`` is returned instead of 

1113 an iterator, the value will not be traversed. 

1114 

1115 The *enter* function is only called once per unique value. The 

1116 default enter behavior supports mappings, sequences, and 

1117 sets. Strings and all other iterables will not be traversed. 

1118 exit (callable): This function determines how to handle items 

1119 once they have been visited. It gets the same three 

1120 arguments as the other functions -- *path*, *key*, *value* 

1121 -- plus two more: the blank new parent object returned 

1122 from *enter*, and a list of the new items, as remapped by 

1123 *visit*. 

1124 

1125 Like *enter*, the *exit* function is only called once per 

1126 unique value. The default exit behavior is to simply add 

1127 all new items to the new parent, e.g., using 

1128 :meth:`list.extend` and :meth:`dict.update` to add to the 

1129 new parent. Immutable objects, such as a :class:`tuple` or 

1130 :class:`namedtuple`, must be recreated from scratch, but 

1131 use the same type as the new parent passed back from the 

1132 *enter* function. 

1133 reraise_visit (bool): A pragmatic convenience for the *visit* 

1134 callable. When set to ``False``, remap ignores any errors 

1135 raised by the *visit* callback. Items causing exceptions 

1136 are kept. See examples for more details. 

1137 trace (bool): Pass ``trace=True`` to print out the entire 

1138 traversal. Or pass a tuple of ``'visit'``, ``'enter'``, 

1139 or ``'exit'`` to print only the selected events. 

1140 

1141 remap is designed to cover the majority of cases with just the 

1142 *visit* callable. While passing in multiple callables is very 

1143 empowering, remap is designed so very few cases should require 

1144 passing more than one function. 

1145 

1146 When passing *enter* and *exit*, it's common and easiest to build 

1147 on the default behavior. Simply add ``from boltons.iterutils import 

1148 default_enter`` (or ``default_exit``), and have your enter/exit 

1149 function call the default behavior before or after your custom 

1150 logic. See `this example`_. 

1151 

1152 Duplicate and self-referential objects (aka reference loops) are 

1153 automatically handled internally, `as shown here`_. 

1154 

1155 .. _this example: http://sedimental.org/remap.html#sort_all_lists 

1156 .. _as shown here: http://sedimental.org/remap.html#corner_cases 

1157 

1158 """ 

1159 # TODO: improve argument formatting in sphinx doc 

1160 # TODO: enter() return (False, items) to continue traverse but cancel copy? 

1161 if not callable(visit): 

1162 raise TypeError('visit expected callable, not: %r' % visit) 

1163 if not callable(enter): 

1164 raise TypeError('enter expected callable, not: %r' % enter) 

1165 if not callable(exit): 

1166 raise TypeError('exit expected callable, not: %r' % exit) 

1167 reraise_visit = kwargs.pop('reraise_visit', True) 

1168 trace = kwargs.pop('trace', ()) 

1169 if trace is True: 

1170 trace = ('visit', 'enter', 'exit') 

1171 elif isinstance(trace, str): 

1172 trace = (trace,) 

1173 if not isinstance(trace, (tuple, list, set)): 

1174 raise TypeError('trace expected tuple of event names, not: %r' % trace) 

1175 trace_enter, trace_exit, trace_visit = 'enter' in trace, 'exit' in trace, 'visit' in trace 

1176 

1177 if kwargs: 

1178 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys()) 

1179 

1180 path, registry, stack = (), {}, [(None, root)] 

1181 new_items_stack = [] 

1182 while stack: 

1183 key, value = stack.pop() 

1184 id_value = id(value) 

1185 if key is _REMAP_EXIT: 

1186 key, new_parent, old_parent = value 

1187 id_value = id(old_parent) 

1188 path, new_items = new_items_stack.pop() 

1189 if trace_exit: 

1190 print(' .. remap exit:', path, '-', key, '-', 

1191 old_parent, '-', new_parent, '-', new_items) 

1192 value = exit(path, key, old_parent, new_parent, new_items) 

1193 if trace_exit: 

1194 print(' .. remap exit result:', value) 

1195 registry[id_value] = value 

1196 if not new_items_stack: 

1197 continue 

1198 elif id_value in registry: 

1199 value = registry[id_value] 

1200 else: 

1201 if trace_enter: 

1202 print(' .. remap enter:', path, '-', key, '-', value) 

1203 res = enter(path, key, value) 

1204 if trace_enter: 

1205 print(' .. remap enter result:', res) 

1206 try: 

1207 new_parent, new_items = res 

1208 except TypeError: 

1209 # TODO: handle False? 

1210 raise TypeError('enter should return a tuple of (new_parent,' 

1211 ' items_iterator), not: %r' % res) 

1212 if new_items is not False: 

1213 # traverse unless False is explicitly passed 

1214 registry[id_value] = new_parent 

1215 new_items_stack.append((path, [])) 

1216 if value is not root: 

1217 path += (key,) 

1218 stack.append((_REMAP_EXIT, (key, new_parent, value))) 

1219 if new_items: 

1220 stack.extend(reversed(list(new_items))) 

1221 if trace_enter: 

1222 print(' .. remap stack size now:', len(stack)) 

1223 continue 

1224 if visit is _orig_default_visit: 

1225 # avoid function call overhead by inlining identity operation 

1226 visited_item = (key, value) 

1227 else: 

1228 try: 

1229 if trace_visit: 

1230 print(' .. remap visit:', path, '-', key, '-', value) 

1231 visited_item = visit(path, key, value) 

1232 except Exception: 

1233 if reraise_visit: 

1234 raise 

1235 visited_item = True 

1236 if visited_item is False: 

1237 if trace_visit: 

1238 print(' .. remap visit result: <drop>') 

1239 continue # drop 

1240 elif visited_item is True: 

1241 visited_item = (key, value) 

1242 if trace_visit: 

1243 print(' .. remap visit result:', visited_item) 

1244 # TODO: typecheck? 

1245 # raise TypeError('expected (key, value) from visit(),' 

1246 # ' not: %r' % visited_item) 

1247 try: 

1248 new_items_stack[-1][1].append(visited_item) 

1249 except IndexError: 

1250 raise TypeError('expected remappable root, not: %r' % root) 

1251 return value 

1252 

1253 
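# A hedged sketch of the enter/exit extension pattern described in the remap
# docstring (added for illustration; the helper name sort_lists_exit is ours).
# It builds on default_exit to sort every list in the result, leaving other
# containers untouched:
#
#   >>> def sort_lists_exit(path, key, old_parent, new_parent, new_items):
#   ...     ret = default_exit(path, key, old_parent, new_parent, new_items)
#   ...     if isinstance(ret, list):
#   ...         ret.sort()
#   ...     return ret
#   >>> remap({'a': [3, 1, 2], 'b': {'c': [9, 7, 8]}}, exit=sort_lists_exit)
#   {'a': [1, 2, 3], 'b': {'c': [7, 8, 9]}}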

1254class PathAccessError(KeyError, IndexError, TypeError): 

1255 """An amalgamation of KeyError, IndexError, and TypeError, 

1256 representing what can occur when looking up a path in a nested 

1257 object. 

1258 """ 

1259 

1260 def __init__(self, exc, seg, path): 

1261 self.exc = exc 

1262 self.seg = seg 

1263 self.path = path 

1264 

1265 def __repr__(self): 

1266 cn = self.__class__.__name__ 

1267 return f'{cn}({self.exc!r}, {self.seg!r}, {self.path!r})' 

1268 

1269 def __str__(self): 

1270 return ('could not access %r from path %r, got error: %r' 

1271 % (self.seg, self.path, self.exc)) 

1272 

1273 

1274def get_path(root, path, default=_UNSET): 

1275 """Retrieve a value from a nested object via a tuple representing the 

1276 lookup path. 

1277 

1278 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}} 

1279 >>> get_path(root, ('a', 'b', 'c', 2, 0)) 

1280 3 

1281 

1282 The path tuple format is intentionally consistent with that of 

1283 :func:`remap`, but a single dotted string can also be passed. 

1284 

1285 One of get_path's chief aims is improved error messaging. EAFP is 

1286 great, but the error messages are not. 

1287 

1288 For instance, ``root['a']['b']['c'][2][1]`` gives back 

1289 ``IndexError: list index out of range`` 

1290 

1291 What went out of range where? get_path currently raises 

1292 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2, 

1293 1), got error: IndexError('list index out of range',)``, a 

1294 subclass of IndexError and KeyError. 

1295 

1296 You can also pass a default that covers the entire operation, 

1297 should the lookup fail at any level. 

1298 

1299 Args: 

1300 root: The target nesting of dictionaries, lists, or other 

1301 objects supporting ``__getitem__``. 

1302 path (tuple): A sequence of strings and integers to be successively 

1303 looked up within *root*. A dot-separated (``a.b``) string may  

1304 also be passed. 

1305 default: The value to be returned should any 

1306 ``PathAccessError`` exceptions be raised. 

1307 """ 

1308 if isinstance(path, str): 

1309 path = path.split('.') 

1310 cur = root 

1311 try: 

1312 for seg in path: 

1313 try: 

1314 cur = cur[seg] 

1315 except (KeyError, IndexError) as exc: 

1316 raise PathAccessError(exc, seg, path) 

1317 except TypeError as exc: 

1318 # either string index in a list, or a parent that 

1319 # doesn't support indexing 

1320 try: 

1321 seg = int(seg) 

1322 cur = cur[seg] 

1323 except (ValueError, KeyError, IndexError, TypeError): 

1324 if not is_iterable(cur): 

1325 exc = TypeError('%r object is not indexable' 

1326 % type(cur).__name__) 

1327 raise PathAccessError(exc, seg, path) 

1328 except PathAccessError: 

1329 if default is _UNSET: 

1330 raise 

1331 return default 

1332 return cur 

1333 

1334 
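# Hedged usage notes (added, not from the original source) for the dotted-path
# and *default* behaviors documented above:
#
#   >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}}
#   >>> get_path(root, 'a.b.c')
#   [[1], [2], [3]]
#   >>> get_path(root, ('a', 'x', 'c'), default='missing')
#   'missing'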

1335def research(root, query=lambda p, k, v: True, reraise=False, enter=default_enter): 

1336 """The :func:`research` function uses :func:`remap` to recurse over 

1337 any data nested in *root*, and find values which match a given 

1338 criterion, specified by the *query* callable. 

1339 

1340 Results are returned as a list of ``(path, value)`` pairs. The 

1341 paths are tuples in the same format accepted by 

1342 :func:`get_path`. This can be useful for comparing values nested 

1343 in two or more different structures. 

1344 

1345 Here's a simple example that finds all integers: 

1346 

1347 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None} 

1348 >>> res = research(root, query=lambda p, k, v: isinstance(v, int)) 

1349 >>> print(sorted(res)) 

1350 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)] 

1351 

1352 Note how *query* follows the same, familiar ``path, key, value`` 

1353 signature as the ``visit`` and ``enter`` functions on 

1354 :func:`remap`, and returns a :class:`bool`. 

1355 

1356 Args: 

1357 root: The target object to search. Supports the same types of 

1358 objects as :func:`remap`, including :class:`list`, 

1359 :class:`tuple`, :class:`dict`, and :class:`set`. 

1360 query (callable): The function called on every object to 

1361 determine whether to include it in the search results. The 

1362 callable must accept three arguments, *path*, *key*, and 

1363 *value*, commonly abbreviated *p*, *k*, and *v*, same as 

1364 *enter* and *visit* from :func:`remap`. 

1365 reraise (bool): Whether to reraise exceptions raised by *query* 

1366 or to simply drop the result that caused the error. 

1367 

1368 

1369 With :func:`research` it's easy to inspect the details of a data 

1370 structure, like finding values that are at a certain depth (using 

1371 ``len(p)``) and much more. If more advanced functionality is 

1372 needed, check out the code and make your own :func:`remap` 

1373 wrapper, and consider `submitting a patch`_! 

1374 

1375 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls 

1376 """ 

1377 ret = [] 

1378 

1379 if not callable(query): 

1380 raise TypeError('query expected callable, not: %r' % query) 

1381 

1382 def _enter(path, key, value): 

1383 try: 

1384 if query(path, key, value): 

1385 ret.append((path + (key,), value)) 

1386 except Exception: 

1387 if reraise: 

1388 raise 

1389 return enter(path, key, value) 

1390 

1391 remap(root, enter=_enter) 

1392 return ret 

1393 

1394 
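# A small sketch (added for illustration) of the depth-filtering idea mentioned
# in the research docstring, using len(p) to restrict matches to values nested
# exactly two levels deep:
#
#   >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
#   >>> sorted(research(root, query=lambda p, k, v: len(p) == 2 and isinstance(v, int)))
#   [(('a', 'c', 0), 2), (('a', 'c', 2), 3)]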

1395# TODO: recollect() 

1396# TODO: refilter() 

1397# TODO: reiter() 

1398 

1399 

1400# GUID iterators: 10x faster and somewhat more compact than uuid. 

1401 

1402class GUIDerator: 

1403 """The GUIDerator is an iterator that yields a globally-unique 

1404 identifier (GUID) on every iteration. The GUIDs produced are 

1405 hexadecimal strings. 

1406 

1407 Testing shows it to be around 12x faster than the uuid module. By 

1408 default it is also more compact, partly due to its default 96-bit 

1409 (24-hexdigit) length. 96 bits of randomness means that there is a 

1410 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more 

1411 or less uniqueness is desired, the *size* argument can be adjusted 

1412 accordingly. 

1413 

1414 Args: 

1415 size (int): character length of the GUID, defaults to 24. Lengths 

1416 between 20 and 36 are considered valid. 

1417 

1418 The GUIDerator has built-in fork protection that causes it to 

1419 detect a fork on next iteration and reseed accordingly. 

1420 

1421 """ 

1422 

1423 def __init__(self, size=24): 

1424 self.size = size 

1425 if size < 20 or size > 36: 

1426 raise ValueError('expected 20 <= size <= 36') 

1427 import hashlib 

1428 self._sha1 = hashlib.sha1 

1429 self.count = itertools.count() 

1430 self.reseed() 

1431 

1432 def reseed(self): 

1433 import socket 

1434 self.pid = os.getpid() 

1435 self.salt = '-'.join([str(self.pid), 

1436 socket.gethostname() or '<nohostname>', 

1437 str(time.time()), 

1438 os.urandom(6).hex()]) 

1439 return 

1440 

1441 def __iter__(self): 

1442 return self 

1443 

1444 def __next__(self): 

1445 if os.getpid() != self.pid: 

1446 self.reseed() 

1447 target_bytes = (self.salt + str(next(self.count))).encode('utf8') 

1448 hash_text = self._sha1(target_bytes).hexdigest()[:self.size] 

1449 return hash_text 

1450 

1451 next = __next__ 

1452 

1453 

1454class SequentialGUIDerator(GUIDerator): 

1455 """Much like the standard GUIDerator, the SequentialGUIDerator is an 

1456 iterator that yields a globally-unique identifier (GUID) on every 

1457 iteration. The GUIDs produced are hexadecimal strings. 

1458 

1459 The SequentialGUIDerator differs in that it picks a starting GUID 

1460 value and increments every iteration. This yields GUIDs which are 

1461 of course unique, but also ordered and lexicographically sortable. 

1462 

1463 The SequentialGUIDerator is around 50% faster than the normal 

1464 GUIDerator, making it almost 20x as fast as the built-in uuid 

1465 module. By default it is also more compact, partly due to its 

1466 96-bit (24-hexdigit) default length. 96 bits of randomness means that 

1467 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 

1468 iterations. If more or less uniqueness is desired, the *size* 

1469 argument can be adjusted accordingly. 

1470 

1471 Args: 

1472 size (int): character length of the GUID, defaults to 24. 

1473 

1474 Note that with SequentialGUIDerator there is a chance of GUIDs 

1475 growing larger than the size configured. The SequentialGUIDerator 

1476 has built-in fork protection that causes it to detect a fork on 

1477 next iteration and reseed accordingly. 

1478 

1479 """ 

1480 

1481 def reseed(self): 

1482 super().reseed() 

1483 start_str = self._sha1(self.salt.encode('utf8')).hexdigest() 

1484 self.start = int(start_str[:self.size], 16) 

1485 self.start |= (1 << ((self.size * 4) - 2)) 

1486 

1487 def __next__(self): 

1488 if os.getpid() != self.pid: 

1489 self.reseed() 

1490 return '%x' % (next(self.count) + self.start) 

1491 

1492 next = __next__ 

1493 

1494 

1495guid_iter = GUIDerator() 

1496seq_guid_iter = SequentialGUIDerator() 

1497 

1498 
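# Hedged usage sketch (added, not part of the original module): both iterators
# are consumed with next(); the default 24-hexdigit size applies to the
# module-level instances above, and sequential GUIDs increase numerically.
#
#   >>> guid = next(guid_iter)
#   >>> len(guid)
#   24
#   >>> a, b = next(seq_guid_iter), next(seq_guid_iter)
#   >>> int(a, 16) < int(b, 16)
#   True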

1499def soft_sorted(iterable, first=None, last=None, key=None, reverse=False): 

1500 """For when you care about the order of some elements, but not about 

1501 others. 

1502 

1503 Use this to float to the top and/or sink to the bottom a specific 

1504 ordering, while sorting the rest of the elements according to 

1505 normal :func:`sorted` rules. 

1506 

1507 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two']) 

1508 ['one', 'two', 'a', 'b'] 

1509 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True) 

1510 [6, 5, 3, 1, 0, 2, 4] 

1511 >>> import string 

1512 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower)) 

1513 'aA1023456789cCdDeEfFbB' 

1514 

1515 Args: 

1516 iterable (list): A list or other iterable to sort. 

1517 first (list): A sequence to enforce for elements which should 

1518 appear at the beginning of the returned list. 

1519 last (list): A sequence to enforce for elements which should 

1520 appear at the end of the returned list. 

1521 key (callable): Callable used to generate a comparable key for 

1522 each item to be sorted, same as the key in 

1523 :func:`sorted`. Note that entries in *first* and *last* 

1524 should be the keys for the items. Defaults to 

1525 passthrough/the identity function. 

1526 reverse (bool): Whether or not elements not explicitly ordered 

1527 by *first* and *last* should be in reverse order or not. 

1528 

1529 Returns a new list in sorted order. 

1530 """ 

1531 first = first or [] 

1532 last = last or [] 

1533 key = key or (lambda x: x) 

1534 seq = list(iterable) 

1535 other = [x for x in seq if not ( 

1536 (first and key(x) in first) or (last and key(x) in last))] 

1537 other.sort(key=key, reverse=reverse) 

1538 

1539 if first: 

1540 first = sorted([x for x in seq if key(x) in first], 

1541 key=lambda x: first.index(key(x))) 

1542 if last: 

1543 last = sorted([x for x in seq if key(x) in last], 

1544 key=lambda x: last.index(key(x))) 

1545 return first + other + last 

1546 

1547 

1548def untyped_sorted(iterable, key=None, reverse=False): 

1549 """A version of :func:`sorted` which will happily sort an iterable of 

1550 heterogeneous types and return a new list, similar to legacy Python's 

1551 behavior. 

1552 

1553 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def']) 

1554 [1, 2.0, 2, 'abc', 'def'] 

1555 

1556 Note how mutually orderable types are sorted as expected, as in 

1557 the case of the integers and floats above. 

1558 

1559 .. note:: 

1560 

1561 Results may vary across Python versions and builds, but the 

1562 function will produce a sorted list, except in the case of 

1563 explicitly unorderable objects. 

1564 

1565 """ 

1566 class _Wrapper: 

1567 __slots__ = ('obj',) 

1568 

1569 def __init__(self, obj): 

1570 self.obj = obj 

1571 

1572 def __lt__(self, other): 

1573 obj = key(self.obj) if key is not None else self.obj 

1574 other = key(other.obj) if key is not None else other.obj 

1575 try: 

1576 ret = obj < other 

1577 except TypeError: 

1578 ret = ((type(obj).__name__, id(type(obj)), obj) 

1579 < (type(other).__name__, id(type(other)), other)) 

1580 return ret 

1581 

1582 if key is not None and not callable(key): 

1583 raise TypeError('expected function or callable object for key, not: %r' 

1584 % key) 

1585 

1586 return sorted(iterable, key=_Wrapper, reverse=reverse) 

1587 

1588 

1589""" 

1590May actually be faster to do an isinstance check for a str path 

1591 

1592$ python -m timeit -s "x = [1]" "x[0]" 

159310000000 loops, best of 3: 0.0207 usec per loop 

1594$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass" 

159510000000 loops, best of 3: 0.029 usec per loop 

1596$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass" 

15971000000 loops, best of 3: 0.315 usec per loop 

1598# setting up try/except is fast, only around 0.01us 

1599# actually triggering the exception takes almost 10x as long 

1600 

1601$ python -m timeit -s "x = [1]" "isinstance(x, basestring)" 

160210000000 loops, best of 3: 0.141 usec per loop 

1603$ python -m timeit -s "x = [1]" "isinstance(x, str)" 

160410000000 loops, best of 3: 0.131 usec per loop 

1605$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass" 

16061000000 loops, best of 3: 0.443 usec per loop 

1607$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass" 

16081000000 loops, best of 3: 0.544 usec per loop 

1609"""