Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 25%

537 statements  


1# -*- coding: utf-8 -*- 

2 

3# Copyright (c) 2013, Mahmoud Hashemi 

4# 

5# Redistribution and use in source and binary forms, with or without 

6# modification, are permitted provided that the following conditions are 

7# met: 

8# 

9# * Redistributions of source code must retain the above copyright 

10# notice, this list of conditions and the following disclaimer. 

11# 

12# * Redistributions in binary form must reproduce the above 

13# copyright notice, this list of conditions and the following 

14# disclaimer in the documentation and/or other materials provided 

15# with the distribution. 

16# 

17# * The names of the contributors may not be used to endorse or 

18# promote products derived from this software without specific 

19# prior written permission. 

20# 

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

32 

33""":mod:`itertools` is full of great examples of Python generator 

34usage. However, there are still some critical gaps. ``iterutils`` 

35fills many of those gaps with featureful, tested, and Pythonic 

36solutions. 

37 

38Many of the functions below have two versions, one which 

39returns an iterator (denoted by the ``*_iter`` naming pattern), and a 

40shorter-named convenience form that returns a list. Some of the 

41following are based on examples from the itertools docs.

42""" 

43 
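A quick illustrative sketch of the list/iterator pairing described above, using two functions defined later in this module:

>>> chunked(range(5), 2)
[[0, 1], [2, 3], [4]]
>>> list(chunked_iter(range(5), 2))
[[0, 1], [2, 3], [4]]

The list form is convenient for small inputs; the ``*_iter`` form produces the same chunks lazily.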

44import os 

45import math 

46import time 

47import codecs 

48import random 

49import itertools 

50 

51try: 

52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable 

53except ImportError: 

54 from collections import Mapping, Sequence, Set, ItemsView, Iterable 

55 

56 

57try: 

58 from .typeutils import make_sentinel 

59 _UNSET = make_sentinel('_UNSET') 

60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT') 

61except ImportError: 

62 _REMAP_EXIT = object() 

63 _UNSET = object() 

64 

65try: 

66 from future_builtins import filter 

67 from itertools import izip 

68 _IS_PY3 = False 

69except ImportError: 

70 # Python 3 compat 

71 _IS_PY3 = True 

72 basestring = (str, bytes) 

73 unicode = str 

74 izip, xrange = zip, range 

75 

76 

77def is_iterable(obj): 

78 """Similar in nature to :func:`callable`, ``is_iterable`` returns 

79 ``True`` if an object is `iterable`_, ``False`` if not. 

80 

81 >>> is_iterable([]) 

82 True 

83 >>> is_iterable(object()) 

84 False 

85 

86 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable 

87 """ 

88 try: 

89 iter(obj) 

90 except TypeError: 

91 return False 

92 return True 

93 

94 

95def is_scalar(obj): 

96 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an 

97 object is an iterable container type. Strings are considered 

98 scalar as well, because strings are more often treated as whole 

99 values as opposed to iterables of 1-character substrings. 

100 

101 >>> is_scalar(object()) 

102 True 

103 >>> is_scalar(range(10)) 

104 False 

105 >>> is_scalar('hello') 

106 True 

107 """ 

108 return not is_iterable(obj) or isinstance(obj, basestring) 

109 

110 

111def is_collection(obj): 

112 """The opposite of :func:`is_scalar`. Returns ``True`` if an object 

113 is an iterable other than a string. 

114 

115 >>> is_collection(object()) 

116 False 

117 >>> is_collection(range(10)) 

118 True 

119 >>> is_collection('hello') 

120 False 

121 """ 

122 return is_iterable(obj) and not isinstance(obj, basestring) 

123 

124 

125def split(src, sep=None, maxsplit=None): 

126 """Splits an iterable based on a separator. Like :meth:`str.split`, 

127 but for all iterables. Returns a list of lists. 

128 

129 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None]) 

130 [['hi', 'hello'], ['sup'], ['soap']] 

131 

132 See :func:`split_iter` docs for more info. 

133 """ 

134 return list(split_iter(src, sep, maxsplit)) 

135 

136 

137def split_iter(src, sep=None, maxsplit=None): 

138 """Splits an iterable based on a separator, *sep*, a max of 

139 *maxsplit* times (no max by default). *sep* can be: 

140 

141 * a single value 

142 * an iterable of separators 

143 * a single-argument callable that returns True when a separator is 

144 encountered 

145 

146 ``split_iter()`` yields lists of non-separator values. A separator will 

147 never appear in the output. 

148 

149 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])) 

150 [['hi', 'hello'], ['sup'], ['soap']] 

151 

152 Note that ``split_iter`` is based on :func:`str.split`, so if 

153 *sep* is ``None``, ``split()`` **groups** separators. If empty lists 

154 are desired between two contiguous ``None`` values, simply use 

155 ``sep=[None]``: 

156 

157 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None])) 

158 [['hi', 'hello'], ['sup']] 

159 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None])) 

160 [['hi', 'hello'], [], ['sup'], []] 

161 

162 Using a callable separator: 

163 

164 >>> falsy_sep = lambda x: not x 

165 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep)) 

166 [['hi', 'hello'], [], ['sup'], []] 

167 

168 See :func:`split` for a list-returning version. 

169 

170 """ 

171 if not is_iterable(src): 

172 raise TypeError('expected an iterable') 

173 

174 if maxsplit is not None: 

175 maxsplit = int(maxsplit) 

176 if maxsplit == 0: 

177 yield [src] 

178 return 

179 

180 if callable(sep): 

181 sep_func = sep 

182 elif not is_scalar(sep): 

183 sep = frozenset(sep) 

184 sep_func = lambda x: x in sep 

185 else: 

186 sep_func = lambda x: x == sep 

187 

188 cur_group = [] 

189 split_count = 0 

190 for s in src: 

191 if maxsplit is not None and split_count >= maxsplit: 

192 sep_func = lambda x: False 

193 if sep_func(s): 

194 if sep is None and not cur_group: 

194 # If sep is None, str.split() "groups" separators

196 # check the str.split() docs for more info 

197 continue 

198 split_count += 1 

199 yield cur_group 

200 cur_group = [] 

201 else: 

202 cur_group.append(s) 

203 

204 if cur_group or sep is not None: 

205 yield cur_group 

206 return 

207 

208 
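The *maxsplit* argument above has no doctest of its own; a minimal sketch of its behavior with the default ``sep=None``: once *maxsplit* splits have been made, any remaining separators pass through as ordinary values.

>>> list(split_iter(['a', None, 'b', None, 'c'], maxsplit=1))
[['a'], ['b', None, 'c']]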

209def lstrip(iterable, strip_value=None): 

210 """Strips values from the beginning of an iterable. Stripped items will 

211 match the value of the argument strip_value. Functionality is analogous 

212 to that of the method str.lstrip. Returns a list. 

213 

214 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo') 

215 ['Bar', 'Bam'] 

216 

217 """ 

218 return list(lstrip_iter(iterable, strip_value)) 

219 

220 

221def lstrip_iter(iterable, strip_value=None): 

222 """Strips values from the beginning of an iterable. Stripped items will 

223 match the value of the argument strip_value. Functionality is analogous 

224 to that of the method str.lstrip. Returns a generator. 

225 

226 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo')) 

227 ['Bar', 'Bam'] 

228 

229 """ 

230 iterator = iter(iterable) 

231 for i in iterator: 

232 if i != strip_value: 

233 yield i 

234 break 

235 for i in iterator: 

236 yield i 

237 

238 

239def rstrip(iterable, strip_value=None): 

240 """Strips values from the end of an iterable. Stripped items will 

241 match the value of the argument strip_value. Functionality is analogous 

242 to that of the method str.rstrip. Returns a list. 

243 

244 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam') 

245 ['Foo', 'Bar'] 

246 

247 """ 

248 return list(rstrip_iter(iterable, strip_value))

249 

250 

251def rstrip_iter(iterable, strip_value=None): 

252 """Strips values from the end of an iterable. Stripped items will 

253 match the value of the argument strip_value. Functionality is analogous 

254 to that of the method str.rstrip. Returns a generator. 

255 

256 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam')) 

257 ['Foo', 'Bar'] 

258 

259 """ 

260 iterator = iter(iterable) 

261 for i in iterator: 

262 if i == strip_value: 

263 cache = list() 

264 cache.append(i) 

265 broken = False 

266 for i in iterator: 

267 if i == strip_value: 

268 cache.append(i) 

269 else: 

270 broken = True 

271 break 

272 if not broken: # Return to caller here because the end of the 

273 return # iterator has been reached 

274 for t in cache: 

275 yield t 

276 yield i 

277 

278 

279def strip(iterable, strip_value=None): 

280 """Strips values from the beginning and end of an iterable. Stripped items 

281 will match the value of the argument strip_value. Functionality is 

282 analogous to that of the method str.strip. Returns a list. 

283 

284 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu') 

285 ['Foo', 'Bar', 'Bam'] 

286 

287 """ 

288 return list(strip_iter(iterable, strip_value))

289 

290 

291def strip_iter(iterable, strip_value=None):

292 """Strips values from the beginning and end of an iterable. Stripped items 

293 will match the value of the argument strip_value. Functionality is 

294 analogous to that of the method str.strip. Returns a generator. 

295 

296 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')) 

297 ['Foo', 'Bar', 'Bam'] 

298 

299 """ 

300 return rstrip_iter(lstrip_iter(iterable, strip_value), strip_value)

301 

302 
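The docstrings above only show string elements; a small sketch showing that any equality-comparable value works as *strip_value*, and that interior occurrences are left in place:

>>> strip([0, 0, 3, 0, 5, 0, 0], 0)
[3, 0, 5]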

303def chunked(src, size, count=None, **kw): 

304 """Returns a list of *count* chunks, each with *size* elements, 

305 generated from iterable *src*. If *src* is not evenly divisible by 

306 *size*, the final chunk will have fewer than *size* elements. 

307 Provide the *fill* keyword argument to provide a pad value and 

308 enable padding, otherwise no padding will take place. 

309 

310 >>> chunked(range(10), 3) 

311 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

312 >>> chunked(range(10), 3, fill=None) 

313 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

314 >>> chunked(range(10), 3, count=2) 

315 [[0, 1, 2], [3, 4, 5]] 

316 

317 See :func:`chunked_iter` for more info. 

318 """ 

319 chunk_iter = chunked_iter(src, size, **kw) 

320 if count is None: 

321 return list(chunk_iter) 

322 else: 

323 return list(itertools.islice(chunk_iter, count)) 

324 

325 

326def _validate_positive_int(value, name, strictly_positive=True): 

327 value = int(value) 

328 if value < 0 or (strictly_positive and value == 0): 

329 raise ValueError('expected a positive integer ' + name) 

330 return value 

331 

332 

333def chunked_iter(src, size, **kw): 

334 """Generates *size*-sized chunks from *src* iterable. Unless the 

335 optional *fill* keyword argument is provided, iterables not evenly 

336 divisible by *size* will have a final chunk that is smaller than 

337 *size*. 

338 

339 >>> list(chunked_iter(range(10), 3)) 

340 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

341 >>> list(chunked_iter(range(10), 3, fill=None)) 

342 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

343 

344 Note that ``fill=None`` in fact uses ``None`` as the fill value. 

345 """ 

346 # TODO: add count kwarg? 

347 if not is_iterable(src): 

348 raise TypeError('expected an iterable') 

349 size = _validate_positive_int(size, 'chunk size') 

350 do_fill = True 

351 try: 

352 fill_val = kw.pop('fill') 

353 except KeyError: 

354 do_fill = False 

355 fill_val = None 

356 if kw: 

357 raise ValueError('got unexpected keyword arguments: %r' % kw.keys()) 

358 if not src: 

359 return 

360 postprocess = lambda chk: chk 

361 if isinstance(src, basestring): 

362 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk) 

363 if _IS_PY3 and isinstance(src, bytes): 

364 postprocess = lambda chk: bytes(chk) 

365 src_iter = iter(src) 

366 while True: 

367 cur_chunk = list(itertools.islice(src_iter, size)) 

368 if not cur_chunk: 

369 break 

370 lc = len(cur_chunk) 

371 if lc < size and do_fill: 

372 cur_chunk[lc:] = [fill_val] * (size - lc) 

373 yield postprocess(cur_chunk) 

374 return 

375 

376 
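One behavior of ``chunked_iter`` not shown in its docstring: thanks to the *postprocess* step, chunking a string yields joined substrings of the same type rather than lists of characters. A brief sketch:

>>> list(chunked_iter('abcdefg', 3))
['abc', 'def', 'g']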

377def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False): 

378 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*. 

379 Optionally, a start of the input can be set via *input_offset*, and

380 an overlap between the chunks may be specified via *overlap_size*.

381 Also, if *align* is set to *True*, any item whose index satisfies *i % (chunk_size-overlap_size) == 0*

382 always falls at the beginning of a chunk.

383 

384 Returns an iterator of (start, end) tuples, one tuple per chunk. 

385 

386 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) 

387 [(10, 15), (15, 20)] 

388 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) 

389 [(10, 15), (14, 19), (18, 20)] 

390 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) 

391 [(10, 15), (13, 18), (16, 20)] 

392 

393 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) 

394 [(4, 9), (9, 14), (14, 19)] 

395 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) 

396 [(4, 5), (5, 10), (10, 15), (15, 19)] 

397 

398 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) 

399 [(2, 7), (6, 11), (10, 15), (14, 17)] 

400 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

401 [(2, 5), (4, 9), (8, 13), (12, 17)] 

402 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

403 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)] 

404 """ 

405 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False) 

406 chunk_size = _validate_positive_int(chunk_size, 'chunk_size') 

407 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False) 

408 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False) 

409 

410 input_stop = input_offset + input_size 

411 

412 if align: 

413 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size) 

414 if initial_chunk_len != overlap_size: 

415 yield (input_offset, min(input_offset + initial_chunk_len, input_stop)) 

416 if input_offset + initial_chunk_len >= input_stop: 

417 return 

418 input_offset = input_offset + initial_chunk_len - overlap_size 

419 

420 for i in range(input_offset, input_stop, chunk_size - overlap_size): 

421 yield (i, min(i + chunk_size, input_stop)) 

422 

423 if i + chunk_size >= input_stop: 

424 return 

425 

426 

427def pairwise(src): 

428 """Convenience function for calling :func:`windowed` on *src*, with 

429 *size* set to 2. 

430 

431 >>> pairwise(range(5)) 

432 [(0, 1), (1, 2), (2, 3), (3, 4)] 

433 >>> pairwise([]) 

434 [] 

435 

436 The number of pairs is always one less than the number of elements 

437 in the iterable passed in, except for empty inputs, for which an

438 empty list is returned.

439 """ 

440 return windowed(src, 2) 

441 

442 

443def pairwise_iter(src): 

444 """Convenience function for calling :func:`windowed_iter` on *src*, 

445 with *size* set to 2. 

446 

447 >>> list(pairwise_iter(range(5))) 

448 [(0, 1), (1, 2), (2, 3), (3, 4)] 

449 >>> list(pairwise_iter([])) 

450 [] 

451 

452 The number of pairs is always one less than the number of elements 

453 in the iterable passed in, or zero, when *src* is empty. 

454 

455 """ 

456 return windowed_iter(src, 2) 

457 

458 

459def windowed(src, size): 

460 """Returns tuples with exactly length *size*. If the iterable is 

461 too short to make a window of length *size*, no tuples are 

462 returned. See :func:`windowed_iter` for more. 

463 """ 

464 return list(windowed_iter(src, size)) 

465 

466 

467def windowed_iter(src, size): 

468 """Returns tuples with length *size* which represent a sliding 

469 window over iterable *src*. 

470 

471 >>> list(windowed_iter(range(7), 3)) 

472 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] 

473 

474 If the iterable is too short to make a window of length *size*, 

475 then no window tuples are returned. 

476 

477 >>> list(windowed_iter(range(3), 5)) 

478 [] 

479 """ 

480 # TODO: lists? (for consistency) 

481 tees = itertools.tee(src, size) 

482 try: 

483 for i, t in enumerate(tees): 

484 for _ in xrange(i): 

485 next(t) 

486 except StopIteration: 

487 return izip([]) 

488 return izip(*tees) 

489 

490 

491def xfrange(stop, start=None, step=1.0): 

492 """Same as :func:`frange`, but generator-based instead of returning a 

493 list. 

494 

495 >>> tuple(xfrange(1, 3, step=0.75)) 

496 (1.0, 1.75, 2.5) 

497 

498 See :func:`frange` for more details. 

499 """ 

500 if not step: 

501 raise ValueError('step must be non-zero') 

502 if start is None: 

503 start, stop = 0.0, stop * 1.0 

504 else: 

505 # swap when all args are used 

506 stop, start = start * 1.0, stop * 1.0 

507 cur = start 

508 while cur < stop: 

509 yield cur 

510 cur += step 

511 

512 

513def frange(stop, start=None, step=1.0): 

514 """A :func:`range` clone for float-based ranges. 

515 

516 >>> frange(5) 

517 [0.0, 1.0, 2.0, 3.0, 4.0] 

518 >>> frange(6, step=1.25) 

519 [0.0, 1.25, 2.5, 3.75, 5.0] 

520 >>> frange(100.5, 101.5, 0.25) 

521 [100.5, 100.75, 101.0, 101.25] 

522 >>> frange(5, 0) 

523 [] 

524 >>> frange(5, 0, step=-1.25) 

525 [5.0, 3.75, 2.5, 1.25] 

526 """ 

527 if not step: 

528 raise ValueError('step must be non-zero') 

529 if start is None: 

530 start, stop = 0.0, stop * 1.0 

531 else: 

532 # swap when all args are used 

533 stop, start = start * 1.0, stop * 1.0 

534 count = int(math.ceil((stop - start) / step)) 

535 ret = [None] * count 

536 if not ret: 

537 return ret 

538 ret[0] = start 

539 for i in xrange(1, count): 

540 ret[i] = ret[i - 1] + step 

541 return ret 

542 

543 

544def backoff(start, stop, count=None, factor=2.0, jitter=False): 

545 """Returns a list of geometrically-increasing floating-point numbers, 

546 suitable for usage with `exponential backoff`_. Exactly like 

547 :func:`backoff_iter`, but without the ``'repeat'`` option for 

548 *count*. See :func:`backoff_iter` for more details. 

549 

550 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

551 

552 >>> backoff(1, 10) 

553 [1.0, 2.0, 4.0, 8.0, 10.0] 

554 """ 

555 if count == 'repeat': 

556 raise ValueError("'repeat' supported in backoff_iter, not backoff") 

557 return list(backoff_iter(start, stop, count=count, 

558 factor=factor, jitter=jitter)) 

559 

560 

561def backoff_iter(start, stop, count=None, factor=2.0, jitter=False): 

562 """Generates a sequence of geometrically-increasing floats, suitable 

563 for usage with `exponential backoff`_. Starts with *start*, 

564 increasing by *factor* until *stop* is reached, optionally 

565 stopping iteration once *count* numbers are yielded. *factor* 

566 defaults to 2. In general retrying with properly-configured 

567 backoff creates a better-behaved component for a larger service 

568 ecosystem. 

569 

570 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

571 

572 >>> list(backoff_iter(1.0, 10.0, count=5)) 

573 [1.0, 2.0, 4.0, 8.0, 10.0] 

574 >>> list(backoff_iter(1.0, 10.0, count=8)) 

575 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0] 

576 >>> list(backoff_iter(0.25, 100.0, factor=10)) 

577 [0.25, 2.5, 25.0, 100.0] 

578 

579 A simplified usage example: 

580 

581 .. code-block:: python 

582 

583 for timeout in backoff_iter(0.25, 5.0): 

584 try: 

585 res = network_call() 

586 break 

587 except Exception as e: 

588 log(e) 

589 time.sleep(timeout) 

590 

591 An enhancement for large-scale systems would be to add variation, 

592 or *jitter*, to timeout values. This is done to avoid a thundering 

593 herd on the receiving end of the network call. 

594 

595 Finally, for *count*, the special value ``'repeat'`` can be passed to 

596 continue yielding indefinitely. 

597 

598 Args: 

599 

600 start (float): Positive number for baseline. 

601 stop (float): Positive number for maximum. 

602 count (int): Number of steps before stopping 

603 iteration. Defaults to the number of steps between *start* and 

604 *stop*. Pass the string, `'repeat'`, to continue iteration 

605 indefinitely. 

606 factor (float): Rate of exponential increase. Defaults to `2.0`, 

607 e.g., `[1, 2, 4, 8, 16]`. 

608 jitter (float): A factor between `-1.0` and `1.0`, used to 

609 uniformly randomize and thus spread out timeouts in a distributed 

610 system, avoiding rhythm effects. Positive values use the base 

611 backoff curve as a maximum, negative values use the curve as a 

612 minimum. Set to 1.0 or `True` for a jitter approximating 

613 Ethernet's time-tested backoff solution. Defaults to `False`. 

614 

615 """ 

616 start = float(start) 

617 stop = float(stop) 

618 factor = float(factor) 

619 if start < 0.0: 

620 raise ValueError('expected start >= 0, not %r' % start) 

621 if factor < 1.0: 

622 raise ValueError('expected factor >= 1.0, not %r' % factor) 

623 if stop == 0.0: 

624 raise ValueError('expected stop > 0')

625 if stop < start: 

626 raise ValueError('expected stop >= start, not %r' % stop) 

627 if count is None: 

628 denom = start if start else 1 

629 count = 1 + math.ceil(math.log(stop/denom, factor)) 

630 count = count if start else count + 1 

631 if count != 'repeat' and count < 0: 

632 raise ValueError('count must be positive or "repeat", not %r' % count) 

633 if jitter: 

634 jitter = float(jitter) 

635 if not (-1.0 <= jitter <= 1.0): 

636 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter) 

637 

638 cur, i = start, 0 

639 while count == 'repeat' or i < count: 

640 if not jitter: 

641 cur_ret = cur 

642 elif jitter: 

643 cur_ret = cur - (cur * jitter * random.random()) 

644 yield cur_ret 

645 i += 1 

646 if cur == 0: 

647 cur = 1 

648 elif cur < stop: 

649 cur *= factor 

650 if cur > stop: 

651 cur = stop 

652 return 

653 

654 
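Jittered values are random and so cannot be doctested; the following is only a sketch of the retry pattern from the docstring with ``jitter=True``, where ``fetch`` is a hypothetical stand-in for a flaky network call, not part of this module.

import random
import time

from boltons.iterutils import backoff_iter

def fetch():
    # hypothetical flaky call, used here only to make the sketch runnable
    if random.random() < 0.7:
        raise IOError('transient failure')
    return 'payload'

for timeout in backoff_iter(0.25, 5.0, count='repeat', jitter=True):
    try:
        result = fetch()
        break
    except IOError:
        # with jitter=True, each wait is drawn uniformly between 0 and the
        # current point on the backoff curve, spreading retries out
        time.sleep(timeout)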

655def bucketize(src, key=bool, value_transform=None, key_filter=None): 

656 """Group values in the *src* iterable by the value returned by *key*. 

657 

658 >>> bucketize(range(5)) 

659 {False: [0], True: [1, 2, 3, 4]} 

660 >>> is_odd = lambda x: x % 2 == 1 

661 >>> bucketize(range(5), is_odd) 

662 {False: [0, 2, 4], True: [1, 3]} 

663 

664 *key* is :class:`bool` by default, but can also be a callable, a string, or a list.

665 If it is a string, it is the name of the attribute on which to bucketize objects.

666 

667 >>> bucketize([1+1j, 2+2j, 1, 2], key='real') 

668 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]} 

669 

670 If *key* is a list, it specifies the bucket for each corresponding object:

671 

672 >>> bucketize([1, 2, 365, 4, 98], key=[0, 1, 2, 0, 2])

673 {0: [1, 4], 1: [2], 2: [365, 98]} 

674 

675 

676 Value lists are not deduplicated: 

677 

678 >>> bucketize([None, None, None, 'hello']) 

679 {False: [None, None, None], True: ['hello']} 

680 

681 Bucketize into more than two groups:

682 

683 >>> bucketize(range(10), lambda x: x % 3) 

684 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]} 

685 

686 ``bucketize`` has a couple of advanced options useful in certain 

687 cases. *value_transform* can be used to modify values as they are 

688 added to buckets, and *key_filter* will allow excluding certain 

689 buckets from being collected. 

690 

691 >>> bucketize(range(5), value_transform=lambda x: x*x) 

692 {False: [0], True: [1, 4, 9, 16]} 

693 

694 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1) 

695 {0: [0, 3, 6, 9], 2: [2, 5, 8]} 

696 

697 Note in some of these examples there were at most two keys, ``True`` and 

698 ``False``, and each key present has a list with at least one 

699 item. See :func:`partition` for a version specialized for binary 

700 use cases. 

701 

702 """ 

703 if not is_iterable(src): 

704 raise TypeError('expected an iterable') 

705 elif isinstance(key, list): 

706 if len(key) != len(src): 

707 raise ValueError("key and src have to be the same length") 

708 src = zip(key, src) 

709 

710 if isinstance(key, basestring): 

711 key_func = lambda x: getattr(x, key, x) 

712 elif callable(key): 

713 key_func = key 

714 elif isinstance(key, list): 

715 key_func = lambda x: x[0] 

716 else: 

717 raise TypeError('expected key to be callable or a string or a list') 

718 

719 if value_transform is None: 

720 value_transform = lambda x: x 

721 if not callable(value_transform): 

722 raise TypeError('expected callable value transform function') 

723 if isinstance(key, list): 

724 f = value_transform 

725 value_transform = lambda x: f(x[1])

726 

727 ret = {} 

728 for val in src: 

729 key_of_val = key_func(val) 

730 if key_filter is None or key_filter(key_of_val): 

731 ret.setdefault(key_of_val, []).append(value_transform(val)) 

732 return ret 

733 

734 

735def partition(src, key=bool): 

736 """No relation to :meth:`str.partition`, ``partition`` is like 

737 :func:`bucketize`, but for added convenience returns a tuple of 

738 ``(truthy_values, falsy_values)``. 

739 

740 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye']) 

741 >>> nonempty 

742 ['hi', 'bye'] 

743 

744 *key* defaults to :class:`bool`, but can be carefully overridden to 

745 use either a function that returns either ``True`` or ``False`` or 

746 a string name of the attribute on which to partition objects. 

747 

748 >>> import string 

749 >>> is_digit = lambda x: x in string.digits 

750 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit) 

751 >>> ''.join(decimal_digits), ''.join(hexletters) 

752 ('0123456789', 'abcdefABCDEF') 

753 """ 

754 bucketized = bucketize(src, key) 

755 return bucketized.get(True, []), bucketized.get(False, []) 

756 

757 

758def unique(src, key=None): 

759 """``unique()`` returns a list of unique values, as determined by 

760 *key*, in the order they first appeared in the input iterable, 

761 *src*. 

762 

763 >>> ones_n_zeros = '11010110001010010101010' 

764 >>> ''.join(unique(ones_n_zeros)) 

765 '10' 

766 

767 See :func:`unique_iter` docs for more details. 

768 """ 

769 return list(unique_iter(src, key)) 

770 

771 

772def unique_iter(src, key=None): 

773 """Yield unique elements from the iterable, *src*, based on *key*, 

774 in the order in which they first appeared in *src*. 

775 

776 >>> repetitious = [1, 2, 3] * 10 

777 >>> list(unique_iter(repetitious)) 

778 [1, 2, 3] 

779 

780 By default, *key* is the object itself, but *key* can either be a 

781 callable or, for convenience, a string name of the attribute on 

782 which to uniqueify objects, falling back on identity when the 

783 attribute is not present. 

784 

785 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes'] 

786 >>> list(unique_iter(pleasantries, key=lambda x: len(x))) 

787 ['hi', 'hello', 'bye'] 

788 """ 

789 if not is_iterable(src): 

790 raise TypeError('expected an iterable, not %r' % type(src)) 

791 if key is None: 

792 key_func = lambda x: x 

793 elif callable(key): 

794 key_func = key 

795 elif isinstance(key, basestring): 

796 key_func = lambda x: getattr(x, key, x) 

797 else: 

798 raise TypeError('"key" expected a string or callable, not %r' % key) 

799 seen = set() 

800 for i in src: 

801 k = key_func(i) 

802 if k not in seen: 

803 seen.add(k) 

804 yield i 

805 return 

806 

807 

808def redundant(src, key=None, groups=False): 

809 """The complement of :func:`unique()`. 

810 

811 By default returns non-unique/duplicate values as a list containing the

812 *first* redundant occurrence of each value in *src*. Pass ``groups=True`` to get

813 groups of all values with redundancies, ordered by position of the 

814 first redundant value. This is useful in conjunction with some 

815 normalizing *key* function. 

816 

817 >>> redundant([1, 2, 3, 4]) 

818 [] 

819 >>> redundant([1, 2, 3, 2, 3, 3, 4]) 

820 [2, 3] 

821 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True) 

822 [[2, 2], [3, 3, 3]] 

823 

824 An example using a *key* function to do case-insensitive 

825 redundancy detection. 

826 

827 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower) 

828 ['Hi'] 

829 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower) 

830 [['hi', 'Hi', 'HI']] 

831 

832 *key* should also be used when the values in *src* are not hashable. 

833 

834 .. note:: 

835 

836 The output of this function is designed for reporting

837 duplicates in contexts where a unique input is desired. Due to

838 the grouped return type, there is no streaming equivalent of 

839 this function for the time being. 

840 

841 """ 

842 if key is None: 

843 pass 

844 elif callable(key): 

845 key_func = key 

846 elif isinstance(key, basestring): 

847 key_func = lambda x: getattr(x, key, x) 

848 else: 

849 raise TypeError('"key" expected a string or callable, not %r' % key) 

850 seen = {} # key to first seen item 

851 redundant_order = [] 

852 redundant_groups = {} 

853 for i in src: 

854 k = key_func(i) if key else i 

855 if k not in seen: 

856 seen[k] = i 

857 else: 

858 if k in redundant_groups: 

859 if groups: 

860 redundant_groups[k].append(i) 

861 else: 

862 redundant_order.append(k) 

863 redundant_groups[k] = [seen[k], i] 

864 if not groups: 

865 ret = [redundant_groups[k][1] for k in redundant_order] 

866 else: 

867 ret = [redundant_groups[k] for k in redundant_order] 

868 return ret 

869 

870 

871def one(src, default=None, key=None): 

872 """Along the same lines as builtins, :func:`all` and :func:`any`, and 

873 similar to :func:`first`, ``one()`` returns the single object in 

874 the given iterable *src* that evaluates to ``True``, as determined 

875 by callable *key*. If unset, *key* defaults to :class:`bool`. If 

876 no such objects are found, *default* is returned. If *default* is 

877 not passed, ``None`` is returned. 

878 

879 If *src* has more than one object that evaluates to ``True``,

880 *default* is likewise returned, making ``one()`` behave like an

881 `XOR`_ over an iterable.

882 

883 >>> one((True, False, False)) 

884 True 

885 >>> one((True, False, True)) 

886 >>> one((0, 0, 'a')) 

887 'a' 

888 >>> one((0, False, None)) 

889 >>> one((True, True), default=False) 

890 False 

891 >>> bool(one(('', 1))) 

892 True 

893 >>> one((10, 20, 30, 42), key=lambda i: i > 40) 

894 42 

895 

896 See `Martín Gaitán's original repo`_ for further use cases. 

897 

898 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one 

899 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or 

900 

901 """ 

902 ones = list(itertools.islice(filter(key, src), 2)) 

903 return ones[0] if len(ones) == 1 else default 

904 

905 

906def first(iterable, default=None, key=None): 

907 """Return first element of *iterable* that evaluates to ``True``, else 

908 return ``None`` or optional *default*. Similar to :func:`one`. 

909 

910 >>> first([0, False, None, [], (), 42]) 

911 42 

912 >>> first([0, False, None, [], ()]) is None 

913 True 

914 >>> first([0, False, None, [], ()], default='ohai') 

915 'ohai' 

916 >>> import re 

917 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)']) 

918 >>> m.group(1) 

919 'bc' 

920 

921 The optional *key* argument specifies a one-argument predicate function 

922 like that used for *filter()*. The *key* argument, if supplied, should be 

923 in keyword form. For example, finding the first even number in an iterable: 

924 

925 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0) 

926 4 

927 

928 Contributed by Hynek Schlawack, author of `the original standalone module`_. 

929 

930 .. _the original standalone module: https://github.com/hynek/first 

931 """ 

932 return next(filter(key, iterable), default) 

933 

934 

935def flatten_iter(iterable): 

936 """``flatten_iter()`` yields all the elements from *iterable* while 

937 collapsing any nested iterables. 

938 

939 >>> nested = [[1, 2], [[3], [4, 5]]] 

940 >>> list(flatten_iter(nested)) 

941 [1, 2, 3, 4, 5] 

942 """ 

943 for item in iterable: 

944 if isinstance(item, Iterable) and not isinstance(item, basestring): 

945 for subitem in flatten_iter(item): 

946 yield subitem 

947 else: 

948 yield item 

949 

950def flatten(iterable): 

951 """``flatten()`` returns a collapsed list of all the elements from 

952 *iterable* while collapsing any nested iterables. 

953 

954 >>> nested = [[1, 2], [[3], [4, 5]]] 

955 >>> flatten(nested) 

956 [1, 2, 3, 4, 5] 

957 """ 

958 return list(flatten_iter(iterable)) 

959 

960 

961def same(iterable, ref=_UNSET): 

962 """``same()`` returns ``True`` when all values in *iterable* are 

963 equal to one another, or optionally a reference value, 

964 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates 

965 an iterable and returns a :class:`bool`. ``same()`` returns 

966 ``True`` for empty iterables. 

967 

968 >>> same([]) 

969 True 

970 >>> same([1]) 

971 True 

972 >>> same(['a', 'a', 'a']) 

973 True 

974 >>> same(range(20)) 

975 False 

976 >>> same([[], []]) 

977 True 

978 >>> same([[], []], ref='test') 

979 False 

980 

981 """ 

982 iterator = iter(iterable) 

983 if ref is _UNSET: 

984 ref = next(iterator, ref) 

985 return all(val == ref for val in iterator) 

986 

987 

988def default_visit(path, key, value): 

989 # print('visit(%r, %r, %r)' % (path, key, value)) 

990 return key, value 

991 

992# enable the extreme: monkeypatching iterutils with a different default_visit 

993_orig_default_visit = default_visit 

994 

995 

996def default_enter(path, key, value): 

997 # print('enter(%r, %r)' % (key, value)) 

998 if isinstance(value, basestring): 

999 return value, False 

1000 elif isinstance(value, Mapping): 

1001 return value.__class__(), ItemsView(value) 

1002 elif isinstance(value, Sequence): 

1003 return value.__class__(), enumerate(value) 

1004 elif isinstance(value, Set): 

1005 return value.__class__(), enumerate(value) 

1006 else: 

1007 # files, strings, other iterables, and scalars are not 

1008 # traversed 

1009 return value, False 

1010 

1011 

1012def default_exit(path, key, old_parent, new_parent, new_items): 

1013 # print('exit(%r, %r, %r, %r, %r)' 

1014 # % (path, key, old_parent, new_parent, new_items)) 

1015 ret = new_parent 

1016 if isinstance(new_parent, Mapping): 

1017 new_parent.update(new_items) 

1018 elif isinstance(new_parent, Sequence): 

1019 vals = [v for i, v in new_items] 

1020 try: 

1021 new_parent.extend(vals) 

1022 except AttributeError: 

1023 ret = new_parent.__class__(vals) # tuples 

1024 elif isinstance(new_parent, Set): 

1025 vals = [v for i, v in new_items] 

1026 try: 

1027 new_parent.update(vals) 

1028 except AttributeError: 

1029 ret = new_parent.__class__(vals) # frozensets 

1030 else: 

1031 raise RuntimeError('unexpected iterable type: %r' % type(new_parent)) 

1032 return ret 

1033 

1034 

1035def remap(root, visit=default_visit, enter=default_enter, exit=default_exit, 

1036 **kwargs): 

1037 """The remap ("recursive map") function is used to traverse and 

1038 transform nested structures. Lists, tuples, sets, and dictionaries 

1039 are just a few of the data structures nested into heterogeneous 

1040 tree-like structures that are so common in programming. 

1041 Unfortunately, Python's built-in ways to manipulate collections 

1042 are almost all flat. List comprehensions may be fast and succinct, 

1043 but they do not recurse, making it tedious to apply quick changes 

1044 or complex transforms to real-world data. 

1045 

1046 remap goes where list comprehensions cannot. 

1047 

1048 Here's an example of removing all Nones from some data: 

1049 

1050 >>> from pprint import pprint 

1051 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None}, 

1052 ... 'Babylon 5': 6, 'Dr. Who': None} 

1053 >>> pprint(remap(reviews, lambda p, k, v: v is not None)) 

1054 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}} 

1055 

1056 Notice how both Nones have been removed despite the nesting in the 

1057 dictionary. Not bad for a one-liner, and that's just the beginning. 

1058 See `this remap cookbook`_ for more delicious recipes. 

1059 

1060 .. _this remap cookbook: http://sedimental.org/remap.html 

1061 

1062 remap takes four main arguments: the object to traverse and three 

1063 optional callables which determine how the remapped object will be 

1064 created. 

1065 

1066 Args: 

1067 

1068 root: The target object to traverse. By default, remap 

1069 supports iterables like :class:`list`, :class:`tuple`, 

1070 :class:`dict`, and :class:`set`, but any object traversable by 

1071 *enter* will work. 

1072 visit (callable): This function is called on every item in 

1073 *root*. It must accept three positional arguments, *path*, 

1074 *key*, and *value*. *path* is simply a tuple of parents' 

1075 keys. *visit* should return the new key-value pair. It may 

1076 also return ``True`` as shorthand to keep the old item 

1077 unmodified, or ``False`` to drop the item from the new 

1078 structure. *visit* is called after *enter*, on the new parent. 

1079 

1080 The *visit* function is called for every item in root, 

1081 including duplicate items. For traversable values, it is 

1082 called on the new parent object, after all its children 

1083 have been visited. The default visit behavior simply 

1084 returns the key-value pair unmodified. 

1085 enter (callable): This function controls which items in *root* 

1086 are traversed. It accepts the same arguments as *visit*: the 

1087 path, the key, and the value of the current item. It returns a 

1088 pair of the blank new parent, and an iterator over the items 

1089 which should be visited. If ``False`` is returned instead of 

1090 an iterator, the value will not be traversed. 

1091 

1092 The *enter* function is only called once per unique value. The 

1093 default enter behavior supports mappings, sequences, and

1094 sets. Strings and all other iterables will not be traversed. 

1095 exit (callable): This function determines how to handle items 

1096 once they have been visited. It gets the same three 

1097 arguments as the other functions -- *path*, *key*, *value* 

1098 -- plus two more: the blank new parent object returned 

1099 from *enter*, and a list of the new items, as remapped by 

1100 *visit*. 

1101 

1102 Like *enter*, the *exit* function is only called once per 

1103 unique value. The default exit behavior is to simply add 

1104 all new items to the new parent, e.g., using 

1105 :meth:`list.extend` and :meth:`dict.update` to add to the 

1106 new parent. Immutable objects, such as a :class:`tuple` or 

1107 :class:`namedtuple`, must be recreated from scratch, but 

1108 use the same type as the new parent passed back from the 

1109 *enter* function. 

1110 reraise_visit (bool): A pragmatic convenience for the *visit* 

1111 callable. When set to ``False``, remap ignores any errors 

1112 raised by the *visit* callback. Items causing exceptions 

1113 are kept. See examples for more details. 

1114 

1115 remap is designed to cover the majority of cases with just the 

1116 *visit* callable. While passing in multiple callables is very 

1117 empowering, remap is designed so very few cases should require 

1118 passing more than one function. 

1119 

1120 When passing *enter* and *exit*, it's common and easiest to build 

1121 on the default behavior. Simply add ``from boltons.iterutils import 

1122 default_enter`` (or ``default_exit``), and have your enter/exit 

1123 function call the default behavior before or after your custom 

1124 logic. See `this example`_. 

1125 

1126 Duplicate and self-referential objects (aka reference loops) are 

1127 automatically handled internally, `as shown here`_. 

1128 

1129 .. _this example: http://sedimental.org/remap.html#sort_all_lists 

1130 .. _as shown here: http://sedimental.org/remap.html#corner_cases 

1131 

1132 """ 

1133 # TODO: improve argument formatting in sphinx doc 

1134 # TODO: enter() return (False, items) to continue traverse but cancel copy? 

1135 if not callable(visit): 

1136 raise TypeError('visit expected callable, not: %r' % visit) 

1137 if not callable(enter): 

1138 raise TypeError('enter expected callable, not: %r' % enter) 

1139 if not callable(exit): 

1140 raise TypeError('exit expected callable, not: %r' % exit) 

1141 reraise_visit = kwargs.pop('reraise_visit', True) 

1142 if kwargs: 

1143 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys()) 

1144 

1145 path, registry, stack = (), {}, [(None, root)] 

1146 new_items_stack = [] 

1147 while stack: 

1148 key, value = stack.pop() 

1149 id_value = id(value) 

1150 if key is _REMAP_EXIT: 

1151 key, new_parent, old_parent = value 

1152 id_value = id(old_parent) 

1153 path, new_items = new_items_stack.pop() 

1154 value = exit(path, key, old_parent, new_parent, new_items) 

1155 registry[id_value] = value 

1156 if not new_items_stack: 

1157 continue 

1158 elif id_value in registry: 

1159 value = registry[id_value] 

1160 else: 

1161 res = enter(path, key, value) 

1162 try: 

1163 new_parent, new_items = res 

1164 except TypeError: 

1165 # TODO: handle False? 

1166 raise TypeError('enter should return a tuple of (new_parent,' 

1167 ' items_iterator), not: %r' % res) 

1168 if new_items is not False: 

1169 # traverse unless False is explicitly passed 

1170 registry[id_value] = new_parent 

1171 new_items_stack.append((path, [])) 

1172 if value is not root: 

1173 path += (key,) 

1174 stack.append((_REMAP_EXIT, (key, new_parent, value))) 

1175 if new_items: 

1176 stack.extend(reversed(list(new_items))) 

1177 continue 

1178 if visit is _orig_default_visit: 

1179 # avoid function call overhead by inlining identity operation 

1180 visited_item = (key, value) 

1181 else: 

1182 try: 

1183 visited_item = visit(path, key, value) 

1184 except Exception: 

1185 if reraise_visit: 

1186 raise 

1187 visited_item = True 

1188 if visited_item is False: 

1189 continue # drop 

1190 elif visited_item is True: 

1191 visited_item = (key, value) 

1192 # TODO: typecheck? 

1193 # raise TypeError('expected (key, value) from visit(),' 

1194 # ' not: %r' % visited_item) 

1195 try: 

1196 new_items_stack[-1][1].append(visited_item) 

1197 except IndexError: 

1198 raise TypeError('expected remappable root, not: %r' % root) 

1199 return value 

1200 

1201 
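The docstring example above uses the ``True``/``False`` shorthand for *visit*; a complementary sketch where *visit* returns a replacement ``(key, value)`` pair, upper-casing every string leaf:

>>> upper = lambda p, k, v: (k, v.upper()) if isinstance(v, str) else (k, v)
>>> remap({'a': 'hi', 'b': ['ok', 'bye']}, visit=upper)
{'a': 'HI', 'b': ['OK', 'BYE']}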

1202class PathAccessError(KeyError, IndexError, TypeError): 

1203 """An amalgamation of KeyError, IndexError, and TypeError, 

1204 representing what can occur when looking up a path in a nested 

1205 object. 

1206 """ 

1207 def __init__(self, exc, seg, path): 

1208 self.exc = exc 

1209 self.seg = seg 

1210 self.path = path 

1211 

1212 def __repr__(self): 

1213 cn = self.__class__.__name__ 

1214 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path) 

1215 

1216 def __str__(self): 

1217 return ('could not access %r from path %r, got error: %r' 

1218 % (self.seg, self.path, self.exc)) 

1219 

1220 

1221def get_path(root, path, default=_UNSET): 

1222 """Retrieve a value from a nested object via a tuple representing the 

1223 lookup path. 

1224 

1225 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}} 

1226 >>> get_path(root, ('a', 'b', 'c', 2, 0)) 

1227 3 

1228 

1229 The path format is intentionally consistent with that of 

1230 :func:`remap`. 

1231 

1232 One of get_path's chief aims is improved error messaging. EAFP is 

1233 great, but the error messages are not. 

1234 

1235 For instance, ``root['a']['b']['c'][2][1]`` gives back 

1236 ``IndexError: list index out of range`` 

1237 

1238 What went out of range where? get_path currently raises 

1239 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2, 

1240 1), got error: IndexError('list index out of range',)``, a 

1241 subclass of IndexError and KeyError. 

1242 

1243 You can also pass a default that covers the entire operation, 

1244 should the lookup fail at any level. 

1245 

1246 Args: 

1247 root: The target nesting of dictionaries, lists, or other 

1248 objects supporting ``__getitem__``. 

1249 path (tuple): A tuple of strings and integers to be successively

1250 looked up within *root*. 

1251 default: The value to be returned should any 

1252 ``PathAccessError`` exceptions be raised. 

1253 """ 

1254 if isinstance(path, basestring): 

1255 path = path.split('.') 

1256 cur = root 

1257 try: 

1258 for seg in path: 

1259 try: 

1260 cur = cur[seg] 

1261 except (KeyError, IndexError) as exc: 

1262 raise PathAccessError(exc, seg, path) 

1263 except TypeError as exc: 

1264 # either string index in a list, or a parent that 

1265 # doesn't support indexing 

1266 try: 

1267 seg = int(seg) 

1268 cur = cur[seg] 

1269 except (ValueError, KeyError, IndexError, TypeError): 

1270 if not is_iterable(cur): 

1271 exc = TypeError('%r object is not indexable' 

1272 % type(cur).__name__) 

1273 raise PathAccessError(exc, seg, path) 

1274 except PathAccessError: 

1275 if default is _UNSET: 

1276 raise 

1277 return default 

1278 return cur 

1279 

1280 
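One detail of ``get_path`` implemented above but not covered by its docstring: a string *path* is split on ``'.'`` before lookup, so dotted paths work too, with numeric segments falling back to integer indices. A minimal sketch:

>>> get_path({'a': {'b': 'c'}}, 'a.b')
'c'
>>> get_path({'a': ['x', 'y']}, 'a.1')
'y'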

1281def research(root, query=lambda p, k, v: True, reraise=False): 

1282 """The :func:`research` function uses :func:`remap` to recurse over 

1283 any data nested in *root*, and find values which match a given 

1284 criterion, specified by the *query* callable. 

1285 

1286 Results are returned as a list of ``(path, value)`` pairs. The 

1287 paths are tuples in the same format accepted by 

1288 :func:`get_path`. This can be useful for comparing values nested 

1289 in two or more different structures. 

1290 

1291 Here's a simple example that finds all integers: 

1292 

1293 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None} 

1294 >>> res = research(root, query=lambda p, k, v: isinstance(v, int)) 

1295 >>> print(sorted(res)) 

1296 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)] 

1297 

1298 Note how *query* follows the same, familiar ``path, key, value`` 

1299 signature as the ``visit`` and ``enter`` functions on 

1300 :func:`remap`, and returns a :class:`bool`. 

1301 

1302 Args: 

1303 root: The target object to search. Supports the same types of 

1304 objects as :func:`remap`, including :class:`list`, 

1305 :class:`tuple`, :class:`dict`, and :class:`set`. 

1306 query (callable): The function called on every object to 

1307 determine whether to include it in the search results. The 

1308 callable must accept three arguments, *path*, *key*, and 

1309 *value*, commonly abbreviated *p*, *k*, and *v*, same as 

1310 *enter* and *visit* from :func:`remap`. 

1311 reraise (bool): Whether to reraise exceptions raised by *query* 

1312 or to simply drop the result that caused the error. 

1313 

1314 

1315 With :func:`research` it's easy to inspect the details of a data 

1316 structure, like finding values that are at a certain depth (using 

1317 ``len(p)``) and much more. If more advanced functionality is 

1318 needed, check out the code and make your own :func:`remap` 

1319 wrapper, and consider `submitting a patch`_! 

1320 

1321 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls 

1322 """ 

1323 ret = [] 

1324 

1325 if not callable(query): 

1326 raise TypeError('query expected callable, not: %r' % query) 

1327 

1328 def enter(path, key, value): 

1329 try: 

1330 if query(path, key, value): 

1331 ret.append((path + (key,), value)) 

1332 except Exception: 

1333 if reraise: 

1334 raise 

1335 return default_enter(path, key, value) 

1336 

1337 remap(root, enter=enter) 

1338 return ret 

1339 

1340 
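The paragraph above mentions filtering by depth with ``len(p)``; a short sketch using the same *root* as the docstring, keeping only values whose parent path has length one:

>>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None}
>>> sorted(research(root, query=lambda p, k, v: len(p) == 1))
[(('a', 'b'), 1), (('a', 'c'), (2, 'd', 3))]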

1341# TODO: recollect() 

1342# TODO: refilter() 

1343# TODO: reiter() 

1344 

1345 

1346# GUID iterators: 10x faster and somewhat more compact than uuid. 

1347 

1348class GUIDerator(object): 

1349 """The GUIDerator is an iterator that yields a globally-unique 

1350 identifier (GUID) on every iteration. The GUIDs produced are 

1351 hexadecimal strings. 

1352 

1353 Testing shows it to be around 12x faster than the uuid module. By 

1354 default it is also more compact, partly due to its default 96-bit 

1355 (24-hexdigit) length. 96 bits of randomness means that there is a 

1356 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more 

1357 or less uniqueness is desired, the *size* argument can be adjusted 

1358 accordingly. 

1359 

1360 Args: 

1361 size (int): character length of the GUID, defaults to 24. Lengths 

1362 between 20 and 36 are considered valid. 

1363 

1364 The GUIDerator has built-in fork protection that causes it to 

1365 detect a fork on next iteration and reseed accordingly. 

1366 

1367 """ 

1368 def __init__(self, size=24): 

1369 self.size = size 

1370 if size < 20 or size > 36: 

1371 raise ValueError('expected 20 <= size <= 36')

1372 import hashlib 

1373 self._sha1 = hashlib.sha1 

1374 self.count = itertools.count() 

1375 self.reseed() 

1376 

1377 def reseed(self): 

1378 import socket 

1379 self.pid = os.getpid() 

1380 self.salt = '-'.join([str(self.pid), 

1381 socket.gethostname() or '<nohostname>',

1382 str(time.time()), 

1383 codecs.encode(os.urandom(6), 

1384 'hex_codec').decode('ascii')]) 

1385 # that codecs trick is the best/only way to get a bytes to 

1386 # hexbytes in py2/3 

1387 return 

1388 

1389 def __iter__(self): 

1390 return self 

1391 

1392 if _IS_PY3: 

1393 def __next__(self): 

1394 if os.getpid() != self.pid: 

1395 self.reseed() 

1396 target_bytes = (self.salt + str(next(self.count))).encode('utf8') 

1397 hash_text = self._sha1(target_bytes).hexdigest()[:self.size] 

1398 return hash_text 

1399 else: 

1400 def __next__(self): 

1401 if os.getpid() != self.pid: 

1402 self.reseed() 

1403 return self._sha1(self.salt + 

1404 str(next(self.count))).hexdigest()[:self.size] 

1405 

1406 next = __next__ 

1407 

1408 

1409class SequentialGUIDerator(GUIDerator): 

1410 """Much like the standard GUIDerator, the SequentialGUIDerator is an 

1411 iterator that yields a globally-unique identifier (GUID) on every 

1412 iteration. The GUIDs produced are hexadecimal strings. 

1413 

1414 The SequentialGUIDerator differs in that it picks a starting GUID 

1415 value and increments every iteration. This yields GUIDs which are 

1416 of course unique, but also ordered and lexicographically sortable. 

1417 

1418 The SequentialGUIDerator is around 50% faster than the normal 

1419 GUIDerator, making it almost 20x as fast as the built-in uuid 

1420 module. By default it is also more compact, partly due to its 

1421 96-bit (24-hexdigit) default length. 96 bits of randomness means that 

1422 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 

1423 iterations. If more or less uniqueness is desired, the *size* 

1424 argument can be adjusted accordingly. 

1425 

1426 Args: 

1427 size (int): character length of the GUID, defaults to 24. 

1428 

1429 Note that with SequentialGUIDerator there is a chance of GUIDs 

1430 growing larger than the size configured. The SequentialGUIDerator 

1431 has built-in fork protection that causes it to detect a fork on 

1432 next iteration and reseed accordingly. 

1433 

1434 """ 

1435 

1436 if _IS_PY3: 

1437 def reseed(self): 

1438 super(SequentialGUIDerator, self).reseed() 

1439 start_str = self._sha1(self.salt.encode('utf8')).hexdigest() 

1440 self.start = int(start_str[:self.size], 16) 

1441 self.start |= (1 << ((self.size * 4) - 2)) 

1442 else: 

1443 def reseed(self): 

1444 super(SequentialGUIDerator, self).reseed() 

1445 start_str = self._sha1(self.salt).hexdigest() 

1446 self.start = int(start_str[:self.size], 16) 

1447 self.start |= (1 << ((self.size * 4) - 2)) 

1448 

1449 def __next__(self): 

1450 if os.getpid() != self.pid: 

1451 self.reseed() 

1452 return '%x' % (next(self.count) + self.start) 

1453 

1454 next = __next__ 

1455 

1456 

1457guid_iter = GUIDerator() 

1458seq_guid_iter = SequentialGUIDerator() 

1459 
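The module-level ``guid_iter`` and ``seq_guid_iter`` instances are ready to use; their output is random, so this is only a sketch rather than a doctest:

token = next(guid_iter)          # a 24-character hex string
assert len(token) == 24

batch = [next(seq_guid_iter) for _ in range(3)]
# consecutive sequential GUIDs are increasing, so they sort lexicographically
# (barring the rare length rollover the docstring warns about)
assert batch == sorted(batch)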

1460 

1461def soft_sorted(iterable, first=None, last=None, key=None, reverse=False): 

1462 """For when you care about the order of some elements, but not about 

1463 others. 

1464 

1465 Use this to float to the top and/or sink to the bottom a specific 

1466 ordering, while sorting the rest of the elements according to 

1467 normal :func:`sorted` rules. 

1468 

1469 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two']) 

1470 ['one', 'two', 'a', 'b'] 

1471 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True) 

1472 [6, 5, 3, 1, 0, 2, 4] 

1473 >>> import string 

1474 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower)) 

1475 'aA1023456789cCdDeEfFbB' 

1476 

1477 Args: 

1478 iterable (list): A list or other iterable to sort. 

1479 first (list): A sequence to enforce for elements which should 

1480 appear at the beginning of the returned list. 

1481 last (list): A sequence to enforce for elements which should 

1482 appear at the end of the returned list. 

1483 key (callable): Callable used to generate a comparable key for 

1484 each item to be sorted, same as the key in 

1485 :func:`sorted`. Note that entries in *first* and *last* 

1486 should be the keys for the items. Defaults to 

1487 passthrough/the identity function. 

1488 reverse (bool): Whether or not elements not explicitly ordered 

1489 by *first* and *last* should be in reverse order or not. 

1490 

1491 Returns a new list in sorted order. 

1492 """ 

1493 first = first or [] 

1494 last = last or [] 

1495 key = key or (lambda x: x) 

1496 seq = list(iterable) 

1497 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))] 

1498 other.sort(key=key, reverse=reverse) 

1499 

1500 if first: 

1501 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x))) 

1502 if last: 

1503 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x))) 

1504 return first + other + last 

1505 

1506 

1507def untyped_sorted(iterable, key=None, reverse=False): 

1508 """A version of :func:`sorted` which will happily sort an iterable of 

1509 heterogeneous types and return a new list, similar to legacy Python's 

1510 behavior. 

1511 

1512 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def']) 

1513 [1, 2.0, 2, 'abc', 'def'] 

1514 

1515 Note how mutually orderable types are sorted as expected, as in 

1516 the case of the integers and floats above. 

1517 

1518 .. note:: 

1519 

1520 Results may vary across Python versions and builds, but the 

1521 function will produce a sorted list, except in the case of 

1522 explicitly unorderable objects. 

1523 

1524 """ 

1525 class _Wrapper(object): 

1526 __slots__ = ('obj',)

1527 

1528 def __init__(self, obj): 

1529 self.obj = obj 

1530 

1531 def __lt__(self, other): 

1532 obj = key(self.obj) if key is not None else self.obj 

1533 other = key(other.obj) if key is not None else other.obj 

1534 try: 

1535 ret = obj < other 

1536 except TypeError: 

1537 ret = ((type(obj).__name__, id(type(obj)), obj) 

1538 < (type(other).__name__, id(type(other)), other)) 

1539 return ret 

1540 

1541 if key is not None and not callable(key): 

1542 raise TypeError('expected function or callable object for key, not: %r' 

1543 % key) 

1544 

1545 return sorted(iterable, key=_Wrapper, reverse=reverse) 

1546 
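The *key* parameter accepted above is not demonstrated in the docstring; a small sketch sorting mixed types by their string form:

>>> untyped_sorted(['b', 1, 'a', 2], key=str)
[1, 2, 'a', 'b']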

1547""" 

1548May actually be faster to do an isinstance check for a str path 

1549 

1550$ python -m timeit -s "x = [1]" "x[0]" 

155110000000 loops, best of 3: 0.0207 usec per loop 

1552$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass" 

155310000000 loops, best of 3: 0.029 usec per loop 

1554$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass" 

15551000000 loops, best of 3: 0.315 usec per loop 

1556# setting up try/except is fast, only around 0.01us 

1557# actually triggering the exception takes almost 10x as long 

1558 

1559$ python -m timeit -s "x = [1]" "isinstance(x, basestring)" 

156010000000 loops, best of 3: 0.141 usec per loop 

1561$ python -m timeit -s "x = [1]" "isinstance(x, str)" 

156210000000 loops, best of 3: 0.131 usec per loop 

1563$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass" 

15641000000 loops, best of 3: 0.443 usec per loop 

1565$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass" 

15661000000 loops, best of 3: 0.544 usec per loop 

1567"""