Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/boltons/iterutils.py: 24%

546 statements  

« prev     ^ index     » next       coverage.py v7.4.0, created at 2024-01-11 06:58 +0000

1# -*- coding: utf-8 -*- 

2 

3# Copyright (c) 2013, Mahmoud Hashemi 

4# 

5# Redistribution and use in source and binary forms, with or without 

6# modification, are permitted provided that the following conditions are 

7# met: 

8# 

9# * Redistributions of source code must retain the above copyright 

10# notice, this list of conditions and the following disclaimer. 

11# 

12# * Redistributions in binary form must reproduce the above 

13# copyright notice, this list of conditions and the following 

14# disclaimer in the documentation and/or other materials provided 

15# with the distribution. 

16# 

17# * The names of the contributors may not be used to endorse or 

18# promote products derived from this software without specific 

19# prior written permission. 

20# 

21# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 

22# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 

23# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 

24# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 

25# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 

26# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 

27# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 

28# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 

29# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 

30# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 

31# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 

32 

33""":mod:`itertools` is full of great examples of Python generator 

34usage. However, there are still some critical gaps. ``iterutils`` 

35fills many of those gaps with featureful, tested, and Pythonic 

36solutions. 

37 

38Many of the functions below have two versions, one which 

39returns an iterator (denoted by the ``*_iter`` naming pattern), and a 

40shorter-named convenience form that returns a list. Some of the 

41following are based on examples in itertools docs. 

42""" 

43 

44import os 

45import math 

46import time 

47import codecs 

48import random 

49import itertools 

50 

51try: 

52 from collections.abc import Mapping, Sequence, Set, ItemsView, Iterable 

53except ImportError: 

54 from collections import Mapping, Sequence, Set, ItemsView, Iterable 

55 

56 

57try: 

58 from .typeutils import make_sentinel 

59 _UNSET = make_sentinel('_UNSET') 

60 _REMAP_EXIT = make_sentinel('_REMAP_EXIT') 

61except ImportError: 

62 _REMAP_EXIT = object() 

63 _UNSET = object() 

64 

65try: 

66 from future_builtins import filter 

67 from itertools import izip, izip_longest as zip_longest 

68 _IS_PY3 = False 

69except ImportError: 

70 # Python 3 compat 

71 _IS_PY3 = True 

72 basestring = (str, bytes) 

73 unicode = str 

74 izip, xrange = zip, range 

75 from itertools import zip_longest 

76 

77 

78def is_iterable(obj): 

79 """Similar in nature to :func:`callable`, ``is_iterable`` returns 

80 ``True`` if an object is `iterable`_, ``False`` if not. 

81 

82 >>> is_iterable([]) 

83 True 

84 >>> is_iterable(object()) 

85 False 

86 

87 .. _iterable: https://docs.python.org/2/glossary.html#term-iterable 

88 """ 

89 try: 

90 iter(obj) 

91 except TypeError: 

92 return False 

93 return True 

94 

95 

96def is_scalar(obj): 

97 """A near-mirror of :func:`is_iterable`. Returns ``False`` if an 

98 object is an iterable container type. Strings are considered 

99 scalar as well, because strings are more often treated as whole 

100 values as opposed to iterables of 1-character substrings. 

101 

102 >>> is_scalar(object()) 

103 True 

104 >>> is_scalar(range(10)) 

105 False 

106 >>> is_scalar('hello') 

107 True 

108 """ 

109 return not is_iterable(obj) or isinstance(obj, basestring) 

110 

111 

112def is_collection(obj): 

113 """The opposite of :func:`is_scalar`. Returns ``True`` if an object 

114 is an iterable other than a string. 

115 

116 >>> is_collection(object()) 

117 False 

118 >>> is_collection(range(10)) 

119 True 

120 >>> is_collection('hello') 

121 False 

122 """ 

123 return is_iterable(obj) and not isinstance(obj, basestring) 

124 

125 

126def split(src, sep=None, maxsplit=None): 

127 """Splits an iterable based on a separator. Like :meth:`str.split`, 

128 but for all iterables. Returns a list of lists. 

129 

130 >>> split(['hi', 'hello', None, None, 'sup', None, 'soap', None]) 

131 [['hi', 'hello'], ['sup'], ['soap']] 

132 

133 See :func:`split_iter` docs for more info. 

134 """ 

135 return list(split_iter(src, sep, maxsplit)) 

136 

137 

138def split_iter(src, sep=None, maxsplit=None): 

139 """Splits an iterable based on a separator, *sep*, a max of 

140 *maxsplit* times (no max by default). *sep* can be: 

141 

142 * a single value 

143 * an iterable of separators 

144 * a single-argument callable that returns True when a separator is 

145 encountered 

146 

147 ``split_iter()`` yields lists of non-separator values. A separator will 

148 never appear in the output. 

149 

150 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None, 'soap', None])) 

151 [['hi', 'hello'], ['sup'], ['soap']] 

152 

153 Note that ``split_iter`` is based on :func:`str.split`, so if 

154 *sep* is ``None``, ``split()`` **groups** separators. If empty lists 

155 are desired between two contiguous ``None`` values, simply use 

156 ``sep=[None]``: 

157 

158 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None])) 

159 [['hi', 'hello'], ['sup']] 

160 >>> list(split_iter(['hi', 'hello', None, None, 'sup', None], sep=[None])) 

161 [['hi', 'hello'], [], ['sup'], []] 

162 

163 Using a callable separator: 

164 

165 >>> falsy_sep = lambda x: not x 

166 >>> list(split_iter(['hi', 'hello', None, '', 'sup', False], falsy_sep)) 

167 [['hi', 'hello'], [], ['sup'], []] 

168 

169 See :func:`split` for a list-returning version. 

170 

171 """ 

172 if not is_iterable(src): 

173 raise TypeError('expected an iterable') 

174 

175 if maxsplit is not None: 

176 maxsplit = int(maxsplit) 

177 if maxsplit == 0: 

178 yield [src] 

179 return 

180 

181 if callable(sep): 

182 sep_func = sep 

183 elif not is_scalar(sep): 

184 sep = frozenset(sep) 

185 sep_func = lambda x: x in sep 

186 else: 

187 sep_func = lambda x: x == sep 

188 

189 cur_group = [] 

190 split_count = 0 

191 for s in src: 

192 if maxsplit is not None and split_count >= maxsplit: 

193 sep_func = lambda x: False 

194 if sep_func(s): 

195 if sep is None and not cur_group: 

196 # If sep is none, str.split() "groups" separators 

197 # check the str.split() docs for more info 

198 continue 

199 split_count += 1 

200 yield cur_group 

201 cur_group = [] 

202 else: 

203 cur_group.append(s) 

204 

205 if cur_group or sep is not None: 

206 yield cur_group 

207 return 

208 

209 

210def lstrip(iterable, strip_value=None): 

211 """Strips values from the beginning of an iterable. Stripped items will 

212 match the value of the argument strip_value. Functionality is analogous 

213 to that of the method str.lstrip. Returns a list. 

214 

215 >>> lstrip(['Foo', 'Bar', 'Bam'], 'Foo') 

216 ['Bar', 'Bam'] 

217 

218 """ 

219 return list(lstrip_iter(iterable, strip_value)) 

220 

221 

222def lstrip_iter(iterable, strip_value=None): 

223 """Strips values from the beginning of an iterable. Stripped items will 

224 match the value of the argument strip_value. Functionality is analogous 

225 to that of the method str.lstrip. Returns a generator. 

226 

227 >>> list(lstrip_iter(['Foo', 'Bar', 'Bam'], 'Foo')) 

228 ['Bar', 'Bam'] 

229 

230 """ 

231 iterator = iter(iterable) 

232 for i in iterator: 

233 if i != strip_value: 

234 yield i 

235 break 

236 for i in iterator: 

237 yield i 

238 

239 

240def rstrip(iterable, strip_value=None): 

241 """Strips values from the end of an iterable. Stripped items will 

242 match the value of the argument strip_value. Functionality is analogous 

243 to that of the method str.rstrip. Returns a list. 

244 

245 >>> rstrip(['Foo', 'Bar', 'Bam'], 'Bam') 

246 ['Foo', 'Bar'] 

247 

248 """ 

249 return list(rstrip_iter(iterable,strip_value)) 

250 

251 

252def rstrip_iter(iterable, strip_value=None): 

253 """Strips values from the end of an iterable. Stripped items will 

254 match the value of the argument strip_value. Functionality is analogous 

255 to that of the method str.rstrip. Returns a generator. 

256 

257 >>> list(rstrip_iter(['Foo', 'Bar', 'Bam'], 'Bam')) 

258 ['Foo', 'Bar'] 

259 

260 """ 

261 iterator = iter(iterable) 

262 for i in iterator: 

263 if i == strip_value: 

264 cache = list() 

265 cache.append(i) 

266 broken = False 

267 for i in iterator: 

268 if i == strip_value: 

269 cache.append(i) 

270 else: 

271 broken = True 

272 break 

273 if not broken: # Return to caller here because the end of the 

274 return # iterator has been reached 

275 for t in cache: 

276 yield t 

277 yield i 

278 

279 

280def strip(iterable, strip_value=None): 

281 """Strips values from the beginning and end of an iterable. Stripped items 

282 will match the value of the argument strip_value. Functionality is 

283 analogous to that of the method str.strip. Returns a list. 

284 

285 >>> strip(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu') 

286 ['Foo', 'Bar', 'Bam'] 

287 

288 """ 

289 return list(strip_iter(iterable,strip_value)) 

290 

291 

292def strip_iter(iterable,strip_value=None): 

293 """Strips values from the beginning and end of an iterable. Stripped items 

294 will match the value of the argument strip_value. Functionality is 

295 analogous to that of the method str.strip. Returns a generator. 

296 

297 >>> list(strip_iter(['Fu', 'Foo', 'Bar', 'Bam', 'Fu'], 'Fu')) 

298 ['Foo', 'Bar', 'Bam'] 

299 

300 """ 

301 return rstrip_iter(lstrip_iter(iterable,strip_value),strip_value) 

302 

303 

304def chunked(src, size, count=None, **kw): 

305 """Returns a list of *count* chunks, each with *size* elements, 

306 generated from iterable *src*. If *src* is not evenly divisible by 

307 *size*, the final chunk will have fewer than *size* elements. 

308 Provide the *fill* keyword argument to provide a pad value and 

309 enable padding, otherwise no padding will take place. 

310 

311 >>> chunked(range(10), 3) 

312 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

313 >>> chunked(range(10), 3, fill=None) 

314 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

315 >>> chunked(range(10), 3, count=2) 

316 [[0, 1, 2], [3, 4, 5]] 

317 

318 See :func:`chunked_iter` for more info. 

319 """ 

320 chunk_iter = chunked_iter(src, size, **kw) 

321 if count is None: 

322 return list(chunk_iter) 

323 else: 

324 return list(itertools.islice(chunk_iter, count)) 

325 

326 

327def _validate_positive_int(value, name, strictly_positive=True): 

328 value = int(value) 

329 if value < 0 or (strictly_positive and value == 0): 

330 raise ValueError('expected a positive integer ' + name) 

331 return value 

332 

333 

334def chunked_iter(src, size, **kw): 

335 """Generates *size*-sized chunks from *src* iterable. Unless the 

336 optional *fill* keyword argument is provided, iterables not evenly 

337 divisible by *size* will have a final chunk that is smaller than 

338 *size*. 

339 

340 >>> list(chunked_iter(range(10), 3)) 

341 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9]] 

342 >>> list(chunked_iter(range(10), 3, fill=None)) 

343 [[0, 1, 2], [3, 4, 5], [6, 7, 8], [9, None, None]] 

344 

345 Note that ``fill=None`` in fact uses ``None`` as the fill value. 

346 """ 

347 # TODO: add count kwarg? 

348 if not is_iterable(src): 

349 raise TypeError('expected an iterable') 

350 size = _validate_positive_int(size, 'chunk size') 

351 do_fill = True 

352 try: 

353 fill_val = kw.pop('fill') 

354 except KeyError: 

355 do_fill = False 

356 fill_val = None 

357 if kw: 

358 raise ValueError('got unexpected keyword arguments: %r' % kw.keys()) 

359 if not src: 

360 return 

361 postprocess = lambda chk: chk 

362 if isinstance(src, basestring): 

363 postprocess = lambda chk, _sep=type(src)(): _sep.join(chk) 

364 if _IS_PY3 and isinstance(src, bytes): 

365 postprocess = lambda chk: bytes(chk) 

366 src_iter = iter(src) 

367 while True: 

368 cur_chunk = list(itertools.islice(src_iter, size)) 

369 if not cur_chunk: 

370 break 

371 lc = len(cur_chunk) 

372 if lc < size and do_fill: 

373 cur_chunk[lc:] = [fill_val] * (size - lc) 

374 yield postprocess(cur_chunk) 

375 return 

376 

377 

378def chunk_ranges(input_size, chunk_size, input_offset=0, overlap_size=0, align=False): 

379 """Generates *chunk_size*-sized chunk ranges for an input with length *input_size*. 

380 Optionally, a start of the input can be set via *input_offset*, and 

381 and overlap between the chunks may be specified via *overlap_size*. 

382 Also, if *align* is set to *True*, any items with *i % (chunk_size-overlap_size) == 0* 

383 are always at the beginning of the chunk. 

384 

385 Returns an iterator of (start, end) tuples, one tuple per chunk. 

386 

387 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5)) 

388 [(10, 15), (15, 20)] 

389 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=1)) 

390 [(10, 15), (14, 19), (18, 20)] 

391 >>> list(chunk_ranges(input_offset=10, input_size=10, chunk_size=5, overlap_size=2)) 

392 [(10, 15), (13, 18), (16, 20)] 

393 

394 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=False)) 

395 [(4, 9), (9, 14), (14, 19)] 

396 >>> list(chunk_ranges(input_offset=4, input_size=15, chunk_size=5, align=True)) 

397 [(4, 5), (5, 10), (10, 15), (15, 19)] 

398 

399 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=False)) 

400 [(2, 7), (6, 11), (10, 15), (14, 17)] 

401 >>> list(chunk_ranges(input_offset=2, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

402 [(2, 5), (4, 9), (8, 13), (12, 17)] 

403 >>> list(chunk_ranges(input_offset=3, input_size=15, chunk_size=5, overlap_size=1, align=True)) 

404 [(3, 5), (4, 9), (8, 13), (12, 17), (16, 18)] 

405 """ 

406 input_size = _validate_positive_int(input_size, 'input_size', strictly_positive=False) 

407 chunk_size = _validate_positive_int(chunk_size, 'chunk_size') 

408 input_offset = _validate_positive_int(input_offset, 'input_offset', strictly_positive=False) 

409 overlap_size = _validate_positive_int(overlap_size, 'overlap_size', strictly_positive=False) 

410 

411 input_stop = input_offset + input_size 

412 

413 if align: 

414 initial_chunk_len = chunk_size - input_offset % (chunk_size - overlap_size) 

415 if initial_chunk_len != overlap_size: 

416 yield (input_offset, min(input_offset + initial_chunk_len, input_stop)) 

417 if input_offset + initial_chunk_len >= input_stop: 

418 return 

419 input_offset = input_offset + initial_chunk_len - overlap_size 

420 

421 for i in range(input_offset, input_stop, chunk_size - overlap_size): 

422 yield (i, min(i + chunk_size, input_stop)) 

423 

424 if i + chunk_size >= input_stop: 

425 return 

426 

427 

428def pairwise(src, end=_UNSET): 

429 """Convenience function for calling :func:`windowed` on *src*, with 

430 *size* set to 2. 

431 

432 >>> pairwise(range(5)) 

433 [(0, 1), (1, 2), (2, 3), (3, 4)] 

434 >>> pairwise([]) 

435 [] 

436 

437 Unless *end* is set, the number of pairs is always one less than  

438 the number of elements in the iterable passed in, except on an empty input,  

439 which will return an empty list. 

440 

441 With *end* set, a number of pairs equal to the length of *src* is returned, 

442 with the last item of the last pair being equal to *end*. 

443 

444 >>> list(pairwise(range(3), end=None)) 

445 [(0, 1), (1, 2), (2, None)] 

446 

447 This way, *end* values can be useful as sentinels to signal the end of the iterable. 

448 """ 

449 return windowed(src, 2, fill=end) 

450 

451 

452def pairwise_iter(src, end=_UNSET): 

453 """Convenience function for calling :func:`windowed_iter` on *src*, 

454 with *size* set to 2. 

455 

456 >>> list(pairwise_iter(range(5))) 

457 [(0, 1), (1, 2), (2, 3), (3, 4)] 

458 >>> list(pairwise_iter([])) 

459 [] 

460 

461 Unless *end* is set, the number of pairs is always one less  

462 than the number of elements in the iterable passed in,  

463 or zero, when *src* is empty. 

464 

465 With *end* set, a number of pairs equal to the length of *src* is returned, 

466 with the last item of the last pair being equal to *end*.  

467 

468 >>> list(pairwise_iter(range(3), end=None)) 

469 [(0, 1), (1, 2), (2, None)]  

470 

471 This way, *end* values can be useful as sentinels to signal the end 

472 of the iterable. For infinite iterators, setting *end* has no effect. 

473 """ 

474 return windowed_iter(src, 2, fill=end) 

475 

476 

477def windowed(src, size, fill=_UNSET): 

478 """Returns tuples with exactly length *size*. If *fill* is unset  

479 and the iterable is too short to make a window of length *size*,  

480 no tuples are returned. See :func:`windowed_iter` for more. 

481 """ 

482 return list(windowed_iter(src, size, fill=fill)) 

483 

484 

485def windowed_iter(src, size, fill=_UNSET): 

486 """Returns tuples with length *size* which represent a sliding 

487 window over iterable *src*. 

488 

489 >>> list(windowed_iter(range(7), 3)) 

490 [(0, 1, 2), (1, 2, 3), (2, 3, 4), (3, 4, 5), (4, 5, 6)] 

491 

492 If *fill* is unset, and the iterable is too short to make a window  

493 of length *size*, then no window tuples are returned. 

494 

495 >>> list(windowed_iter(range(3), 5)) 

496 [] 

497 

498 With *fill* set, the iterator always yields a number of windows 

499 equal to the length of the *src* iterable. 

500  

501 >>> windowed(range(4), 3, fill=None) 

502 [(0, 1, 2), (1, 2, 3), (2, 3, None), (3, None, None)] 

503 

504 This way, *fill* values can be useful to signal the end of the iterable. 

505 For infinite iterators, setting *fill* has no effect. 

506 """ 

507 tees = itertools.tee(src, size) 

508 if fill is _UNSET: 

509 try: 

510 for i, t in enumerate(tees): 

511 for _ in range(i): 

512 next(t) 

513 except StopIteration: 

514 return zip([]) 

515 return zip(*tees) 

516 

517 for i, t in enumerate(tees): 

518 for _ in range(i): 

519 try: 

520 next(t) 

521 except StopIteration: 

522 continue 

523 return zip_longest(*tees, fillvalue=fill) 

524 

525 

526 

527def xfrange(stop, start=None, step=1.0): 

528 """Same as :func:`frange`, but generator-based instead of returning a 

529 list. 

530 

531 >>> tuple(xfrange(1, 3, step=0.75)) 

532 (1.0, 1.75, 2.5) 

533 

534 See :func:`frange` for more details. 

535 """ 

536 if not step: 

537 raise ValueError('step must be non-zero') 

538 if start is None: 

539 start, stop = 0.0, stop * 1.0 

540 else: 

541 # swap when all args are used 

542 stop, start = start * 1.0, stop * 1.0 

543 cur = start 

544 while cur < stop: 

545 yield cur 

546 cur += step 

547 

548 

549def frange(stop, start=None, step=1.0): 

550 """A :func:`range` clone for float-based ranges. 

551 

552 >>> frange(5) 

553 [0.0, 1.0, 2.0, 3.0, 4.0] 

554 >>> frange(6, step=1.25) 

555 [0.0, 1.25, 2.5, 3.75, 5.0] 

556 >>> frange(100.5, 101.5, 0.25) 

557 [100.5, 100.75, 101.0, 101.25] 

558 >>> frange(5, 0) 

559 [] 

560 >>> frange(5, 0, step=-1.25) 

561 [5.0, 3.75, 2.5, 1.25] 

562 """ 

563 if not step: 

564 raise ValueError('step must be non-zero') 

565 if start is None: 

566 start, stop = 0.0, stop * 1.0 

567 else: 

568 # swap when all args are used 

569 stop, start = start * 1.0, stop * 1.0 

570 count = int(math.ceil((stop - start) / step)) 

571 ret = [None] * count 

572 if not ret: 

573 return ret 

574 ret[0] = start 

575 for i in xrange(1, count): 

576 ret[i] = ret[i - 1] + step 

577 return ret 

578 

579 

580def backoff(start, stop, count=None, factor=2.0, jitter=False): 

581 """Returns a list of geometrically-increasing floating-point numbers, 

582 suitable for usage with `exponential backoff`_. Exactly like 

583 :func:`backoff_iter`, but without the ``'repeat'`` option for 

584 *count*. See :func:`backoff_iter` for more details. 

585 

586 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

587 

588 >>> backoff(1, 10) 

589 [1.0, 2.0, 4.0, 8.0, 10.0] 

590 """ 

591 if count == 'repeat': 

592 raise ValueError("'repeat' supported in backoff_iter, not backoff") 

593 return list(backoff_iter(start, stop, count=count, 

594 factor=factor, jitter=jitter)) 

595 

596 

597def backoff_iter(start, stop, count=None, factor=2.0, jitter=False): 

598 """Generates a sequence of geometrically-increasing floats, suitable 

599 for usage with `exponential backoff`_. Starts with *start*, 

600 increasing by *factor* until *stop* is reached, optionally 

601 stopping iteration once *count* numbers are yielded. *factor* 

602 defaults to 2. In general retrying with properly-configured 

603 backoff creates a better-behaved component for a larger service 

604 ecosystem. 

605 

606 .. _exponential backoff: https://en.wikipedia.org/wiki/Exponential_backoff 

607 

608 >>> list(backoff_iter(1.0, 10.0, count=5)) 

609 [1.0, 2.0, 4.0, 8.0, 10.0] 

610 >>> list(backoff_iter(1.0, 10.0, count=8)) 

611 [1.0, 2.0, 4.0, 8.0, 10.0, 10.0, 10.0, 10.0] 

612 >>> list(backoff_iter(0.25, 100.0, factor=10)) 

613 [0.25, 2.5, 25.0, 100.0] 

614 

615 A simplified usage example: 

616 

617 .. code-block:: python 

618 

619 for timeout in backoff_iter(0.25, 5.0): 

620 try: 

621 res = network_call() 

622 break 

623 except Exception as e: 

624 log(e) 

625 time.sleep(timeout) 

626 

627 An enhancement for large-scale systems would be to add variation, 

628 or *jitter*, to timeout values. This is done to avoid a thundering 

629 herd on the receiving end of the network call. 

630 

631 Finally, for *count*, the special value ``'repeat'`` can be passed to 

632 continue yielding indefinitely. 

633 

634 Args: 

635 

636 start (float): Positive number for baseline. 

637 stop (float): Positive number for maximum. 

638 count (int): Number of steps before stopping 

639 iteration. Defaults to the number of steps between *start* and 

640 *stop*. Pass the string, `'repeat'`, to continue iteration 

641 indefinitely. 

642 factor (float): Rate of exponential increase. Defaults to `2.0`, 

643 e.g., `[1, 2, 4, 8, 16]`. 

644 jitter (float): A factor between `-1.0` and `1.0`, used to 

645 uniformly randomize and thus spread out timeouts in a distributed 

646 system, avoiding rhythm effects. Positive values use the base 

647 backoff curve as a maximum, negative values use the curve as a 

648 minimum. Set to 1.0 or `True` for a jitter approximating 

649 Ethernet's time-tested backoff solution. Defaults to `False`. 

650 

651 """ 

652 start = float(start) 

653 stop = float(stop) 

654 factor = float(factor) 

655 if start < 0.0: 

656 raise ValueError('expected start >= 0, not %r' % start) 

657 if factor < 1.0: 

658 raise ValueError('expected factor >= 1.0, not %r' % factor) 

659 if stop == 0.0: 

660 raise ValueError('expected stop >= 0') 

661 if stop < start: 

662 raise ValueError('expected stop >= start, not %r' % stop) 

663 if count is None: 

664 denom = start if start else 1 

665 count = 1 + math.ceil(math.log(stop/denom, factor)) 

666 count = count if start else count + 1 

667 if count != 'repeat' and count < 0: 

668 raise ValueError('count must be positive or "repeat", not %r' % count) 

669 if jitter: 

670 jitter = float(jitter) 

671 if not (-1.0 <= jitter <= 1.0): 

672 raise ValueError('expected jitter -1 <= j <= 1, not: %r' % jitter) 

673 

674 cur, i = start, 0 

675 while count == 'repeat' or i < count: 

676 if not jitter: 

677 cur_ret = cur 

678 elif jitter: 

679 cur_ret = cur - (cur * jitter * random.random()) 

680 yield cur_ret 

681 i += 1 

682 if cur == 0: 

683 cur = 1 

684 elif cur < stop: 

685 cur *= factor 

686 if cur > stop: 

687 cur = stop 

688 return 

689 

690 

691def bucketize(src, key=bool, value_transform=None, key_filter=None): 

692 """Group values in the *src* iterable by the value returned by *key*. 

693 

694 >>> bucketize(range(5)) 

695 {False: [0], True: [1, 2, 3, 4]} 

696 >>> is_odd = lambda x: x % 2 == 1 

697 >>> bucketize(range(5), is_odd) 

698 {False: [0, 2, 4], True: [1, 3]} 

699 

700 *key* is :class:`bool` by default, but can either be a callable or a string or a list 

701 if it is a string, it is the name of the attribute on which to bucketize objects. 

702 

703 >>> bucketize([1+1j, 2+2j, 1, 2], key='real') 

704 {1.0: [(1+1j), 1], 2.0: [(2+2j), 2]} 

705 

706 if *key* is a list, it contains the buckets where to put each object 

707 

708 >>> bucketize([1,2,365,4,98],key=[0,1,2,0,2]) 

709 {0: [1, 4], 1: [2], 2: [365, 98]} 

710 

711 

712 Value lists are not deduplicated: 

713 

714 >>> bucketize([None, None, None, 'hello']) 

715 {False: [None, None, None], True: ['hello']} 

716 

717 Bucketize into more than 3 groups 

718 

719 >>> bucketize(range(10), lambda x: x % 3) 

720 {0: [0, 3, 6, 9], 1: [1, 4, 7], 2: [2, 5, 8]} 

721 

722 ``bucketize`` has a couple of advanced options useful in certain 

723 cases. *value_transform* can be used to modify values as they are 

724 added to buckets, and *key_filter* will allow excluding certain 

725 buckets from being collected. 

726 

727 >>> bucketize(range(5), value_transform=lambda x: x*x) 

728 {False: [0], True: [1, 4, 9, 16]} 

729 

730 >>> bucketize(range(10), key=lambda x: x % 3, key_filter=lambda k: k % 3 != 1) 

731 {0: [0, 3, 6, 9], 2: [2, 5, 8]} 

732 

733 Note in some of these examples there were at most two keys, ``True`` and 

734 ``False``, and each key present has a list with at least one 

735 item. See :func:`partition` for a version specialized for binary 

736 use cases. 

737 

738 """ 

739 if not is_iterable(src): 

740 raise TypeError('expected an iterable') 

741 elif isinstance(key, list): 

742 if len(key) != len(src): 

743 raise ValueError("key and src have to be the same length") 

744 src = zip(key, src) 

745 

746 if isinstance(key, basestring): 

747 key_func = lambda x: getattr(x, key, x) 

748 elif callable(key): 

749 key_func = key 

750 elif isinstance(key, list): 

751 key_func = lambda x: x[0] 

752 else: 

753 raise TypeError('expected key to be callable or a string or a list') 

754 

755 if value_transform is None: 

756 value_transform = lambda x: x 

757 if not callable(value_transform): 

758 raise TypeError('expected callable value transform function') 

759 if isinstance(key, list): 

760 f = value_transform 

761 value_transform=lambda x: f(x[1]) 

762 

763 ret = {} 

764 for val in src: 

765 key_of_val = key_func(val) 

766 if key_filter is None or key_filter(key_of_val): 

767 ret.setdefault(key_of_val, []).append(value_transform(val)) 

768 return ret 

769 

770 

771def partition(src, key=bool): 

772 """No relation to :meth:`str.partition`, ``partition`` is like 

773 :func:`bucketize`, but for added convenience returns a tuple of 

774 ``(truthy_values, falsy_values)``. 

775 

776 >>> nonempty, empty = partition(['', '', 'hi', '', 'bye']) 

777 >>> nonempty 

778 ['hi', 'bye'] 

779 

780 *key* defaults to :class:`bool`, but can be carefully overridden to 

781 use either a function that returns either ``True`` or ``False`` or 

782 a string name of the attribute on which to partition objects. 

783 

784 >>> import string 

785 >>> is_digit = lambda x: x in string.digits 

786 >>> decimal_digits, hexletters = partition(string.hexdigits, is_digit) 

787 >>> ''.join(decimal_digits), ''.join(hexletters) 

788 ('0123456789', 'abcdefABCDEF') 

789 """ 

790 bucketized = bucketize(src, key) 

791 return bucketized.get(True, []), bucketized.get(False, []) 

792 

793 

794def unique(src, key=None): 

795 """``unique()`` returns a list of unique values, as determined by 

796 *key*, in the order they first appeared in the input iterable, 

797 *src*. 

798 

799 >>> ones_n_zeros = '11010110001010010101010' 

800 >>> ''.join(unique(ones_n_zeros)) 

801 '10' 

802 

803 See :func:`unique_iter` docs for more details. 

804 """ 

805 return list(unique_iter(src, key)) 

806 

807 

808def unique_iter(src, key=None): 

809 """Yield unique elements from the iterable, *src*, based on *key*, 

810 in the order in which they first appeared in *src*. 

811 

812 >>> repetitious = [1, 2, 3] * 10 

813 >>> list(unique_iter(repetitious)) 

814 [1, 2, 3] 

815 

816 By default, *key* is the object itself, but *key* can either be a 

817 callable or, for convenience, a string name of the attribute on 

818 which to uniqueify objects, falling back on identity when the 

819 attribute is not present. 

820 

821 >>> pleasantries = ['hi', 'hello', 'ok', 'bye', 'yes'] 

822 >>> list(unique_iter(pleasantries, key=lambda x: len(x))) 

823 ['hi', 'hello', 'bye'] 

824 """ 

825 if not is_iterable(src): 

826 raise TypeError('expected an iterable, not %r' % type(src)) 

827 if key is None: 

828 key_func = lambda x: x 

829 elif callable(key): 

830 key_func = key 

831 elif isinstance(key, basestring): 

832 key_func = lambda x: getattr(x, key, x) 

833 else: 

834 raise TypeError('"key" expected a string or callable, not %r' % key) 

835 seen = set() 

836 for i in src: 

837 k = key_func(i) 

838 if k not in seen: 

839 seen.add(k) 

840 yield i 

841 return 

842 

843 

844def redundant(src, key=None, groups=False): 

845 """The complement of :func:`unique()`. 

846 

847 By default returns non-unique/duplicate values as a list of the 

848 *first* redundant value in *src*. Pass ``groups=True`` to get 

849 groups of all values with redundancies, ordered by position of the 

850 first redundant value. This is useful in conjunction with some 

851 normalizing *key* function. 

852 

853 >>> redundant([1, 2, 3, 4]) 

854 [] 

855 >>> redundant([1, 2, 3, 2, 3, 3, 4]) 

856 [2, 3] 

857 >>> redundant([1, 2, 3, 2, 3, 3, 4], groups=True) 

858 [[2, 2], [3, 3, 3]] 

859 

860 An example using a *key* function to do case-insensitive 

861 redundancy detection. 

862 

863 >>> redundant(['hi', 'Hi', 'HI', 'hello'], key=str.lower) 

864 ['Hi'] 

865 >>> redundant(['hi', 'Hi', 'HI', 'hello'], groups=True, key=str.lower) 

866 [['hi', 'Hi', 'HI']] 

867 

868 *key* should also be used when the values in *src* are not hashable. 

869 

870 .. note:: 

871 

872 This output of this function is designed for reporting 

873 duplicates in contexts when a unique input is desired. Due to 

874 the grouped return type, there is no streaming equivalent of 

875 this function for the time being. 

876 

877 """ 

878 if key is None: 

879 pass 

880 elif callable(key): 

881 key_func = key 

882 elif isinstance(key, basestring): 

883 key_func = lambda x: getattr(x, key, x) 

884 else: 

885 raise TypeError('"key" expected a string or callable, not %r' % key) 

886 seen = {} # key to first seen item 

887 redundant_order = [] 

888 redundant_groups = {} 

889 for i in src: 

890 k = key_func(i) if key else i 

891 if k not in seen: 

892 seen[k] = i 

893 else: 

894 if k in redundant_groups: 

895 if groups: 

896 redundant_groups[k].append(i) 

897 else: 

898 redundant_order.append(k) 

899 redundant_groups[k] = [seen[k], i] 

900 if not groups: 

901 ret = [redundant_groups[k][1] for k in redundant_order] 

902 else: 

903 ret = [redundant_groups[k] for k in redundant_order] 

904 return ret 

905 

906 

907def one(src, default=None, key=None): 

908 """Along the same lines as builtins, :func:`all` and :func:`any`, and 

909 similar to :func:`first`, ``one()`` returns the single object in 

910 the given iterable *src* that evaluates to ``True``, as determined 

911 by callable *key*. If unset, *key* defaults to :class:`bool`. If 

912 no such objects are found, *default* is returned. If *default* is 

913 not passed, ``None`` is returned. 

914 

915 If *src* has more than one object that evaluates to ``True``, or 

916 if there is no object that fulfills such condition, return 

917 *default*. It's like an `XOR`_ over an iterable. 

918 

919 >>> one((True, False, False)) 

920 True 

921 >>> one((True, False, True)) 

922 >>> one((0, 0, 'a')) 

923 'a' 

924 >>> one((0, False, None)) 

925 >>> one((True, True), default=False) 

926 False 

927 >>> bool(one(('', 1))) 

928 True 

929 >>> one((10, 20, 30, 42), key=lambda i: i > 40) 

930 42 

931 

932 See `Martín Gaitán's original repo`_ for further use cases. 

933 

934 .. _Martín Gaitán's original repo: https://github.com/mgaitan/one 

935 .. _XOR: https://en.wikipedia.org/wiki/Exclusive_or 

936 

937 """ 

938 ones = list(itertools.islice(filter(key, src), 2)) 

939 return ones[0] if len(ones) == 1 else default 

940 

941 

942def first(iterable, default=None, key=None): 

943 """Return first element of *iterable* that evaluates to ``True``, else 

944 return ``None`` or optional *default*. Similar to :func:`one`. 

945 

946 >>> first([0, False, None, [], (), 42]) 

947 42 

948 >>> first([0, False, None, [], ()]) is None 

949 True 

950 >>> first([0, False, None, [], ()], default='ohai') 

951 'ohai' 

952 >>> import re 

953 >>> m = first(re.match(regex, 'abc') for regex in ['b.*', 'a(.*)']) 

954 >>> m.group(1) 

955 'bc' 

956 

957 The optional *key* argument specifies a one-argument predicate function 

958 like that used for *filter()*. The *key* argument, if supplied, should be 

959 in keyword form. For example, finding the first even number in an iterable: 

960 

961 >>> first([1, 1, 3, 4, 5], key=lambda x: x % 2 == 0) 

962 4 

963 

964 Contributed by Hynek Schlawack, author of `the original standalone module`_. 

965 

966 .. _the original standalone module: https://github.com/hynek/first 

967 """ 

968 return next(filter(key, iterable), default) 

969 

970 

971def flatten_iter(iterable): 

972 """``flatten_iter()`` yields all the elements from *iterable* while 

973 collapsing any nested iterables. 

974 

975 >>> nested = [[1, 2], [[3], [4, 5]]] 

976 >>> list(flatten_iter(nested)) 

977 [1, 2, 3, 4, 5] 

978 """ 

979 for item in iterable: 

980 if isinstance(item, Iterable) and not isinstance(item, basestring): 

981 for subitem in flatten_iter(item): 

982 yield subitem 

983 else: 

984 yield item 

985 

986def flatten(iterable): 

987 """``flatten()`` returns a collapsed list of all the elements from 

988 *iterable* while collapsing any nested iterables. 

989 

990 >>> nested = [[1, 2], [[3], [4, 5]]] 

991 >>> flatten(nested) 

992 [1, 2, 3, 4, 5] 

993 """ 

994 return list(flatten_iter(iterable)) 

995 

996 

997def same(iterable, ref=_UNSET): 

998 """``same()`` returns ``True`` when all values in *iterable* are 

999 equal to one another, or optionally a reference value, 

1000 *ref*. Similar to :func:`all` and :func:`any` in that it evaluates 

1001 an iterable and returns a :class:`bool`. ``same()`` returns 

1002 ``True`` for empty iterables. 

1003 

1004 >>> same([]) 

1005 True 

1006 >>> same([1]) 

1007 True 

1008 >>> same(['a', 'a', 'a']) 

1009 True 

1010 >>> same(range(20)) 

1011 False 

1012 >>> same([[], []]) 

1013 True 

1014 >>> same([[], []], ref='test') 

1015 False 

1016 

1017 """ 

1018 iterator = iter(iterable) 

1019 if ref is _UNSET: 

1020 ref = next(iterator, ref) 

1021 return all(val == ref for val in iterator) 

1022 

1023 

1024def default_visit(path, key, value): 

1025 # print('visit(%r, %r, %r)' % (path, key, value)) 

1026 return key, value 

1027 

1028# enable the extreme: monkeypatching iterutils with a different default_visit 

1029_orig_default_visit = default_visit 

1030 

1031 

1032def default_enter(path, key, value): 

1033 # print('enter(%r, %r)' % (key, value)) 

1034 if isinstance(value, basestring): 

1035 return value, False 

1036 elif isinstance(value, Mapping): 

1037 return value.__class__(), ItemsView(value) 

1038 elif isinstance(value, Sequence): 

1039 return value.__class__(), enumerate(value) 

1040 elif isinstance(value, Set): 

1041 return value.__class__(), enumerate(value) 

1042 else: 

1043 # files, strings, other iterables, and scalars are not 

1044 # traversed 

1045 return value, False 

1046 

1047 

1048def default_exit(path, key, old_parent, new_parent, new_items): 

1049 # print('exit(%r, %r, %r, %r, %r)' 

1050 # % (path, key, old_parent, new_parent, new_items)) 

1051 ret = new_parent 

1052 if isinstance(new_parent, Mapping): 

1053 new_parent.update(new_items) 

1054 elif isinstance(new_parent, Sequence): 

1055 vals = [v for i, v in new_items] 

1056 try: 

1057 new_parent.extend(vals) 

1058 except AttributeError: 

1059 ret = new_parent.__class__(vals) # tuples 

1060 elif isinstance(new_parent, Set): 

1061 vals = [v for i, v in new_items] 

1062 try: 

1063 new_parent.update(vals) 

1064 except AttributeError: 

1065 ret = new_parent.__class__(vals) # frozensets 

1066 else: 

1067 raise RuntimeError('unexpected iterable type: %r' % type(new_parent)) 

1068 return ret 

1069 

1070 

1071def remap(root, visit=default_visit, enter=default_enter, exit=default_exit, 

1072 **kwargs): 

1073 """The remap ("recursive map") function is used to traverse and 

1074 transform nested structures. Lists, tuples, sets, and dictionaries 

1075 are just a few of the data structures nested into heterogeneous 

1076 tree-like structures that are so common in programming. 

1077 Unfortunately, Python's built-in ways to manipulate collections 

1078 are almost all flat. List comprehensions may be fast and succinct, 

1079 but they do not recurse, making it tedious to apply quick changes 

1080 or complex transforms to real-world data. 

1081 

1082 remap goes where list comprehensions cannot. 

1083 

1084 Here's an example of removing all Nones from some data: 

1085 

1086 >>> from pprint import pprint 

1087 >>> reviews = {'Star Trek': {'TNG': 10, 'DS9': 8.5, 'ENT': None}, 

1088 ... 'Babylon 5': 6, 'Dr. Who': None} 

1089 >>> pprint(remap(reviews, lambda p, k, v: v is not None)) 

1090 {'Babylon 5': 6, 'Star Trek': {'DS9': 8.5, 'TNG': 10}} 

1091 

1092 Notice how both Nones have been removed despite the nesting in the 

1093 dictionary. Not bad for a one-liner, and that's just the beginning. 

1094 See `this remap cookbook`_ for more delicious recipes. 

1095 

1096 .. _this remap cookbook: http://sedimental.org/remap.html 

1097 

1098 remap takes four main arguments: the object to traverse and three 

1099 optional callables which determine how the remapped object will be 

1100 created. 

1101 

1102 Args: 

1103 

1104 root: The target object to traverse. By default, remap 

1105 supports iterables like :class:`list`, :class:`tuple`, 

1106 :class:`dict`, and :class:`set`, but any object traversable by 

1107 *enter* will work. 

1108 visit (callable): This function is called on every item in 

1109 *root*. It must accept three positional arguments, *path*, 

1110 *key*, and *value*. *path* is simply a tuple of parents' 

1111 keys. *visit* should return the new key-value pair. It may 

1112 also return ``True`` as shorthand to keep the old item 

1113 unmodified, or ``False`` to drop the item from the new 

1114 structure. *visit* is called after *enter*, on the new parent. 

1115 

1116 The *visit* function is called for every item in root, 

1117 including duplicate items. For traversable values, it is 

1118 called on the new parent object, after all its children 

1119 have been visited. The default visit behavior simply 

1120 returns the key-value pair unmodified. 

1121 enter (callable): This function controls which items in *root* 

1122 are traversed. It accepts the same arguments as *visit*: the 

1123 path, the key, and the value of the current item. It returns a 

1124 pair of the blank new parent, and an iterator over the items 

1125 which should be visited. If ``False`` is returned instead of 

1126 an iterator, the value will not be traversed. 

1127 

1128 The *enter* function is only called once per unique value. The 

1129 default enter behavior support mappings, sequences, and 

1130 sets. Strings and all other iterables will not be traversed. 

1131 exit (callable): This function determines how to handle items 

1132 once they have been visited. It gets the same three 

1133 arguments as the other functions -- *path*, *key*, *value* 

1134 -- plus two more: the blank new parent object returned 

1135 from *enter*, and a list of the new items, as remapped by 

1136 *visit*. 

1137 

1138 Like *enter*, the *exit* function is only called once per 

1139 unique value. The default exit behavior is to simply add 

1140 all new items to the new parent, e.g., using 

1141 :meth:`list.extend` and :meth:`dict.update` to add to the 

1142 new parent. Immutable objects, such as a :class:`tuple` or 

1143 :class:`namedtuple`, must be recreated from scratch, but 

1144 use the same type as the new parent passed back from the 

1145 *enter* function. 

1146 reraise_visit (bool): A pragmatic convenience for the *visit* 

1147 callable. When set to ``False``, remap ignores any errors 

1148 raised by the *visit* callback. Items causing exceptions 

1149 are kept. See examples for more details. 

1150 

1151 remap is designed to cover the majority of cases with just the 

1152 *visit* callable. While passing in multiple callables is very 

1153 empowering, remap is designed so very few cases should require 

1154 passing more than one function. 

1155 

1156 When passing *enter* and *exit*, it's common and easiest to build 

1157 on the default behavior. Simply add ``from boltons.iterutils import 

1158 default_enter`` (or ``default_exit``), and have your enter/exit 

1159 function call the default behavior before or after your custom 

1160 logic. See `this example`_. 

1161 

1162 Duplicate and self-referential objects (aka reference loops) are 

1163 automatically handled internally, `as shown here`_. 

1164 

1165 .. _this example: http://sedimental.org/remap.html#sort_all_lists 

1166 .. _as shown here: http://sedimental.org/remap.html#corner_cases 

1167 

1168 """ 

1169 # TODO: improve argument formatting in sphinx doc 

1170 # TODO: enter() return (False, items) to continue traverse but cancel copy? 

1171 if not callable(visit): 

1172 raise TypeError('visit expected callable, not: %r' % visit) 

1173 if not callable(enter): 

1174 raise TypeError('enter expected callable, not: %r' % enter) 

1175 if not callable(exit): 

1176 raise TypeError('exit expected callable, not: %r' % exit) 

1177 reraise_visit = kwargs.pop('reraise_visit', True) 

1178 if kwargs: 

1179 raise TypeError('unexpected keyword arguments: %r' % kwargs.keys()) 

1180 

1181 path, registry, stack = (), {}, [(None, root)] 

1182 new_items_stack = [] 

1183 while stack: 

1184 key, value = stack.pop() 

1185 id_value = id(value) 

1186 if key is _REMAP_EXIT: 

1187 key, new_parent, old_parent = value 

1188 id_value = id(old_parent) 

1189 path, new_items = new_items_stack.pop() 

1190 value = exit(path, key, old_parent, new_parent, new_items) 

1191 registry[id_value] = value 

1192 if not new_items_stack: 

1193 continue 

1194 elif id_value in registry: 

1195 value = registry[id_value] 

1196 else: 

1197 res = enter(path, key, value) 

1198 try: 

1199 new_parent, new_items = res 

1200 except TypeError: 

1201 # TODO: handle False? 

1202 raise TypeError('enter should return a tuple of (new_parent,' 

1203 ' items_iterator), not: %r' % res) 

1204 if new_items is not False: 

1205 # traverse unless False is explicitly passed 

1206 registry[id_value] = new_parent 

1207 new_items_stack.append((path, [])) 

1208 if value is not root: 

1209 path += (key,) 

1210 stack.append((_REMAP_EXIT, (key, new_parent, value))) 

1211 if new_items: 

1212 stack.extend(reversed(list(new_items))) 

1213 continue 

1214 if visit is _orig_default_visit: 

1215 # avoid function call overhead by inlining identity operation 

1216 visited_item = (key, value) 

1217 else: 

1218 try: 

1219 visited_item = visit(path, key, value) 

1220 except Exception: 

1221 if reraise_visit: 

1222 raise 

1223 visited_item = True 

1224 if visited_item is False: 

1225 continue # drop 

1226 elif visited_item is True: 

1227 visited_item = (key, value) 

1228 # TODO: typecheck? 

1229 # raise TypeError('expected (key, value) from visit(),' 

1230 # ' not: %r' % visited_item) 

1231 try: 

1232 new_items_stack[-1][1].append(visited_item) 

1233 except IndexError: 

1234 raise TypeError('expected remappable root, not: %r' % root) 

1235 return value 

1236 

1237 

1238class PathAccessError(KeyError, IndexError, TypeError): 

1239 """An amalgamation of KeyError, IndexError, and TypeError, 

1240 representing what can occur when looking up a path in a nested 

1241 object. 

1242 """ 

1243 def __init__(self, exc, seg, path): 

1244 self.exc = exc 

1245 self.seg = seg 

1246 self.path = path 

1247 

1248 def __repr__(self): 

1249 cn = self.__class__.__name__ 

1250 return '%s(%r, %r, %r)' % (cn, self.exc, self.seg, self.path) 

1251 

1252 def __str__(self): 

1253 return ('could not access %r from path %r, got error: %r' 

1254 % (self.seg, self.path, self.exc)) 

1255 

1256 

1257def get_path(root, path, default=_UNSET): 

1258 """Retrieve a value from a nested object via a tuple representing the 

1259 lookup path. 

1260 

1261 >>> root = {'a': {'b': {'c': [[1], [2], [3]]}}} 

1262 >>> get_path(root, ('a', 'b', 'c', 2, 0)) 

1263 3 

1264 

1265 The path format is intentionally consistent with that of 

1266 :func:`remap`. 

1267 

1268 One of get_path's chief aims is improved error messaging. EAFP is 

1269 great, but the error messages are not. 

1270 

1271 For instance, ``root['a']['b']['c'][2][1]`` gives back 

1272 ``IndexError: list index out of range`` 

1273 

1274 What went out of range where? get_path currently raises 

1275 ``PathAccessError: could not access 2 from path ('a', 'b', 'c', 2, 

1276 1), got error: IndexError('list index out of range',)``, a 

1277 subclass of IndexError and KeyError. 

1278 

1279 You can also pass a default that covers the entire operation, 

1280 should the lookup fail at any level. 

1281 

1282 Args: 

1283 root: The target nesting of dictionaries, lists, or other 

1284 objects supporting ``__getitem__``. 

1285 path (tuple): A list of strings and integers to be successively 

1286 looked up within *root*. 

1287 default: The value to be returned should any 

1288 ``PathAccessError`` exceptions be raised. 

1289 """ 

1290 if isinstance(path, basestring): 

1291 path = path.split('.') 

1292 cur = root 

1293 try: 

1294 for seg in path: 

1295 try: 

1296 cur = cur[seg] 

1297 except (KeyError, IndexError) as exc: 

1298 raise PathAccessError(exc, seg, path) 

1299 except TypeError as exc: 

1300 # either string index in a list, or a parent that 

1301 # doesn't support indexing 

1302 try: 

1303 seg = int(seg) 

1304 cur = cur[seg] 

1305 except (ValueError, KeyError, IndexError, TypeError): 

1306 if not is_iterable(cur): 

1307 exc = TypeError('%r object is not indexable' 

1308 % type(cur).__name__) 

1309 raise PathAccessError(exc, seg, path) 

1310 except PathAccessError: 

1311 if default is _UNSET: 

1312 raise 

1313 return default 

1314 return cur 

1315 

1316 

1317def research(root, query=lambda p, k, v: True, reraise=False): 

1318 """The :func:`research` function uses :func:`remap` to recurse over 

1319 any data nested in *root*, and find values which match a given 

1320 criterion, specified by the *query* callable. 

1321 

1322 Results are returned as a list of ``(path, value)`` pairs. The 

1323 paths are tuples in the same format accepted by 

1324 :func:`get_path`. This can be useful for comparing values nested 

1325 in two or more different structures. 

1326 

1327 Here's a simple example that finds all integers: 

1328 

1329 >>> root = {'a': {'b': 1, 'c': (2, 'd', 3)}, 'e': None} 

1330 >>> res = research(root, query=lambda p, k, v: isinstance(v, int)) 

1331 >>> print(sorted(res)) 

1332 [(('a', 'b'), 1), (('a', 'c', 0), 2), (('a', 'c', 2), 3)] 

1333 

1334 Note how *query* follows the same, familiar ``path, key, value`` 

1335 signature as the ``visit`` and ``enter`` functions on 

1336 :func:`remap`, and returns a :class:`bool`. 

1337 

1338 Args: 

1339 root: The target object to search. Supports the same types of 

1340 objects as :func:`remap`, including :class:`list`, 

1341 :class:`tuple`, :class:`dict`, and :class:`set`. 

1342 query (callable): The function called on every object to 

1343 determine whether to include it in the search results. The 

1344 callable must accept three arguments, *path*, *key*, and 

1345 *value*, commonly abbreviated *p*, *k*, and *v*, same as 

1346 *enter* and *visit* from :func:`remap`. 

1347 reraise (bool): Whether to reraise exceptions raised by *query* 

1348 or to simply drop the result that caused the error. 

1349 

1350 

1351 With :func:`research` it's easy to inspect the details of a data 

1352 structure, like finding values that are at a certain depth (using 

1353 ``len(p)``) and much more. If more advanced functionality is 

1354 needed, check out the code and make your own :func:`remap` 

1355 wrapper, and consider `submitting a patch`_! 

1356 

1357 .. _submitting a patch: https://github.com/mahmoud/boltons/pulls 

1358 """ 

1359 ret = [] 

1360 

1361 if not callable(query): 

1362 raise TypeError('query expected callable, not: %r' % query) 

1363 

1364 def enter(path, key, value): 

1365 try: 

1366 if query(path, key, value): 

1367 ret.append((path + (key,), value)) 

1368 except Exception: 

1369 if reraise: 

1370 raise 

1371 return default_enter(path, key, value) 

1372 

1373 remap(root, enter=enter) 

1374 return ret 

1375 

1376 

1377# TODO: recollect() 

1378# TODO: refilter() 

1379# TODO: reiter() 

1380 

1381 

1382# GUID iterators: 10x faster and somewhat more compact than uuid. 

1383 

1384class GUIDerator(object): 

1385 """The GUIDerator is an iterator that yields a globally-unique 

1386 identifier (GUID) on every iteration. The GUIDs produced are 

1387 hexadecimal strings. 

1388 

1389 Testing shows it to be around 12x faster than the uuid module. By 

1390 default it is also more compact, partly due to its default 96-bit 

1391 (24-hexdigit) length. 96 bits of randomness means that there is a 

1392 1 in 2 ^ 32 chance of collision after 2 ^ 64 iterations. If more 

1393 or less uniqueness is desired, the *size* argument can be adjusted 

1394 accordingly. 

1395 

1396 Args: 

1397 size (int): character length of the GUID, defaults to 24. Lengths 

1398 between 20 and 36 are considered valid. 

1399 

1400 The GUIDerator has built-in fork protection that causes it to 

1401 detect a fork on next iteration and reseed accordingly. 

1402 

1403 """ 

1404 def __init__(self, size=24): 

1405 self.size = size 

1406 if size < 20 or size > 36: 

1407 raise ValueError('expected 20 < size <= 36') 

1408 import hashlib 

1409 self._sha1 = hashlib.sha1 

1410 self.count = itertools.count() 

1411 self.reseed() 

1412 

1413 def reseed(self): 

1414 import socket 

1415 self.pid = os.getpid() 

1416 self.salt = '-'.join([str(self.pid), 

1417 socket.gethostname() or b'<nohostname>', 

1418 str(time.time()), 

1419 codecs.encode(os.urandom(6), 

1420 'hex_codec').decode('ascii')]) 

1421 # that codecs trick is the best/only way to get a bytes to 

1422 # hexbytes in py2/3 

1423 return 

1424 

1425 def __iter__(self): 

1426 return self 

1427 

1428 if _IS_PY3: 

1429 def __next__(self): 

1430 if os.getpid() != self.pid: 

1431 self.reseed() 

1432 target_bytes = (self.salt + str(next(self.count))).encode('utf8') 

1433 hash_text = self._sha1(target_bytes).hexdigest()[:self.size] 

1434 return hash_text 

1435 else: 

1436 def __next__(self): 

1437 if os.getpid() != self.pid: 

1438 self.reseed() 

1439 return self._sha1(self.salt + 

1440 str(next(self.count))).hexdigest()[:self.size] 

1441 

1442 next = __next__ 

1443 

1444 

1445class SequentialGUIDerator(GUIDerator): 

1446 """Much like the standard GUIDerator, the SequentialGUIDerator is an 

1447 iterator that yields a globally-unique identifier (GUID) on every 

1448 iteration. The GUIDs produced are hexadecimal strings. 

1449 

1450 The SequentialGUIDerator differs in that it picks a starting GUID 

1451 value and increments every iteration. This yields GUIDs which are 

1452 of course unique, but also ordered and lexicographically sortable. 

1453 

1454 The SequentialGUIDerator is around 50% faster than the normal 

1455 GUIDerator, making it almost 20x as fast as the built-in uuid 

1456 module. By default it is also more compact, partly due to its 

1457 96-bit (24-hexdigit) default length. 96 bits of randomness means that 

1458 there is a 1 in 2 ^ 32 chance of collision after 2 ^ 64 

1459 iterations. If more or less uniqueness is desired, the *size* 

1460 argument can be adjusted accordingly. 

1461 

1462 Args: 

1463 size (int): character length of the GUID, defaults to 24. 

1464 

1465 Note that with SequentialGUIDerator there is a chance of GUIDs 

1466 growing larger than the size configured. The SequentialGUIDerator 

1467 has built-in fork protection that causes it to detect a fork on 

1468 next iteration and reseed accordingly. 

1469 

1470 """ 

1471 

1472 if _IS_PY3: 

1473 def reseed(self): 

1474 super(SequentialGUIDerator, self).reseed() 

1475 start_str = self._sha1(self.salt.encode('utf8')).hexdigest() 

1476 self.start = int(start_str[:self.size], 16) 

1477 self.start |= (1 << ((self.size * 4) - 2)) 

1478 else: 

1479 def reseed(self): 

1480 super(SequentialGUIDerator, self).reseed() 

1481 start_str = self._sha1(self.salt).hexdigest() 

1482 self.start = int(start_str[:self.size], 16) 

1483 self.start |= (1 << ((self.size * 4) - 2)) 

1484 

1485 def __next__(self): 

1486 if os.getpid() != self.pid: 

1487 self.reseed() 

1488 return '%x' % (next(self.count) + self.start) 

1489 

1490 next = __next__ 

1491 

1492 

1493guid_iter = GUIDerator() 

1494seq_guid_iter = SequentialGUIDerator() 

1495 

1496 

1497def soft_sorted(iterable, first=None, last=None, key=None, reverse=False): 

1498 """For when you care about the order of some elements, but not about 

1499 others. 

1500 

1501 Use this to float to the top and/or sink to the bottom a specific 

1502 ordering, while sorting the rest of the elements according to 

1503 normal :func:`sorted` rules. 

1504 

1505 >>> soft_sorted(['two', 'b', 'one', 'a'], first=['one', 'two']) 

1506 ['one', 'two', 'a', 'b'] 

1507 >>> soft_sorted(range(7), first=[6, 15], last=[2, 4], reverse=True) 

1508 [6, 5, 3, 1, 0, 2, 4] 

1509 >>> import string 

1510 >>> ''.join(soft_sorted(string.hexdigits, first='za1', last='b', key=str.lower)) 

1511 'aA1023456789cCdDeEfFbB' 

1512 

1513 Args: 

1514 iterable (list): A list or other iterable to sort. 

1515 first (list): A sequence to enforce for elements which should 

1516 appear at the beginning of the returned list. 

1517 last (list): A sequence to enforce for elements which should 

1518 appear at the end of the returned list. 

1519 key (callable): Callable used to generate a comparable key for 

1520 each item to be sorted, same as the key in 

1521 :func:`sorted`. Note that entries in *first* and *last* 

1522 should be the keys for the items. Defaults to 

1523 passthrough/the identity function. 

1524 reverse (bool): Whether or not elements not explicitly ordered 

1525 by *first* and *last* should be in reverse order or not. 

1526 

1527 Returns a new list in sorted order. 

1528 """ 

1529 first = first or [] 

1530 last = last or [] 

1531 key = key or (lambda x: x) 

1532 seq = list(iterable) 

1533 other = [x for x in seq if not ((first and key(x) in first) or (last and key(x) in last))] 

1534 other.sort(key=key, reverse=reverse) 

1535 

1536 if first: 

1537 first = sorted([x for x in seq if key(x) in first], key=lambda x: first.index(key(x))) 

1538 if last: 

1539 last = sorted([x for x in seq if key(x) in last], key=lambda x: last.index(key(x))) 

1540 return first + other + last 

1541 

1542 

1543def untyped_sorted(iterable, key=None, reverse=False): 

1544 """A version of :func:`sorted` which will happily sort an iterable of 

1545 heterogeneous types and return a new list, similar to legacy Python's 

1546 behavior. 

1547 

1548 >>> untyped_sorted(['abc', 2.0, 1, 2, 'def']) 

1549 [1, 2.0, 2, 'abc', 'def'] 

1550 

1551 Note how mutually orderable types are sorted as expected, as in 

1552 the case of the integers and floats above. 

1553 

1554 .. note:: 

1555 

1556 Results may vary across Python versions and builds, but the 

1557 function will produce a sorted list, except in the case of 

1558 explicitly unorderable objects. 

1559 

1560 """ 

1561 class _Wrapper(object): 

1562 slots = ('obj',) 

1563 

1564 def __init__(self, obj): 

1565 self.obj = obj 

1566 

1567 def __lt__(self, other): 

1568 obj = key(self.obj) if key is not None else self.obj 

1569 other = key(other.obj) if key is not None else other.obj 

1570 try: 

1571 ret = obj < other 

1572 except TypeError: 

1573 ret = ((type(obj).__name__, id(type(obj)), obj) 

1574 < (type(other).__name__, id(type(other)), other)) 

1575 return ret 

1576 

1577 if key is not None and not callable(key): 

1578 raise TypeError('expected function or callable object for key, not: %r' 

1579 % key) 

1580 

1581 return sorted(iterable, key=_Wrapper, reverse=reverse) 

1582 

1583""" 

1584May actually be faster to do an isinstance check for a str path 

1585 

1586$ python -m timeit -s "x = [1]" "x[0]" 

158710000000 loops, best of 3: 0.0207 usec per loop 

1588$ python -m timeit -s "x = [1]" "try: x[0] \nexcept: pass" 

158910000000 loops, best of 3: 0.029 usec per loop 

1590$ python -m timeit -s "x = [1]" "try: x[1] \nexcept: pass" 

15911000000 loops, best of 3: 0.315 usec per loop 

1592# setting up try/except is fast, only around 0.01us 

1593# actually triggering the exception takes almost 10x as long 

1594 

1595$ python -m timeit -s "x = [1]" "isinstance(x, basestring)" 

159610000000 loops, best of 3: 0.141 usec per loop 

1597$ python -m timeit -s "x = [1]" "isinstance(x, str)" 

159810000000 loops, best of 3: 0.131 usec per loop 

1599$ python -m timeit -s "x = [1]" "try: x.split('.')\n except: pass" 

16001000000 loops, best of 3: 0.443 usec per loop 

1601$ python -m timeit -s "x = [1]" "try: x.split('.') \nexcept AttributeError: pass" 

16021000000 loops, best of 3: 0.544 usec per loop 

1603"""