Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyrsistent/_pvector.py: 59%

318 statements  

« prev     ^ index     » next       coverage.py v7.2.7, created at 2023-07-01 06:54 +0000

1from abc import abstractmethod, ABCMeta 

2from collections.abc import Sequence, Hashable 

3from numbers import Integral 

4import operator 

5from pyrsistent._transformations import transform 

6 

7 

8def _bitcount(val): 

9 return bin(val).count("1") 

10 

11BRANCH_FACTOR = 32 

12BIT_MASK = BRANCH_FACTOR - 1 

13SHIFT = _bitcount(BIT_MASK) 

14 

15 

16def compare_pvector(v, other, operator): 

17 return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other) 

18 

19 

20def _index_or_slice(index, stop): 

21 if stop is None: 

22 return index 

23 

24 return slice(index, stop) 

25 

26 

27class PythonPVector(object): 

28 """ 

29 Support structure for PVector that implements structural sharing for vectors using a trie. 

30 """ 

31 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__') 

32 

33 def __new__(cls, count, shift, root, tail): 

34 self = super(PythonPVector, cls).__new__(cls) 

35 self._count = count 

36 self._shift = shift 

37 self._root = root 

38 self._tail = tail 

39 

40 # Derived attribute stored for performance 

41 self._tail_offset = self._count - len(self._tail) 

42 return self 

43 

44 def __len__(self): 

45 return self._count 

46 

47 def __getitem__(self, index): 

48 if isinstance(index, slice): 

49 # There are more conditions than the below where it would be OK to 

50 # return ourselves, implement those... 

51 if index.start is None and index.stop is None and index.step is None: 

52 return self 

53 

54 # This is a bit nasty realizing the whole structure as a list before 

55 # slicing it but it is the fastest way I've found to date, and it's easy :-) 

56 return _EMPTY_PVECTOR.extend(self.tolist()[index]) 

57 

58 if index < 0: 

59 index += self._count 

60 

61 return PythonPVector._node_for(self, index)[index & BIT_MASK] 

62 

63 def __add__(self, other): 

64 return self.extend(other) 

65 

66 def __repr__(self): 

67 return 'pvector({0})'.format(str(self.tolist())) 

68 

69 def __str__(self): 

70 return self.__repr__() 

71 

72 def __iter__(self): 

73 # This is kind of lazy and will produce some memory overhead but it is the fasted method 

74 # by far of those tried since it uses the speed of the built in python list directly. 

75 return iter(self.tolist()) 

76 

77 def __ne__(self, other): 

78 return not self.__eq__(other) 

79 

80 def __eq__(self, other): 

81 return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq) 

82 

83 def __gt__(self, other): 

84 return compare_pvector(self, other, operator.gt) 

85 

86 def __lt__(self, other): 

87 return compare_pvector(self, other, operator.lt) 

88 

89 def __ge__(self, other): 

90 return compare_pvector(self, other, operator.ge) 

91 

92 def __le__(self, other): 

93 return compare_pvector(self, other, operator.le) 

94 

95 def __mul__(self, times): 

96 if times <= 0 or self is _EMPTY_PVECTOR: 

97 return _EMPTY_PVECTOR 

98 

99 if times == 1: 

100 return self 

101 

102 return _EMPTY_PVECTOR.extend(times * self.tolist()) 

103 

104 __rmul__ = __mul__ 

105 

106 def _fill_list(self, node, shift, the_list): 

107 if shift: 

108 shift -= SHIFT 

109 for n in node: 

110 self._fill_list(n, shift, the_list) 

111 else: 

112 the_list.extend(node) 

113 

114 def tolist(self): 

115 """ 

116 The fastest way to convert the vector into a python list. 

117 """ 

118 the_list = [] 

119 self._fill_list(self._root, self._shift, the_list) 

120 the_list.extend(self._tail) 

121 return the_list 

122 

123 def _totuple(self): 

124 """ 

125 Returns the content as a python tuple. 

126 """ 

127 return tuple(self.tolist()) 

128 

129 def __hash__(self): 

130 # Taking the easy way out again... 

131 return hash(self._totuple()) 

132 

133 def transform(self, *transformations): 

134 return transform(self, transformations) 

135 

136 def __reduce__(self): 

137 # Pickling support 

138 return pvector, (self.tolist(),) 

139 

140 def mset(self, *args): 

141 if len(args) % 2: 

142 raise TypeError("mset expected an even number of arguments") 

143 

144 evolver = self.evolver() 

145 for i in range(0, len(args), 2): 

146 evolver[args[i]] = args[i+1] 

147 

148 return evolver.persistent() 

149 

150 class Evolver(object): 

151 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes', 

152 '_extra_tail', '_cached_leafs', '_orig_pvector') 

153 

154 def __init__(self, v): 

155 self._reset(v) 

156 

157 def __getitem__(self, index): 

158 if not isinstance(index, Integral): 

159 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) 

160 

161 if index < 0: 

162 index += self._count + len(self._extra_tail) 

163 

164 if self._count <= index < self._count + len(self._extra_tail): 

165 return self._extra_tail[index - self._count] 

166 

167 return PythonPVector._node_for(self, index)[index & BIT_MASK] 

168 

169 def _reset(self, v): 

170 self._count = v._count 

171 self._shift = v._shift 

172 self._root = v._root 

173 self._tail = v._tail 

174 self._tail_offset = v._tail_offset 

175 self._dirty_nodes = {} 

176 self._cached_leafs = {} 

177 self._extra_tail = [] 

178 self._orig_pvector = v 

179 

180 def append(self, element): 

181 self._extra_tail.append(element) 

182 return self 

183 

184 def extend(self, iterable): 

185 self._extra_tail.extend(iterable) 

186 return self 

187 

188 def set(self, index, val): 

189 self[index] = val 

190 return self 

191 

192 def __setitem__(self, index, val): 

193 if not isinstance(index, Integral): 

194 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) 

195 

196 if index < 0: 

197 index += self._count + len(self._extra_tail) 

198 

199 if 0 <= index < self._count: 

200 node = self._cached_leafs.get(index >> SHIFT) 

201 if node: 

202 node[index & BIT_MASK] = val 

203 elif index >= self._tail_offset: 

204 if id(self._tail) not in self._dirty_nodes: 

205 self._tail = list(self._tail) 

206 self._dirty_nodes[id(self._tail)] = True 

207 self._cached_leafs[index >> SHIFT] = self._tail 

208 self._tail[index & BIT_MASK] = val 

209 else: 

210 self._root = self._do_set(self._shift, self._root, index, val) 

211 elif self._count <= index < self._count + len(self._extra_tail): 

212 self._extra_tail[index - self._count] = val 

213 elif index == self._count + len(self._extra_tail): 

214 self._extra_tail.append(val) 

215 else: 

216 raise IndexError("Index out of range: %s" % (index,)) 

217 

218 def _do_set(self, level, node, i, val): 

219 if id(node) in self._dirty_nodes: 

220 ret = node 

221 else: 

222 ret = list(node) 

223 self._dirty_nodes[id(ret)] = True 

224 

225 if level == 0: 

226 ret[i & BIT_MASK] = val 

227 self._cached_leafs[i >> SHIFT] = ret 

228 else: 

229 sub_index = (i >> level) & BIT_MASK # >>> 

230 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) 

231 

232 return ret 

233 

234 def delete(self, index): 

235 del self[index] 

236 return self 

237 

238 def __delitem__(self, key): 

239 if self._orig_pvector: 

240 # All structural sharing bets are off, base evolver on _extra_tail only 

241 l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist() 

242 l.extend(self._extra_tail) 

243 self._reset(_EMPTY_PVECTOR) 

244 self._extra_tail = l 

245 

246 del self._extra_tail[key] 

247 

248 def persistent(self): 

249 result = self._orig_pvector 

250 if self.is_dirty(): 

251 result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail) 

252 self._reset(result) 

253 

254 return result 

255 

256 def __len__(self): 

257 return self._count + len(self._extra_tail) 

258 

259 def is_dirty(self): 

260 return bool(self._dirty_nodes or self._extra_tail) 

261 

262 def evolver(self): 

263 return PythonPVector.Evolver(self) 

264 

265 def set(self, i, val): 

266 # This method could be implemented by a call to mset() but doing so would cause 

267 # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation 

268 # of PVector) so we're keeping this implementation for now. 

269 

270 if not isinstance(i, Integral): 

271 raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__) 

272 

273 if i < 0: 

274 i += self._count 

275 

276 if 0 <= i < self._count: 

277 if i >= self._tail_offset: 

278 new_tail = list(self._tail) 

279 new_tail[i & BIT_MASK] = val 

280 return PythonPVector(self._count, self._shift, self._root, new_tail) 

281 

282 return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail) 

283 

284 if i == self._count: 

285 return self.append(val) 

286 

287 raise IndexError("Index out of range: %s" % (i,)) 

288 

289 def _do_set(self, level, node, i, val): 

290 ret = list(node) 

291 if level == 0: 

292 ret[i & BIT_MASK] = val 

293 else: 

294 sub_index = (i >> level) & BIT_MASK # >>> 

295 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) 

296 

297 return ret 

298 

299 @staticmethod 

300 def _node_for(pvector_like, i): 

301 if 0 <= i < pvector_like._count: 

302 if i >= pvector_like._tail_offset: 

303 return pvector_like._tail 

304 

305 node = pvector_like._root 

306 for level in range(pvector_like._shift, 0, -SHIFT): 

307 node = node[(i >> level) & BIT_MASK] # >>> 

308 

309 return node 

310 

311 raise IndexError("Index out of range: %s" % (i,)) 

312 

313 def _create_new_root(self): 

314 new_shift = self._shift 

315 

316 # Overflow root? 

317 if (self._count >> SHIFT) > (1 << self._shift): # >>> 

318 new_root = [self._root, self._new_path(self._shift, self._tail)] 

319 new_shift += SHIFT 

320 else: 

321 new_root = self._push_tail(self._shift, self._root, self._tail) 

322 

323 return new_root, new_shift 

324 

325 def append(self, val): 

326 if len(self._tail) < BRANCH_FACTOR: 

327 new_tail = list(self._tail) 

328 new_tail.append(val) 

329 return PythonPVector(self._count + 1, self._shift, self._root, new_tail) 

330 

331 # Full tail, push into tree 

332 new_root, new_shift = self._create_new_root() 

333 return PythonPVector(self._count + 1, new_shift, new_root, [val]) 

334 

335 def _new_path(self, level, node): 

336 if level == 0: 

337 return node 

338 

339 return [self._new_path(level - SHIFT, node)] 

340 

341 def _mutating_insert_tail(self): 

342 self._root, self._shift = self._create_new_root() 

343 self._tail = [] 

344 

345 def _mutating_fill_tail(self, offset, sequence): 

346 max_delta_len = BRANCH_FACTOR - len(self._tail) 

347 delta = sequence[offset:offset + max_delta_len] 

348 self._tail.extend(delta) 

349 delta_len = len(delta) 

350 self._count += delta_len 

351 return offset + delta_len 

352 

353 def _mutating_extend(self, sequence): 

354 offset = 0 

355 sequence_len = len(sequence) 

356 while offset < sequence_len: 

357 offset = self._mutating_fill_tail(offset, sequence) 

358 if len(self._tail) == BRANCH_FACTOR: 

359 self._mutating_insert_tail() 

360 

361 self._tail_offset = self._count - len(self._tail) 

362 

363 def extend(self, obj): 

364 # Mutates the new vector directly for efficiency but that's only an 

365 # implementation detail, once it is returned it should be considered immutable 

366 l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj) 

367 if l: 

368 new_vector = self.append(l[0]) 

369 new_vector._mutating_extend(l[1:]) 

370 return new_vector 

371 

372 return self 

373 

374 def _push_tail(self, level, parent, tail_node): 

375 """ 

376 if parent is leaf, insert node, 

377 else does it map to an existing child? -> 

378 node_to_insert = push node one more level 

379 else alloc new path 

380 

381 return node_to_insert placed in copy of parent 

382 """ 

383 ret = list(parent) 

384 

385 if level == SHIFT: 

386 ret.append(tail_node) 

387 return ret 

388 

389 sub_index = ((self._count - 1) >> level) & BIT_MASK # >>> 

390 if len(parent) > sub_index: 

391 ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node) 

392 return ret 

393 

394 ret.append(self._new_path(level - SHIFT, tail_node)) 

395 return ret 

396 

397 def index(self, value, *args, **kwargs): 

398 return self.tolist().index(value, *args, **kwargs) 

399 

400 def count(self, value): 

401 return self.tolist().count(value) 

402 

403 def delete(self, index, stop=None): 

404 l = self.tolist() 

405 del l[_index_or_slice(index, stop)] 

406 return _EMPTY_PVECTOR.extend(l) 

407 

408 def remove(self, value): 

409 l = self.tolist() 

410 l.remove(value) 

411 return _EMPTY_PVECTOR.extend(l) 

412 

413class PVector(metaclass=ABCMeta): 

414 """ 

415 Persistent vector implementation. Meant as a replacement for the cases where you would normally 

416 use a Python list. 

417 

418 Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to 

419 create an instance. 

420 

421 Heavily influenced by the persistent vector available in Clojure. Initially this was more or 

422 less just a port of the Java code for the Clojure vector. It has since been modified and to 

423 some extent optimized for usage in Python. 

424 

425 The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No 

426 updates are done to the original vector. Structural sharing between vectors are applied where possible to save 

427 space and to avoid making complete copies. 

428 

429 This structure corresponds most closely to the built in list type and is intended as a replacement. Where the 

430 semantics are the same (more or less) the same function names have been used but for some cases it is not possible, 

431 for example assignments. 

432 

433 The PVector implements the Sequence protocol and is Hashable. 

434 

435 Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector. 

436 

437 The following are examples of some common operations on persistent vectors: 

438 

439 >>> p = v(1, 2, 3) 

440 >>> p2 = p.append(4) 

441 >>> p3 = p2.extend([5, 6, 7]) 

442 >>> p 

443 pvector([1, 2, 3]) 

444 >>> p2 

445 pvector([1, 2, 3, 4]) 

446 >>> p3 

447 pvector([1, 2, 3, 4, 5, 6, 7]) 

448 >>> p3[5] 

449 6 

450 >>> p.set(1, 99) 

451 pvector([1, 99, 3]) 

452 >>> 

453 """ 

454 

455 @abstractmethod 

456 def __len__(self): 

457 """ 

458 >>> len(v(1, 2, 3)) 

459 3 

460 """ 

461 

462 @abstractmethod 

463 def __getitem__(self, index): 

464 """ 

465 Get value at index. Full slicing support. 

466 

467 >>> v1 = v(5, 6, 7, 8) 

468 >>> v1[2] 

469 7 

470 >>> v1[1:3] 

471 pvector([6, 7]) 

472 """ 

473 

474 @abstractmethod 

475 def __add__(self, other): 

476 """ 

477 >>> v1 = v(1, 2) 

478 >>> v2 = v(3, 4) 

479 >>> v1 + v2 

480 pvector([1, 2, 3, 4]) 

481 """ 

482 

483 @abstractmethod 

484 def __mul__(self, times): 

485 """ 

486 >>> v1 = v(1, 2) 

487 >>> 3 * v1 

488 pvector([1, 2, 1, 2, 1, 2]) 

489 """ 

490 

491 @abstractmethod 

492 def __hash__(self): 

493 """ 

494 >>> v1 = v(1, 2, 3) 

495 >>> v2 = v(1, 2, 3) 

496 >>> hash(v1) == hash(v2) 

497 True 

498 """ 

499 

500 @abstractmethod 

501 def evolver(self): 

502 """ 

503 Create a new evolver for this pvector. The evolver acts as a mutable view of the vector 

504 with "transaction like" semantics. No part of the underlying vector i updated, it is still 

505 fully immutable. Furthermore multiple evolvers created from the same pvector do not 

506 interfere with each other. 

507 

508 You may want to use an evolver instead of working directly with the pvector in the 

509 following cases: 

510 

511 * Multiple updates are done to the same vector and the intermediate results are of no 

512 interest. In this case using an evolver may be a more efficient and easier to work with. 

513 * You need to pass a vector into a legacy function or a function that you have no control 

514 over which performs in place mutations of lists. In this case pass an evolver instance 

515 instead and then create a new pvector from the evolver once the function returns. 

516 

517 The following example illustrates a typical workflow when working with evolvers. It also 

518 displays most of the API (which i kept small by design, you should not be tempted to 

519 use evolvers in excess ;-)). 

520 

521 Create the evolver and perform various mutating updates to it: 

522 

523 >>> v1 = v(1, 2, 3, 4, 5) 

524 >>> e = v1.evolver() 

525 >>> e[1] = 22 

526 >>> _ = e.append(6) 

527 >>> _ = e.extend([7, 8, 9]) 

528 >>> e[8] += 1 

529 >>> len(e) 

530 9 

531 

532 The underlying pvector remains the same: 

533 

534 >>> v1 

535 pvector([1, 2, 3, 4, 5]) 

536 

537 The changes are kept in the evolver. An updated pvector can be created using the 

538 persistent() function on the evolver. 

539 

540 >>> v2 = e.persistent() 

541 >>> v2 

542 pvector([1, 22, 3, 4, 5, 6, 7, 8, 10]) 

543 

544 The new pvector will share data with the original pvector in the same way that would have 

545 been done if only using operations on the pvector. 

546 """ 

547 

548 @abstractmethod 

549 def mset(self, *args): 

550 """ 

551 Return a new vector with elements in specified positions replaced by values (multi set). 

552 

553 Elements on even positions in the argument list are interpreted as indexes while 

554 elements on odd positions are considered values. 

555 

556 >>> v1 = v(1, 2, 3) 

557 >>> v1.mset(0, 11, 2, 33) 

558 pvector([11, 2, 33]) 

559 """ 

560 

561 @abstractmethod 

562 def set(self, i, val): 

563 """ 

564 Return a new vector with element at position i replaced with val. The original vector remains unchanged. 

565 

566 Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will 

567 result in an IndexError. 

568 

569 >>> v1 = v(1, 2, 3) 

570 >>> v1.set(1, 4) 

571 pvector([1, 4, 3]) 

572 >>> v1.set(3, 4) 

573 pvector([1, 2, 3, 4]) 

574 >>> v1.set(-1, 4) 

575 pvector([1, 2, 4]) 

576 """ 

577 

578 @abstractmethod 

579 def append(self, val): 

580 """ 

581 Return a new vector with val appended. 

582 

583 >>> v1 = v(1, 2) 

584 >>> v1.append(3) 

585 pvector([1, 2, 3]) 

586 """ 

587 

588 @abstractmethod 

589 def extend(self, obj): 

590 """ 

591 Return a new vector with all values in obj appended to it. Obj may be another 

592 PVector or any other Iterable. 

593 

594 >>> v1 = v(1, 2, 3) 

595 >>> v1.extend([4, 5]) 

596 pvector([1, 2, 3, 4, 5]) 

597 """ 

598 

599 @abstractmethod 

600 def index(self, value, *args, **kwargs): 

601 """ 

602 Return first index of value. Additional indexes may be supplied to limit the search to a 

603 sub range of the vector. 

604 

605 >>> v1 = v(1, 2, 3, 4, 3) 

606 >>> v1.index(3) 

607 2 

608 >>> v1.index(3, 3, 5) 

609 4 

610 """ 

611 

612 @abstractmethod 

613 def count(self, value): 

614 """ 

615 Return the number of times that value appears in the vector. 

616 

617 >>> v1 = v(1, 4, 3, 4) 

618 >>> v1.count(4) 

619 2 

620 """ 

621 

622 @abstractmethod 

623 def transform(self, *transformations): 

624 """ 

625 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation 

626 consists of two parts. One match expression that specifies which elements to transform 

627 and one transformation function that performs the actual transformation. 

628 

629 >>> from pyrsistent import freeze, ny 

630 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'}, 

631 ... {'author': 'Steve', 'content': 'A slightly longer article'}], 

632 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}}) 

633 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c) 

634 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c) 

635 >>> very_short_news.articles[0].content 

636 'A short article' 

637 >>> very_short_news.articles[1].content 

638 'A slightly long...' 

639 

640 When nothing has been transformed the original data structure is kept 

641 

642 >>> short_news is news_paper 

643 True 

644 >>> very_short_news is news_paper 

645 False 

646 >>> very_short_news.articles[0] is news_paper.articles[0] 

647 True 

648 """ 

649 

650 @abstractmethod 

651 def delete(self, index, stop=None): 

652 """ 

653 Delete a portion of the vector by index or range. 

654 

655 >>> v1 = v(1, 2, 3, 4, 5) 

656 >>> v1.delete(1) 

657 pvector([1, 3, 4, 5]) 

658 >>> v1.delete(1, 3) 

659 pvector([1, 4, 5]) 

660 """ 

661 

662 @abstractmethod 

663 def remove(self, value): 

664 """ 

665 Remove the first occurrence of a value from the vector. 

666 

667 >>> v1 = v(1, 2, 3, 2, 1) 

668 >>> v2 = v1.remove(1) 

669 >>> v2 

670 pvector([2, 3, 2, 1]) 

671 >>> v2.remove(1) 

672 pvector([2, 3, 2]) 

673 """ 

674 

675 

676_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], []) 

677PVector.register(PythonPVector) 

678Sequence.register(PVector) 

679Hashable.register(PVector) 

680 

681def python_pvector(iterable=()): 

682 """ 

683 Create a new persistent vector containing the elements in iterable. 

684 

685 >>> v1 = pvector([1, 2, 3]) 

686 >>> v1 

687 pvector([1, 2, 3]) 

688 """ 

689 return _EMPTY_PVECTOR.extend(iterable) 

690 

691try: 

692 # Use the C extension as underlying trie implementation if it is available 

693 import os 

694 if os.environ.get('PYRSISTENT_NO_C_EXTENSION'): 

695 pvector = python_pvector 

696 else: 

697 from pvectorc import pvector 

698 PVector.register(type(pvector())) 

699except ImportError: 

700 pvector = python_pvector 

701 

702 

703def v(*elements): 

704 """ 

705 Create a new persistent vector containing all parameters to this function. 

706 

707 >>> v1 = v(1, 2, 3) 

708 >>> v1 

709 pvector([1, 2, 3]) 

710 """ 

711 return pvector(elements)