Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyrsistent/_pvector.py: 62%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

320 statements  

1from abc import abstractmethod, ABCMeta 

2from collections.abc import Sequence, Hashable 

3from numbers import Integral 

4import operator 

5from typing import TypeVar, Generic 

6 

7from pyrsistent._transformations import transform 

8 

9T_co = TypeVar('T_co', covariant=True) 

10 

11 

12def _bitcount(val): 

13 return bin(val).count("1") 

14 

15BRANCH_FACTOR = 32 

16BIT_MASK = BRANCH_FACTOR - 1 

17SHIFT = _bitcount(BIT_MASK) 

18 

19 

20def compare_pvector(v, other, operator): 

21 return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other) 

22 

23 

24def _index_or_slice(index, stop): 

25 if stop is None: 

26 return index 

27 

28 return slice(index, stop) 

29 

30 

31class PythonPVector(object): 

32 """ 

33 Support structure for PVector that implements structural sharing for vectors using a trie. 

34 """ 

35 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__') 

36 

37 def __new__(cls, count, shift, root, tail): 

38 self = super(PythonPVector, cls).__new__(cls) 

39 self._count = count 

40 self._shift = shift 

41 self._root = root 

42 self._tail = tail 

43 

44 # Derived attribute stored for performance 

45 self._tail_offset = self._count - len(self._tail) 

46 return self 

47 

48 def __len__(self): 

49 return self._count 

50 

51 def __getitem__(self, index): 

52 if isinstance(index, slice): 

53 # There are more conditions than the below where it would be OK to 

54 # return ourselves, implement those... 

55 if index.start is None and index.stop is None and index.step is None: 

56 return self 

57 

58 # This is a bit nasty realizing the whole structure as a list before 

59 # slicing it but it is the fastest way I've found to date, and it's easy :-) 

60 return _EMPTY_PVECTOR.extend(self.tolist()[index]) 

61 

62 if index < 0: 

63 index += self._count 

64 

65 return PythonPVector._node_for(self, index)[index & BIT_MASK] 

66 

67 def __add__(self, other): 

68 return self.extend(other) 

69 

70 def __repr__(self): 

71 return 'pvector({0})'.format(str(self.tolist())) 

72 

73 def __str__(self): 

74 return self.__repr__() 

75 

76 def __iter__(self): 

77 # This is kind of lazy and will produce some memory overhead but it is the fasted method 

78 # by far of those tried since it uses the speed of the built in python list directly. 

79 return iter(self.tolist()) 

80 

81 def __ne__(self, other): 

82 return not self.__eq__(other) 

83 

84 def __eq__(self, other): 

85 return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq) 

86 

87 def __gt__(self, other): 

88 return compare_pvector(self, other, operator.gt) 

89 

90 def __lt__(self, other): 

91 return compare_pvector(self, other, operator.lt) 

92 

93 def __ge__(self, other): 

94 return compare_pvector(self, other, operator.ge) 

95 

96 def __le__(self, other): 

97 return compare_pvector(self, other, operator.le) 

98 

99 def __mul__(self, times): 

100 if times <= 0 or self is _EMPTY_PVECTOR: 

101 return _EMPTY_PVECTOR 

102 

103 if times == 1: 

104 return self 

105 

106 return _EMPTY_PVECTOR.extend(times * self.tolist()) 

107 

108 __rmul__ = __mul__ 

109 

110 def _fill_list(self, node, shift, the_list): 

111 if shift: 

112 shift -= SHIFT 

113 for n in node: 

114 self._fill_list(n, shift, the_list) 

115 else: 

116 the_list.extend(node) 

117 

118 def tolist(self): 

119 """ 

120 The fastest way to convert the vector into a python list. 

121 """ 

122 the_list = [] 

123 self._fill_list(self._root, self._shift, the_list) 

124 the_list.extend(self._tail) 

125 return the_list 

126 

127 def _totuple(self): 

128 """ 

129 Returns the content as a python tuple. 

130 """ 

131 return tuple(self.tolist()) 

132 

133 def __hash__(self): 

134 # Taking the easy way out again... 

135 return hash(self._totuple()) 

136 

137 def transform(self, *transformations): 

138 return transform(self, transformations) 

139 

140 def __reduce__(self): 

141 # Pickling support 

142 return pvector, (self.tolist(),) 

143 

144 def mset(self, *args): 

145 if len(args) % 2: 

146 raise TypeError("mset expected an even number of arguments") 

147 

148 evolver = self.evolver() 

149 for i in range(0, len(args), 2): 

150 evolver[args[i]] = args[i+1] 

151 

152 return evolver.persistent() 

153 

154 class Evolver(object): 

155 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes', 

156 '_extra_tail', '_cached_leafs', '_orig_pvector') 

157 

158 def __init__(self, v): 

159 self._reset(v) 

160 

161 def __getitem__(self, index): 

162 if not isinstance(index, Integral): 

163 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) 

164 

165 if index < 0: 

166 index += self._count + len(self._extra_tail) 

167 

168 if self._count <= index < self._count + len(self._extra_tail): 

169 return self._extra_tail[index - self._count] 

170 

171 return PythonPVector._node_for(self, index)[index & BIT_MASK] 

172 

173 def _reset(self, v): 

174 self._count = v._count 

175 self._shift = v._shift 

176 self._root = v._root 

177 self._tail = v._tail 

178 self._tail_offset = v._tail_offset 

179 self._dirty_nodes = {} 

180 self._cached_leafs = {} 

181 self._extra_tail = [] 

182 self._orig_pvector = v 

183 

184 def append(self, element): 

185 self._extra_tail.append(element) 

186 return self 

187 

188 def extend(self, iterable): 

189 self._extra_tail.extend(iterable) 

190 return self 

191 

192 def set(self, index, val): 

193 self[index] = val 

194 return self 

195 

196 def __setitem__(self, index, val): 

197 if not isinstance(index, Integral): 

198 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__) 

199 

200 if index < 0: 

201 index += self._count + len(self._extra_tail) 

202 

203 if 0 <= index < self._count: 

204 node = self._cached_leafs.get(index >> SHIFT) 

205 if node: 

206 node[index & BIT_MASK] = val 

207 elif index >= self._tail_offset: 

208 if id(self._tail) not in self._dirty_nodes: 

209 self._tail = list(self._tail) 

210 self._dirty_nodes[id(self._tail)] = True 

211 self._cached_leafs[index >> SHIFT] = self._tail 

212 self._tail[index & BIT_MASK] = val 

213 else: 

214 self._root = self._do_set(self._shift, self._root, index, val) 

215 elif self._count <= index < self._count + len(self._extra_tail): 

216 self._extra_tail[index - self._count] = val 

217 elif index == self._count + len(self._extra_tail): 

218 self._extra_tail.append(val) 

219 else: 

220 raise IndexError("Index out of range: %s" % (index,)) 

221 

222 def _do_set(self, level, node, i, val): 

223 if id(node) in self._dirty_nodes: 

224 ret = node 

225 else: 

226 ret = list(node) 

227 self._dirty_nodes[id(ret)] = True 

228 

229 if level == 0: 

230 ret[i & BIT_MASK] = val 

231 self._cached_leafs[i >> SHIFT] = ret 

232 else: 

233 sub_index = (i >> level) & BIT_MASK # >>> 

234 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) 

235 

236 return ret 

237 

238 def delete(self, index): 

239 del self[index] 

240 return self 

241 

242 def __delitem__(self, key): 

243 if self._orig_pvector: 

244 # All structural sharing bets are off, base evolver on _extra_tail only 

245 l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist() 

246 l.extend(self._extra_tail) 

247 self._reset(_EMPTY_PVECTOR) 

248 self._extra_tail = l 

249 

250 del self._extra_tail[key] 

251 

252 def persistent(self): 

253 result = self._orig_pvector 

254 if self.is_dirty(): 

255 result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail) 

256 self._reset(result) 

257 

258 return result 

259 

260 def __len__(self): 

261 return self._count + len(self._extra_tail) 

262 

263 def is_dirty(self): 

264 return bool(self._dirty_nodes or self._extra_tail) 

265 

266 def evolver(self): 

267 return PythonPVector.Evolver(self) 

268 

269 def set(self, i, val): 

270 # This method could be implemented by a call to mset() but doing so would cause 

271 # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation 

272 # of PVector) so we're keeping this implementation for now. 

273 

274 if not isinstance(i, Integral): 

275 raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__) 

276 

277 if i < 0: 

278 i += self._count 

279 

280 if 0 <= i < self._count: 

281 if i >= self._tail_offset: 

282 new_tail = list(self._tail) 

283 new_tail[i & BIT_MASK] = val 

284 return PythonPVector(self._count, self._shift, self._root, new_tail) 

285 

286 return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail) 

287 

288 if i == self._count: 

289 return self.append(val) 

290 

291 raise IndexError("Index out of range: %s" % (i,)) 

292 

293 def _do_set(self, level, node, i, val): 

294 ret = list(node) 

295 if level == 0: 

296 ret[i & BIT_MASK] = val 

297 else: 

298 sub_index = (i >> level) & BIT_MASK # >>> 

299 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val) 

300 

301 return ret 

302 

303 @staticmethod 

304 def _node_for(pvector_like, i): 

305 if 0 <= i < pvector_like._count: 

306 if i >= pvector_like._tail_offset: 

307 return pvector_like._tail 

308 

309 node = pvector_like._root 

310 for level in range(pvector_like._shift, 0, -SHIFT): 

311 node = node[(i >> level) & BIT_MASK] # >>> 

312 

313 return node 

314 

315 raise IndexError("Index out of range: %s" % (i,)) 

316 

317 def _create_new_root(self): 

318 new_shift = self._shift 

319 

320 # Overflow root? 

321 if (self._count >> SHIFT) > (1 << self._shift): # >>> 

322 new_root = [self._root, self._new_path(self._shift, self._tail)] 

323 new_shift += SHIFT 

324 else: 

325 new_root = self._push_tail(self._shift, self._root, self._tail) 

326 

327 return new_root, new_shift 

328 

329 def append(self, val): 

330 if len(self._tail) < BRANCH_FACTOR: 

331 new_tail = list(self._tail) 

332 new_tail.append(val) 

333 return PythonPVector(self._count + 1, self._shift, self._root, new_tail) 

334 

335 # Full tail, push into tree 

336 new_root, new_shift = self._create_new_root() 

337 return PythonPVector(self._count + 1, new_shift, new_root, [val]) 

338 

339 def _new_path(self, level, node): 

340 if level == 0: 

341 return node 

342 

343 return [self._new_path(level - SHIFT, node)] 

344 

345 def _mutating_insert_tail(self): 

346 self._root, self._shift = self._create_new_root() 

347 self._tail = [] 

348 

349 def _mutating_fill_tail(self, offset, sequence): 

350 max_delta_len = BRANCH_FACTOR - len(self._tail) 

351 delta = sequence[offset:offset + max_delta_len] 

352 self._tail.extend(delta) 

353 delta_len = len(delta) 

354 self._count += delta_len 

355 return offset + delta_len 

356 

357 def _mutating_extend(self, sequence): 

358 offset = 0 

359 sequence_len = len(sequence) 

360 while offset < sequence_len: 

361 offset = self._mutating_fill_tail(offset, sequence) 

362 if len(self._tail) == BRANCH_FACTOR: 

363 self._mutating_insert_tail() 

364 

365 self._tail_offset = self._count - len(self._tail) 

366 

367 def extend(self, obj): 

368 # Mutates the new vector directly for efficiency but that's only an 

369 # implementation detail, once it is returned it should be considered immutable 

370 l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj) 

371 if l: 

372 new_vector = self.append(l[0]) 

373 new_vector._mutating_extend(l[1:]) 

374 return new_vector 

375 

376 return self 

377 

378 def _push_tail(self, level, parent, tail_node): 

379 """ 

380 if parent is leaf, insert node, 

381 else does it map to an existing child? -> 

382 node_to_insert = push node one more level 

383 else alloc new path 

384 

385 return node_to_insert placed in copy of parent 

386 """ 

387 ret = list(parent) 

388 

389 if level == SHIFT: 

390 ret.append(tail_node) 

391 return ret 

392 

393 sub_index = ((self._count - 1) >> level) & BIT_MASK # >>> 

394 if len(parent) > sub_index: 

395 ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node) 

396 return ret 

397 

398 ret.append(self._new_path(level - SHIFT, tail_node)) 

399 return ret 

400 

401 def index(self, value, *args, **kwargs): 

402 return self.tolist().index(value, *args, **kwargs) 

403 

404 def count(self, value): 

405 return self.tolist().count(value) 

406 

407 def delete(self, index, stop=None): 

408 l = self.tolist() 

409 del l[_index_or_slice(index, stop)] 

410 return _EMPTY_PVECTOR.extend(l) 

411 

412 def remove(self, value): 

413 l = self.tolist() 

414 l.remove(value) 

415 return _EMPTY_PVECTOR.extend(l) 

416 

417class PVector(Generic[T_co],metaclass=ABCMeta): 

418 """ 

419 Persistent vector implementation. Meant as a replacement for the cases where you would normally 

420 use a Python list. 

421 

422 Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to 

423 create an instance. 

424 

425 Heavily influenced by the persistent vector available in Clojure. Initially this was more or 

426 less just a port of the Java code for the Clojure vector. It has since been modified and to 

427 some extent optimized for usage in Python. 

428 

429 The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No 

430 updates are done to the original vector. Structural sharing between vectors are applied where possible to save 

431 space and to avoid making complete copies. 

432 

433 This structure corresponds most closely to the built in list type and is intended as a replacement. Where the 

434 semantics are the same (more or less) the same function names have been used but for some cases it is not possible, 

435 for example assignments. 

436 

437 The PVector implements the Sequence protocol and is Hashable. 

438 

439 Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector. 

440 

441 The following are examples of some common operations on persistent vectors: 

442 

443 >>> p = v(1, 2, 3) 

444 >>> p2 = p.append(4) 

445 >>> p3 = p2.extend([5, 6, 7]) 

446 >>> p 

447 pvector([1, 2, 3]) 

448 >>> p2 

449 pvector([1, 2, 3, 4]) 

450 >>> p3 

451 pvector([1, 2, 3, 4, 5, 6, 7]) 

452 >>> p3[5] 

453 6 

454 >>> p.set(1, 99) 

455 pvector([1, 99, 3]) 

456 >>> 

457 """ 

458 

459 @abstractmethod 

460 def __len__(self): 

461 """ 

462 >>> len(v(1, 2, 3)) 

463 3 

464 """ 

465 

466 @abstractmethod 

467 def __getitem__(self, index): 

468 """ 

469 Get value at index. Full slicing support. 

470 

471 >>> v1 = v(5, 6, 7, 8) 

472 >>> v1[2] 

473 7 

474 >>> v1[1:3] 

475 pvector([6, 7]) 

476 """ 

477 

478 @abstractmethod 

479 def __add__(self, other): 

480 """ 

481 >>> v1 = v(1, 2) 

482 >>> v2 = v(3, 4) 

483 >>> v1 + v2 

484 pvector([1, 2, 3, 4]) 

485 """ 

486 

487 @abstractmethod 

488 def __mul__(self, times): 

489 """ 

490 >>> v1 = v(1, 2) 

491 >>> 3 * v1 

492 pvector([1, 2, 1, 2, 1, 2]) 

493 """ 

494 

495 @abstractmethod 

496 def __hash__(self): 

497 """ 

498 >>> v1 = v(1, 2, 3) 

499 >>> v2 = v(1, 2, 3) 

500 >>> hash(v1) == hash(v2) 

501 True 

502 """ 

503 

504 @abstractmethod 

505 def evolver(self): 

506 """ 

507 Create a new evolver for this pvector. The evolver acts as a mutable view of the vector 

508 with "transaction like" semantics. No part of the underlying vector i updated, it is still 

509 fully immutable. Furthermore multiple evolvers created from the same pvector do not 

510 interfere with each other. 

511 

512 You may want to use an evolver instead of working directly with the pvector in the 

513 following cases: 

514 

515 * Multiple updates are done to the same vector and the intermediate results are of no 

516 interest. In this case using an evolver may be a more efficient and easier to work with. 

517 * You need to pass a vector into a legacy function or a function that you have no control 

518 over which performs in place mutations of lists. In this case pass an evolver instance 

519 instead and then create a new pvector from the evolver once the function returns. 

520 

521 The following example illustrates a typical workflow when working with evolvers. It also 

522 displays most of the API (which i kept small by design, you should not be tempted to 

523 use evolvers in excess ;-)). 

524 

525 Create the evolver and perform various mutating updates to it: 

526 

527 >>> v1 = v(1, 2, 3, 4, 5) 

528 >>> e = v1.evolver() 

529 >>> e[1] = 22 

530 >>> _ = e.append(6) 

531 >>> _ = e.extend([7, 8, 9]) 

532 >>> e[8] += 1 

533 >>> len(e) 

534 9 

535 

536 The underlying pvector remains the same: 

537 

538 >>> v1 

539 pvector([1, 2, 3, 4, 5]) 

540 

541 The changes are kept in the evolver. An updated pvector can be created using the 

542 persistent() function on the evolver. 

543 

544 >>> v2 = e.persistent() 

545 >>> v2 

546 pvector([1, 22, 3, 4, 5, 6, 7, 8, 10]) 

547 

548 The new pvector will share data with the original pvector in the same way that would have 

549 been done if only using operations on the pvector. 

550 """ 

551 

552 @abstractmethod 

553 def mset(self, *args): 

554 """ 

555 Return a new vector with elements in specified positions replaced by values (multi set). 

556 

557 Elements on even positions in the argument list are interpreted as indexes while 

558 elements on odd positions are considered values. 

559 

560 >>> v1 = v(1, 2, 3) 

561 >>> v1.mset(0, 11, 2, 33) 

562 pvector([11, 2, 33]) 

563 """ 

564 

565 @abstractmethod 

566 def set(self, i, val): 

567 """ 

568 Return a new vector with element at position i replaced with val. The original vector remains unchanged. 

569 

570 Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will 

571 result in an IndexError. 

572 

573 >>> v1 = v(1, 2, 3) 

574 >>> v1.set(1, 4) 

575 pvector([1, 4, 3]) 

576 >>> v1.set(3, 4) 

577 pvector([1, 2, 3, 4]) 

578 >>> v1.set(-1, 4) 

579 pvector([1, 2, 4]) 

580 """ 

581 

582 @abstractmethod 

583 def append(self, val): 

584 """ 

585 Return a new vector with val appended. 

586 

587 >>> v1 = v(1, 2) 

588 >>> v1.append(3) 

589 pvector([1, 2, 3]) 

590 """ 

591 

592 @abstractmethod 

593 def extend(self, obj): 

594 """ 

595 Return a new vector with all values in obj appended to it. Obj may be another 

596 PVector or any other Iterable. 

597 

598 >>> v1 = v(1, 2, 3) 

599 >>> v1.extend([4, 5]) 

600 pvector([1, 2, 3, 4, 5]) 

601 """ 

602 

603 @abstractmethod 

604 def index(self, value, *args, **kwargs): 

605 """ 

606 Return first index of value. Additional indexes may be supplied to limit the search to a 

607 sub range of the vector. 

608 

609 >>> v1 = v(1, 2, 3, 4, 3) 

610 >>> v1.index(3) 

611 2 

612 >>> v1.index(3, 3, 5) 

613 4 

614 """ 

615 

616 @abstractmethod 

617 def count(self, value): 

618 """ 

619 Return the number of times that value appears in the vector. 

620 

621 >>> v1 = v(1, 4, 3, 4) 

622 >>> v1.count(4) 

623 2 

624 """ 

625 

626 @abstractmethod 

627 def transform(self, *transformations): 

628 """ 

629 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation 

630 consists of two parts. One match expression that specifies which elements to transform 

631 and one transformation function that performs the actual transformation. 

632 

633 >>> from pyrsistent import freeze, ny 

634 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'}, 

635 ... {'author': 'Steve', 'content': 'A slightly longer article'}], 

636 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}}) 

637 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c) 

638 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c) 

639 >>> very_short_news.articles[0].content 

640 'A short article' 

641 >>> very_short_news.articles[1].content 

642 'A slightly long...' 

643 

644 When nothing has been transformed the original data structure is kept 

645 

646 >>> short_news is news_paper 

647 True 

648 >>> very_short_news is news_paper 

649 False 

650 >>> very_short_news.articles[0] is news_paper.articles[0] 

651 True 

652 """ 

653 

654 @abstractmethod 

655 def delete(self, index, stop=None): 

656 """ 

657 Delete a portion of the vector by index or range. 

658 

659 >>> v1 = v(1, 2, 3, 4, 5) 

660 >>> v1.delete(1) 

661 pvector([1, 3, 4, 5]) 

662 >>> v1.delete(1, 3) 

663 pvector([1, 4, 5]) 

664 """ 

665 

666 @abstractmethod 

667 def remove(self, value): 

668 """ 

669 Remove the first occurrence of a value from the vector. 

670 

671 >>> v1 = v(1, 2, 3, 2, 1) 

672 >>> v2 = v1.remove(1) 

673 >>> v2 

674 pvector([2, 3, 2, 1]) 

675 >>> v2.remove(1) 

676 pvector([2, 3, 2]) 

677 """ 

678 

679 

680_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], []) 

681PVector.register(PythonPVector) 

682Sequence.register(PVector) 

683Hashable.register(PVector) 

684 

685def python_pvector(iterable=()): 

686 """ 

687 Create a new persistent vector containing the elements in iterable. 

688 

689 >>> v1 = pvector([1, 2, 3]) 

690 >>> v1 

691 pvector([1, 2, 3]) 

692 """ 

693 return _EMPTY_PVECTOR.extend(iterable) 

694 

695try: 

696 # Use the C extension as underlying trie implementation if it is available 

697 import os 

698 if os.environ.get('PYRSISTENT_NO_C_EXTENSION'): 

699 pvector = python_pvector 

700 else: 

701 from pvectorc import pvector 

702 PVector.register(type(pvector())) 

703except ImportError: 

704 pvector = python_pvector 

705 

706 

707def v(*elements): 

708 """ 

709 Create a new persistent vector containing all parameters to this function. 

710 

711 >>> v1 = v(1, 2, 3) 

712 >>> v1 

713 pvector([1, 2, 3]) 

714 """ 

715 return pvector(elements)