1from abc import abstractmethod, ABCMeta
2from collections.abc import Sequence, Hashable
3from numbers import Integral
4import operator
5from typing import TypeVar, Generic
6
7from pyrsistent._transformations import transform
8
9T_co = TypeVar('T_co', covariant=True)
10
11
12def _bitcount(val):
13 return bin(val).count("1")
14
15BRANCH_FACTOR = 32
16BIT_MASK = BRANCH_FACTOR - 1
17SHIFT = _bitcount(BIT_MASK)
18
19
20def compare_pvector(v, other, operator):
21 return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other)
22
23
24def _index_or_slice(index, stop):
25 if stop is None:
26 return index
27
28 return slice(index, stop)
29
30
31class PythonPVector(object):
32 """
33 Support structure for PVector that implements structural sharing for vectors using a trie.
34 """
35 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__')
36
37 def __new__(cls, count, shift, root, tail):
38 self = super(PythonPVector, cls).__new__(cls)
39 self._count = count
40 self._shift = shift
41 self._root = root
42 self._tail = tail
43
44 # Derived attribute stored for performance
45 self._tail_offset = self._count - len(self._tail)
46 return self
47
48 def __len__(self):
49 return self._count
50
51 def __getitem__(self, index):
52 if isinstance(index, slice):
53 # There are more conditions than the below where it would be OK to
54 # return ourselves, implement those...
55 if index.start is None and index.stop is None and index.step is None:
56 return self
57
58 # This is a bit nasty realizing the whole structure as a list before
59 # slicing it but it is the fastest way I've found to date, and it's easy :-)
60 return _EMPTY_PVECTOR.extend(self.tolist()[index])
61
62 if index < 0:
63 index += self._count
64
65 return PythonPVector._node_for(self, index)[index & BIT_MASK]
66
67 def __add__(self, other):
68 return self.extend(other)
69
70 def __repr__(self):
71 return 'pvector({0})'.format(str(self.tolist()))
72
73 def __str__(self):
74 return self.__repr__()
75
76 def __iter__(self):
77 # This is kind of lazy and will produce some memory overhead but it is the fasted method
78 # by far of those tried since it uses the speed of the built in python list directly.
79 return iter(self.tolist())
80
81 def __ne__(self, other):
82 return not self.__eq__(other)
83
84 def __eq__(self, other):
85 return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq)
86
87 def __gt__(self, other):
88 return compare_pvector(self, other, operator.gt)
89
90 def __lt__(self, other):
91 return compare_pvector(self, other, operator.lt)
92
93 def __ge__(self, other):
94 return compare_pvector(self, other, operator.ge)
95
96 def __le__(self, other):
97 return compare_pvector(self, other, operator.le)
98
99 def __mul__(self, times):
100 if times <= 0 or self is _EMPTY_PVECTOR:
101 return _EMPTY_PVECTOR
102
103 if times == 1:
104 return self
105
106 return _EMPTY_PVECTOR.extend(times * self.tolist())
107
108 __rmul__ = __mul__
109
110 def _fill_list(self, node, shift, the_list):
111 if shift:
112 shift -= SHIFT
113 for n in node:
114 self._fill_list(n, shift, the_list)
115 else:
116 the_list.extend(node)
117
118 def tolist(self):
119 """
120 The fastest way to convert the vector into a python list.
121 """
122 the_list = []
123 self._fill_list(self._root, self._shift, the_list)
124 the_list.extend(self._tail)
125 return the_list
126
127 def _totuple(self):
128 """
129 Returns the content as a python tuple.
130 """
131 return tuple(self.tolist())
132
133 def __hash__(self):
134 # Taking the easy way out again...
135 return hash(self._totuple())
136
137 def transform(self, *transformations):
138 return transform(self, transformations)
139
140 def __reduce__(self):
141 # Pickling support
142 return pvector, (self.tolist(),)
143
144 def mset(self, *args):
145 if len(args) % 2:
146 raise TypeError("mset expected an even number of arguments")
147
148 evolver = self.evolver()
149 for i in range(0, len(args), 2):
150 evolver[args[i]] = args[i+1]
151
152 return evolver.persistent()
153
154 class Evolver(object):
155 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes',
156 '_extra_tail', '_cached_leafs', '_orig_pvector')
157
158 def __init__(self, v):
159 self._reset(v)
160
161 def __getitem__(self, index):
162 if not isinstance(index, Integral):
163 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
164
165 if index < 0:
166 index += self._count + len(self._extra_tail)
167
168 if self._count <= index < self._count + len(self._extra_tail):
169 return self._extra_tail[index - self._count]
170
171 return PythonPVector._node_for(self, index)[index & BIT_MASK]
172
173 def _reset(self, v):
174 self._count = v._count
175 self._shift = v._shift
176 self._root = v._root
177 self._tail = v._tail
178 self._tail_offset = v._tail_offset
179 self._dirty_nodes = {}
180 self._cached_leafs = {}
181 self._extra_tail = []
182 self._orig_pvector = v
183
184 def append(self, element):
185 self._extra_tail.append(element)
186 return self
187
188 def extend(self, iterable):
189 self._extra_tail.extend(iterable)
190 return self
191
192 def set(self, index, val):
193 self[index] = val
194 return self
195
196 def __setitem__(self, index, val):
197 if not isinstance(index, Integral):
198 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
199
200 if index < 0:
201 index += self._count + len(self._extra_tail)
202
203 if 0 <= index < self._count:
204 node = self._cached_leafs.get(index >> SHIFT)
205 if node:
206 node[index & BIT_MASK] = val
207 elif index >= self._tail_offset:
208 if id(self._tail) not in self._dirty_nodes:
209 self._tail = list(self._tail)
210 self._dirty_nodes[id(self._tail)] = True
211 self._cached_leafs[index >> SHIFT] = self._tail
212 self._tail[index & BIT_MASK] = val
213 else:
214 self._root = self._do_set(self._shift, self._root, index, val)
215 elif self._count <= index < self._count + len(self._extra_tail):
216 self._extra_tail[index - self._count] = val
217 elif index == self._count + len(self._extra_tail):
218 self._extra_tail.append(val)
219 else:
220 raise IndexError("Index out of range: %s" % (index,))
221
222 def _do_set(self, level, node, i, val):
223 if id(node) in self._dirty_nodes:
224 ret = node
225 else:
226 ret = list(node)
227 self._dirty_nodes[id(ret)] = True
228
229 if level == 0:
230 ret[i & BIT_MASK] = val
231 self._cached_leafs[i >> SHIFT] = ret
232 else:
233 sub_index = (i >> level) & BIT_MASK # >>>
234 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
235
236 return ret
237
238 def delete(self, index):
239 del self[index]
240 return self
241
242 def __delitem__(self, key):
243 if self._orig_pvector:
244 # All structural sharing bets are off, base evolver on _extra_tail only
245 l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist()
246 l.extend(self._extra_tail)
247 self._reset(_EMPTY_PVECTOR)
248 self._extra_tail = l
249
250 del self._extra_tail[key]
251
252 def persistent(self):
253 result = self._orig_pvector
254 if self.is_dirty():
255 result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail)
256 self._reset(result)
257
258 return result
259
260 def __len__(self):
261 return self._count + len(self._extra_tail)
262
263 def is_dirty(self):
264 return bool(self._dirty_nodes or self._extra_tail)
265
266 def evolver(self):
267 return PythonPVector.Evolver(self)
268
269 def set(self, i, val):
270 # This method could be implemented by a call to mset() but doing so would cause
271 # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation
272 # of PVector) so we're keeping this implementation for now.
273
274 if not isinstance(i, Integral):
275 raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__)
276
277 if i < 0:
278 i += self._count
279
280 if 0 <= i < self._count:
281 if i >= self._tail_offset:
282 new_tail = list(self._tail)
283 new_tail[i & BIT_MASK] = val
284 return PythonPVector(self._count, self._shift, self._root, new_tail)
285
286 return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail)
287
288 if i == self._count:
289 return self.append(val)
290
291 raise IndexError("Index out of range: %s" % (i,))
292
293 def _do_set(self, level, node, i, val):
294 ret = list(node)
295 if level == 0:
296 ret[i & BIT_MASK] = val
297 else:
298 sub_index = (i >> level) & BIT_MASK # >>>
299 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
300
301 return ret
302
303 @staticmethod
304 def _node_for(pvector_like, i):
305 if 0 <= i < pvector_like._count:
306 if i >= pvector_like._tail_offset:
307 return pvector_like._tail
308
309 node = pvector_like._root
310 for level in range(pvector_like._shift, 0, -SHIFT):
311 node = node[(i >> level) & BIT_MASK] # >>>
312
313 return node
314
315 raise IndexError("Index out of range: %s" % (i,))
316
317 def _create_new_root(self):
318 new_shift = self._shift
319
320 # Overflow root?
321 if (self._count >> SHIFT) > (1 << self._shift): # >>>
322 new_root = [self._root, self._new_path(self._shift, self._tail)]
323 new_shift += SHIFT
324 else:
325 new_root = self._push_tail(self._shift, self._root, self._tail)
326
327 return new_root, new_shift
328
329 def append(self, val):
330 if len(self._tail) < BRANCH_FACTOR:
331 new_tail = list(self._tail)
332 new_tail.append(val)
333 return PythonPVector(self._count + 1, self._shift, self._root, new_tail)
334
335 # Full tail, push into tree
336 new_root, new_shift = self._create_new_root()
337 return PythonPVector(self._count + 1, new_shift, new_root, [val])
338
339 def _new_path(self, level, node):
340 if level == 0:
341 return node
342
343 return [self._new_path(level - SHIFT, node)]
344
345 def _mutating_insert_tail(self):
346 self._root, self._shift = self._create_new_root()
347 self._tail = []
348
349 def _mutating_fill_tail(self, offset, sequence):
350 max_delta_len = BRANCH_FACTOR - len(self._tail)
351 delta = sequence[offset:offset + max_delta_len]
352 self._tail.extend(delta)
353 delta_len = len(delta)
354 self._count += delta_len
355 return offset + delta_len
356
357 def _mutating_extend(self, sequence):
358 offset = 0
359 sequence_len = len(sequence)
360 while offset < sequence_len:
361 offset = self._mutating_fill_tail(offset, sequence)
362 if len(self._tail) == BRANCH_FACTOR:
363 self._mutating_insert_tail()
364
365 self._tail_offset = self._count - len(self._tail)
366
367 def extend(self, obj):
368 # Mutates the new vector directly for efficiency but that's only an
369 # implementation detail, once it is returned it should be considered immutable
370 l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj)
371 if l:
372 new_vector = self.append(l[0])
373 new_vector._mutating_extend(l[1:])
374 return new_vector
375
376 return self
377
378 def _push_tail(self, level, parent, tail_node):
379 """
380 if parent is leaf, insert node,
381 else does it map to an existing child? ->
382 node_to_insert = push node one more level
383 else alloc new path
384
385 return node_to_insert placed in copy of parent
386 """
387 ret = list(parent)
388
389 if level == SHIFT:
390 ret.append(tail_node)
391 return ret
392
393 sub_index = ((self._count - 1) >> level) & BIT_MASK # >>>
394 if len(parent) > sub_index:
395 ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node)
396 return ret
397
398 ret.append(self._new_path(level - SHIFT, tail_node))
399 return ret
400
401 def index(self, value, *args, **kwargs):
402 return self.tolist().index(value, *args, **kwargs)
403
404 def count(self, value):
405 return self.tolist().count(value)
406
407 def delete(self, index, stop=None):
408 l = self.tolist()
409 del l[_index_or_slice(index, stop)]
410 return _EMPTY_PVECTOR.extend(l)
411
412 def remove(self, value):
413 l = self.tolist()
414 l.remove(value)
415 return _EMPTY_PVECTOR.extend(l)
416
417class PVector(Generic[T_co],metaclass=ABCMeta):
418 """
419 Persistent vector implementation. Meant as a replacement for the cases where you would normally
420 use a Python list.
421
422 Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to
423 create an instance.
424
425 Heavily influenced by the persistent vector available in Clojure. Initially this was more or
426 less just a port of the Java code for the Clojure vector. It has since been modified and to
427 some extent optimized for usage in Python.
428
429 The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No
430 updates are done to the original vector. Structural sharing between vectors are applied where possible to save
431 space and to avoid making complete copies.
432
433 This structure corresponds most closely to the built in list type and is intended as a replacement. Where the
434 semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
435 for example assignments.
436
437 The PVector implements the Sequence protocol and is Hashable.
438
439 Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector.
440
441 The following are examples of some common operations on persistent vectors:
442
443 >>> p = v(1, 2, 3)
444 >>> p2 = p.append(4)
445 >>> p3 = p2.extend([5, 6, 7])
446 >>> p
447 pvector([1, 2, 3])
448 >>> p2
449 pvector([1, 2, 3, 4])
450 >>> p3
451 pvector([1, 2, 3, 4, 5, 6, 7])
452 >>> p3[5]
453 6
454 >>> p.set(1, 99)
455 pvector([1, 99, 3])
456 >>>
457 """
458
459 @abstractmethod
460 def __len__(self):
461 """
462 >>> len(v(1, 2, 3))
463 3
464 """
465
466 @abstractmethod
467 def __getitem__(self, index):
468 """
469 Get value at index. Full slicing support.
470
471 >>> v1 = v(5, 6, 7, 8)
472 >>> v1[2]
473 7
474 >>> v1[1:3]
475 pvector([6, 7])
476 """
477
478 @abstractmethod
479 def __add__(self, other):
480 """
481 >>> v1 = v(1, 2)
482 >>> v2 = v(3, 4)
483 >>> v1 + v2
484 pvector([1, 2, 3, 4])
485 """
486
487 @abstractmethod
488 def __mul__(self, times):
489 """
490 >>> v1 = v(1, 2)
491 >>> 3 * v1
492 pvector([1, 2, 1, 2, 1, 2])
493 """
494
495 @abstractmethod
496 def __hash__(self):
497 """
498 >>> v1 = v(1, 2, 3)
499 >>> v2 = v(1, 2, 3)
500 >>> hash(v1) == hash(v2)
501 True
502 """
503
504 @abstractmethod
505 def evolver(self):
506 """
507 Create a new evolver for this pvector. The evolver acts as a mutable view of the vector
508 with "transaction like" semantics. No part of the underlying vector i updated, it is still
509 fully immutable. Furthermore multiple evolvers created from the same pvector do not
510 interfere with each other.
511
512 You may want to use an evolver instead of working directly with the pvector in the
513 following cases:
514
515 * Multiple updates are done to the same vector and the intermediate results are of no
516 interest. In this case using an evolver may be a more efficient and easier to work with.
517 * You need to pass a vector into a legacy function or a function that you have no control
518 over which performs in place mutations of lists. In this case pass an evolver instance
519 instead and then create a new pvector from the evolver once the function returns.
520
521 The following example illustrates a typical workflow when working with evolvers. It also
522 displays most of the API (which i kept small by design, you should not be tempted to
523 use evolvers in excess ;-)).
524
525 Create the evolver and perform various mutating updates to it:
526
527 >>> v1 = v(1, 2, 3, 4, 5)
528 >>> e = v1.evolver()
529 >>> e[1] = 22
530 >>> _ = e.append(6)
531 >>> _ = e.extend([7, 8, 9])
532 >>> e[8] += 1
533 >>> len(e)
534 9
535
536 The underlying pvector remains the same:
537
538 >>> v1
539 pvector([1, 2, 3, 4, 5])
540
541 The changes are kept in the evolver. An updated pvector can be created using the
542 persistent() function on the evolver.
543
544 >>> v2 = e.persistent()
545 >>> v2
546 pvector([1, 22, 3, 4, 5, 6, 7, 8, 10])
547
548 The new pvector will share data with the original pvector in the same way that would have
549 been done if only using operations on the pvector.
550 """
551
552 @abstractmethod
553 def mset(self, *args):
554 """
555 Return a new vector with elements in specified positions replaced by values (multi set).
556
557 Elements on even positions in the argument list are interpreted as indexes while
558 elements on odd positions are considered values.
559
560 >>> v1 = v(1, 2, 3)
561 >>> v1.mset(0, 11, 2, 33)
562 pvector([11, 2, 33])
563 """
564
565 @abstractmethod
566 def set(self, i, val):
567 """
568 Return a new vector with element at position i replaced with val. The original vector remains unchanged.
569
570 Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will
571 result in an IndexError.
572
573 >>> v1 = v(1, 2, 3)
574 >>> v1.set(1, 4)
575 pvector([1, 4, 3])
576 >>> v1.set(3, 4)
577 pvector([1, 2, 3, 4])
578 >>> v1.set(-1, 4)
579 pvector([1, 2, 4])
580 """
581
582 @abstractmethod
583 def append(self, val):
584 """
585 Return a new vector with val appended.
586
587 >>> v1 = v(1, 2)
588 >>> v1.append(3)
589 pvector([1, 2, 3])
590 """
591
592 @abstractmethod
593 def extend(self, obj):
594 """
595 Return a new vector with all values in obj appended to it. Obj may be another
596 PVector or any other Iterable.
597
598 >>> v1 = v(1, 2, 3)
599 >>> v1.extend([4, 5])
600 pvector([1, 2, 3, 4, 5])
601 """
602
603 @abstractmethod
604 def index(self, value, *args, **kwargs):
605 """
606 Return first index of value. Additional indexes may be supplied to limit the search to a
607 sub range of the vector.
608
609 >>> v1 = v(1, 2, 3, 4, 3)
610 >>> v1.index(3)
611 2
612 >>> v1.index(3, 3, 5)
613 4
614 """
615
616 @abstractmethod
617 def count(self, value):
618 """
619 Return the number of times that value appears in the vector.
620
621 >>> v1 = v(1, 4, 3, 4)
622 >>> v1.count(4)
623 2
624 """
625
626 @abstractmethod
627 def transform(self, *transformations):
628 """
629 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
630 consists of two parts. One match expression that specifies which elements to transform
631 and one transformation function that performs the actual transformation.
632
633 >>> from pyrsistent import freeze, ny
634 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
635 ... {'author': 'Steve', 'content': 'A slightly longer article'}],
636 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
637 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
638 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
639 >>> very_short_news.articles[0].content
640 'A short article'
641 >>> very_short_news.articles[1].content
642 'A slightly long...'
643
644 When nothing has been transformed the original data structure is kept
645
646 >>> short_news is news_paper
647 True
648 >>> very_short_news is news_paper
649 False
650 >>> very_short_news.articles[0] is news_paper.articles[0]
651 True
652 """
653
654 @abstractmethod
655 def delete(self, index, stop=None):
656 """
657 Delete a portion of the vector by index or range.
658
659 >>> v1 = v(1, 2, 3, 4, 5)
660 >>> v1.delete(1)
661 pvector([1, 3, 4, 5])
662 >>> v1.delete(1, 3)
663 pvector([1, 4, 5])
664 """
665
666 @abstractmethod
667 def remove(self, value):
668 """
669 Remove the first occurrence of a value from the vector.
670
671 >>> v1 = v(1, 2, 3, 2, 1)
672 >>> v2 = v1.remove(1)
673 >>> v2
674 pvector([2, 3, 2, 1])
675 >>> v2.remove(1)
676 pvector([2, 3, 2])
677 """
678
679
680_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], [])
681PVector.register(PythonPVector)
682Sequence.register(PVector)
683Hashable.register(PVector)
684
685def python_pvector(iterable=()):
686 """
687 Create a new persistent vector containing the elements in iterable.
688
689 >>> v1 = pvector([1, 2, 3])
690 >>> v1
691 pvector([1, 2, 3])
692 """
693 return _EMPTY_PVECTOR.extend(iterable)
694
695try:
696 # Use the C extension as underlying trie implementation if it is available
697 import os
698 if os.environ.get('PYRSISTENT_NO_C_EXTENSION'):
699 pvector = python_pvector
700 else:
701 from pvectorc import pvector
702 PVector.register(type(pvector()))
703except ImportError:
704 pvector = python_pvector
705
706
707def v(*elements):
708 """
709 Create a new persistent vector containing all parameters to this function.
710
711 >>> v1 = v(1, 2, 3)
712 >>> v1
713 pvector([1, 2, 3])
714 """
715 return pvector(elements)