Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pyrsistent/_pvector.py: 59%
318 statements
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
« prev ^ index » next coverage.py v7.2.7, created at 2023-07-01 06:54 +0000
1from abc import abstractmethod, ABCMeta
2from collections.abc import Sequence, Hashable
3from numbers import Integral
4import operator
5from pyrsistent._transformations import transform
8def _bitcount(val):
9 return bin(val).count("1")
11BRANCH_FACTOR = 32
12BIT_MASK = BRANCH_FACTOR - 1
13SHIFT = _bitcount(BIT_MASK)
16def compare_pvector(v, other, operator):
17 return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other)
20def _index_or_slice(index, stop):
21 if stop is None:
22 return index
24 return slice(index, stop)
27class PythonPVector(object):
28 """
29 Support structure for PVector that implements structural sharing for vectors using a trie.
30 """
31 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__')
33 def __new__(cls, count, shift, root, tail):
34 self = super(PythonPVector, cls).__new__(cls)
35 self._count = count
36 self._shift = shift
37 self._root = root
38 self._tail = tail
40 # Derived attribute stored for performance
41 self._tail_offset = self._count - len(self._tail)
42 return self
44 def __len__(self):
45 return self._count
47 def __getitem__(self, index):
48 if isinstance(index, slice):
49 # There are more conditions than the below where it would be OK to
50 # return ourselves, implement those...
51 if index.start is None and index.stop is None and index.step is None:
52 return self
54 # This is a bit nasty realizing the whole structure as a list before
55 # slicing it but it is the fastest way I've found to date, and it's easy :-)
56 return _EMPTY_PVECTOR.extend(self.tolist()[index])
58 if index < 0:
59 index += self._count
61 return PythonPVector._node_for(self, index)[index & BIT_MASK]
63 def __add__(self, other):
64 return self.extend(other)
66 def __repr__(self):
67 return 'pvector({0})'.format(str(self.tolist()))
69 def __str__(self):
70 return self.__repr__()
72 def __iter__(self):
73 # This is kind of lazy and will produce some memory overhead but it is the fasted method
74 # by far of those tried since it uses the speed of the built in python list directly.
75 return iter(self.tolist())
77 def __ne__(self, other):
78 return not self.__eq__(other)
80 def __eq__(self, other):
81 return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq)
83 def __gt__(self, other):
84 return compare_pvector(self, other, operator.gt)
86 def __lt__(self, other):
87 return compare_pvector(self, other, operator.lt)
89 def __ge__(self, other):
90 return compare_pvector(self, other, operator.ge)
92 def __le__(self, other):
93 return compare_pvector(self, other, operator.le)
95 def __mul__(self, times):
96 if times <= 0 or self is _EMPTY_PVECTOR:
97 return _EMPTY_PVECTOR
99 if times == 1:
100 return self
102 return _EMPTY_PVECTOR.extend(times * self.tolist())
104 __rmul__ = __mul__
106 def _fill_list(self, node, shift, the_list):
107 if shift:
108 shift -= SHIFT
109 for n in node:
110 self._fill_list(n, shift, the_list)
111 else:
112 the_list.extend(node)
114 def tolist(self):
115 """
116 The fastest way to convert the vector into a python list.
117 """
118 the_list = []
119 self._fill_list(self._root, self._shift, the_list)
120 the_list.extend(self._tail)
121 return the_list
123 def _totuple(self):
124 """
125 Returns the content as a python tuple.
126 """
127 return tuple(self.tolist())
129 def __hash__(self):
130 # Taking the easy way out again...
131 return hash(self._totuple())
133 def transform(self, *transformations):
134 return transform(self, transformations)
136 def __reduce__(self):
137 # Pickling support
138 return pvector, (self.tolist(),)
140 def mset(self, *args):
141 if len(args) % 2:
142 raise TypeError("mset expected an even number of arguments")
144 evolver = self.evolver()
145 for i in range(0, len(args), 2):
146 evolver[args[i]] = args[i+1]
148 return evolver.persistent()
150 class Evolver(object):
151 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes',
152 '_extra_tail', '_cached_leafs', '_orig_pvector')
154 def __init__(self, v):
155 self._reset(v)
157 def __getitem__(self, index):
158 if not isinstance(index, Integral):
159 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
161 if index < 0:
162 index += self._count + len(self._extra_tail)
164 if self._count <= index < self._count + len(self._extra_tail):
165 return self._extra_tail[index - self._count]
167 return PythonPVector._node_for(self, index)[index & BIT_MASK]
169 def _reset(self, v):
170 self._count = v._count
171 self._shift = v._shift
172 self._root = v._root
173 self._tail = v._tail
174 self._tail_offset = v._tail_offset
175 self._dirty_nodes = {}
176 self._cached_leafs = {}
177 self._extra_tail = []
178 self._orig_pvector = v
180 def append(self, element):
181 self._extra_tail.append(element)
182 return self
184 def extend(self, iterable):
185 self._extra_tail.extend(iterable)
186 return self
188 def set(self, index, val):
189 self[index] = val
190 return self
192 def __setitem__(self, index, val):
193 if not isinstance(index, Integral):
194 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
196 if index < 0:
197 index += self._count + len(self._extra_tail)
199 if 0 <= index < self._count:
200 node = self._cached_leafs.get(index >> SHIFT)
201 if node:
202 node[index & BIT_MASK] = val
203 elif index >= self._tail_offset:
204 if id(self._tail) not in self._dirty_nodes:
205 self._tail = list(self._tail)
206 self._dirty_nodes[id(self._tail)] = True
207 self._cached_leafs[index >> SHIFT] = self._tail
208 self._tail[index & BIT_MASK] = val
209 else:
210 self._root = self._do_set(self._shift, self._root, index, val)
211 elif self._count <= index < self._count + len(self._extra_tail):
212 self._extra_tail[index - self._count] = val
213 elif index == self._count + len(self._extra_tail):
214 self._extra_tail.append(val)
215 else:
216 raise IndexError("Index out of range: %s" % (index,))
218 def _do_set(self, level, node, i, val):
219 if id(node) in self._dirty_nodes:
220 ret = node
221 else:
222 ret = list(node)
223 self._dirty_nodes[id(ret)] = True
225 if level == 0:
226 ret[i & BIT_MASK] = val
227 self._cached_leafs[i >> SHIFT] = ret
228 else:
229 sub_index = (i >> level) & BIT_MASK # >>>
230 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
232 return ret
234 def delete(self, index):
235 del self[index]
236 return self
238 def __delitem__(self, key):
239 if self._orig_pvector:
240 # All structural sharing bets are off, base evolver on _extra_tail only
241 l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist()
242 l.extend(self._extra_tail)
243 self._reset(_EMPTY_PVECTOR)
244 self._extra_tail = l
246 del self._extra_tail[key]
248 def persistent(self):
249 result = self._orig_pvector
250 if self.is_dirty():
251 result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail)
252 self._reset(result)
254 return result
256 def __len__(self):
257 return self._count + len(self._extra_tail)
259 def is_dirty(self):
260 return bool(self._dirty_nodes or self._extra_tail)
262 def evolver(self):
263 return PythonPVector.Evolver(self)
265 def set(self, i, val):
266 # This method could be implemented by a call to mset() but doing so would cause
267 # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation
268 # of PVector) so we're keeping this implementation for now.
270 if not isinstance(i, Integral):
271 raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__)
273 if i < 0:
274 i += self._count
276 if 0 <= i < self._count:
277 if i >= self._tail_offset:
278 new_tail = list(self._tail)
279 new_tail[i & BIT_MASK] = val
280 return PythonPVector(self._count, self._shift, self._root, new_tail)
282 return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail)
284 if i == self._count:
285 return self.append(val)
287 raise IndexError("Index out of range: %s" % (i,))
289 def _do_set(self, level, node, i, val):
290 ret = list(node)
291 if level == 0:
292 ret[i & BIT_MASK] = val
293 else:
294 sub_index = (i >> level) & BIT_MASK # >>>
295 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
297 return ret
299 @staticmethod
300 def _node_for(pvector_like, i):
301 if 0 <= i < pvector_like._count:
302 if i >= pvector_like._tail_offset:
303 return pvector_like._tail
305 node = pvector_like._root
306 for level in range(pvector_like._shift, 0, -SHIFT):
307 node = node[(i >> level) & BIT_MASK] # >>>
309 return node
311 raise IndexError("Index out of range: %s" % (i,))
313 def _create_new_root(self):
314 new_shift = self._shift
316 # Overflow root?
317 if (self._count >> SHIFT) > (1 << self._shift): # >>>
318 new_root = [self._root, self._new_path(self._shift, self._tail)]
319 new_shift += SHIFT
320 else:
321 new_root = self._push_tail(self._shift, self._root, self._tail)
323 return new_root, new_shift
325 def append(self, val):
326 if len(self._tail) < BRANCH_FACTOR:
327 new_tail = list(self._tail)
328 new_tail.append(val)
329 return PythonPVector(self._count + 1, self._shift, self._root, new_tail)
331 # Full tail, push into tree
332 new_root, new_shift = self._create_new_root()
333 return PythonPVector(self._count + 1, new_shift, new_root, [val])
335 def _new_path(self, level, node):
336 if level == 0:
337 return node
339 return [self._new_path(level - SHIFT, node)]
341 def _mutating_insert_tail(self):
342 self._root, self._shift = self._create_new_root()
343 self._tail = []
345 def _mutating_fill_tail(self, offset, sequence):
346 max_delta_len = BRANCH_FACTOR - len(self._tail)
347 delta = sequence[offset:offset + max_delta_len]
348 self._tail.extend(delta)
349 delta_len = len(delta)
350 self._count += delta_len
351 return offset + delta_len
353 def _mutating_extend(self, sequence):
354 offset = 0
355 sequence_len = len(sequence)
356 while offset < sequence_len:
357 offset = self._mutating_fill_tail(offset, sequence)
358 if len(self._tail) == BRANCH_FACTOR:
359 self._mutating_insert_tail()
361 self._tail_offset = self._count - len(self._tail)
363 def extend(self, obj):
364 # Mutates the new vector directly for efficiency but that's only an
365 # implementation detail, once it is returned it should be considered immutable
366 l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj)
367 if l:
368 new_vector = self.append(l[0])
369 new_vector._mutating_extend(l[1:])
370 return new_vector
372 return self
374 def _push_tail(self, level, parent, tail_node):
375 """
376 if parent is leaf, insert node,
377 else does it map to an existing child? ->
378 node_to_insert = push node one more level
379 else alloc new path
381 return node_to_insert placed in copy of parent
382 """
383 ret = list(parent)
385 if level == SHIFT:
386 ret.append(tail_node)
387 return ret
389 sub_index = ((self._count - 1) >> level) & BIT_MASK # >>>
390 if len(parent) > sub_index:
391 ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node)
392 return ret
394 ret.append(self._new_path(level - SHIFT, tail_node))
395 return ret
397 def index(self, value, *args, **kwargs):
398 return self.tolist().index(value, *args, **kwargs)
400 def count(self, value):
401 return self.tolist().count(value)
403 def delete(self, index, stop=None):
404 l = self.tolist()
405 del l[_index_or_slice(index, stop)]
406 return _EMPTY_PVECTOR.extend(l)
408 def remove(self, value):
409 l = self.tolist()
410 l.remove(value)
411 return _EMPTY_PVECTOR.extend(l)
413class PVector(metaclass=ABCMeta):
414 """
415 Persistent vector implementation. Meant as a replacement for the cases where you would normally
416 use a Python list.
418 Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to
419 create an instance.
421 Heavily influenced by the persistent vector available in Clojure. Initially this was more or
422 less just a port of the Java code for the Clojure vector. It has since been modified and to
423 some extent optimized for usage in Python.
425 The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No
426 updates are done to the original vector. Structural sharing between vectors are applied where possible to save
427 space and to avoid making complete copies.
429 This structure corresponds most closely to the built in list type and is intended as a replacement. Where the
430 semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
431 for example assignments.
433 The PVector implements the Sequence protocol and is Hashable.
435 Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector.
437 The following are examples of some common operations on persistent vectors:
439 >>> p = v(1, 2, 3)
440 >>> p2 = p.append(4)
441 >>> p3 = p2.extend([5, 6, 7])
442 >>> p
443 pvector([1, 2, 3])
444 >>> p2
445 pvector([1, 2, 3, 4])
446 >>> p3
447 pvector([1, 2, 3, 4, 5, 6, 7])
448 >>> p3[5]
449 6
450 >>> p.set(1, 99)
451 pvector([1, 99, 3])
452 >>>
453 """
455 @abstractmethod
456 def __len__(self):
457 """
458 >>> len(v(1, 2, 3))
459 3
460 """
462 @abstractmethod
463 def __getitem__(self, index):
464 """
465 Get value at index. Full slicing support.
467 >>> v1 = v(5, 6, 7, 8)
468 >>> v1[2]
469 7
470 >>> v1[1:3]
471 pvector([6, 7])
472 """
474 @abstractmethod
475 def __add__(self, other):
476 """
477 >>> v1 = v(1, 2)
478 >>> v2 = v(3, 4)
479 >>> v1 + v2
480 pvector([1, 2, 3, 4])
481 """
483 @abstractmethod
484 def __mul__(self, times):
485 """
486 >>> v1 = v(1, 2)
487 >>> 3 * v1
488 pvector([1, 2, 1, 2, 1, 2])
489 """
491 @abstractmethod
492 def __hash__(self):
493 """
494 >>> v1 = v(1, 2, 3)
495 >>> v2 = v(1, 2, 3)
496 >>> hash(v1) == hash(v2)
497 True
498 """
500 @abstractmethod
501 def evolver(self):
502 """
503 Create a new evolver for this pvector. The evolver acts as a mutable view of the vector
504 with "transaction like" semantics. No part of the underlying vector i updated, it is still
505 fully immutable. Furthermore multiple evolvers created from the same pvector do not
506 interfere with each other.
508 You may want to use an evolver instead of working directly with the pvector in the
509 following cases:
511 * Multiple updates are done to the same vector and the intermediate results are of no
512 interest. In this case using an evolver may be a more efficient and easier to work with.
513 * You need to pass a vector into a legacy function or a function that you have no control
514 over which performs in place mutations of lists. In this case pass an evolver instance
515 instead and then create a new pvector from the evolver once the function returns.
517 The following example illustrates a typical workflow when working with evolvers. It also
518 displays most of the API (which i kept small by design, you should not be tempted to
519 use evolvers in excess ;-)).
521 Create the evolver and perform various mutating updates to it:
523 >>> v1 = v(1, 2, 3, 4, 5)
524 >>> e = v1.evolver()
525 >>> e[1] = 22
526 >>> _ = e.append(6)
527 >>> _ = e.extend([7, 8, 9])
528 >>> e[8] += 1
529 >>> len(e)
530 9
532 The underlying pvector remains the same:
534 >>> v1
535 pvector([1, 2, 3, 4, 5])
537 The changes are kept in the evolver. An updated pvector can be created using the
538 persistent() function on the evolver.
540 >>> v2 = e.persistent()
541 >>> v2
542 pvector([1, 22, 3, 4, 5, 6, 7, 8, 10])
544 The new pvector will share data with the original pvector in the same way that would have
545 been done if only using operations on the pvector.
546 """
548 @abstractmethod
549 def mset(self, *args):
550 """
551 Return a new vector with elements in specified positions replaced by values (multi set).
553 Elements on even positions in the argument list are interpreted as indexes while
554 elements on odd positions are considered values.
556 >>> v1 = v(1, 2, 3)
557 >>> v1.mset(0, 11, 2, 33)
558 pvector([11, 2, 33])
559 """
561 @abstractmethod
562 def set(self, i, val):
563 """
564 Return a new vector with element at position i replaced with val. The original vector remains unchanged.
566 Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will
567 result in an IndexError.
569 >>> v1 = v(1, 2, 3)
570 >>> v1.set(1, 4)
571 pvector([1, 4, 3])
572 >>> v1.set(3, 4)
573 pvector([1, 2, 3, 4])
574 >>> v1.set(-1, 4)
575 pvector([1, 2, 4])
576 """
578 @abstractmethod
579 def append(self, val):
580 """
581 Return a new vector with val appended.
583 >>> v1 = v(1, 2)
584 >>> v1.append(3)
585 pvector([1, 2, 3])
586 """
588 @abstractmethod
589 def extend(self, obj):
590 """
591 Return a new vector with all values in obj appended to it. Obj may be another
592 PVector or any other Iterable.
594 >>> v1 = v(1, 2, 3)
595 >>> v1.extend([4, 5])
596 pvector([1, 2, 3, 4, 5])
597 """
599 @abstractmethod
600 def index(self, value, *args, **kwargs):
601 """
602 Return first index of value. Additional indexes may be supplied to limit the search to a
603 sub range of the vector.
605 >>> v1 = v(1, 2, 3, 4, 3)
606 >>> v1.index(3)
607 2
608 >>> v1.index(3, 3, 5)
609 4
610 """
612 @abstractmethod
613 def count(self, value):
614 """
615 Return the number of times that value appears in the vector.
617 >>> v1 = v(1, 4, 3, 4)
618 >>> v1.count(4)
619 2
620 """
622 @abstractmethod
623 def transform(self, *transformations):
624 """
625 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
626 consists of two parts. One match expression that specifies which elements to transform
627 and one transformation function that performs the actual transformation.
629 >>> from pyrsistent import freeze, ny
630 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
631 ... {'author': 'Steve', 'content': 'A slightly longer article'}],
632 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
633 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
634 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
635 >>> very_short_news.articles[0].content
636 'A short article'
637 >>> very_short_news.articles[1].content
638 'A slightly long...'
640 When nothing has been transformed the original data structure is kept
642 >>> short_news is news_paper
643 True
644 >>> very_short_news is news_paper
645 False
646 >>> very_short_news.articles[0] is news_paper.articles[0]
647 True
648 """
650 @abstractmethod
651 def delete(self, index, stop=None):
652 """
653 Delete a portion of the vector by index or range.
655 >>> v1 = v(1, 2, 3, 4, 5)
656 >>> v1.delete(1)
657 pvector([1, 3, 4, 5])
658 >>> v1.delete(1, 3)
659 pvector([1, 4, 5])
660 """
662 @abstractmethod
663 def remove(self, value):
664 """
665 Remove the first occurrence of a value from the vector.
667 >>> v1 = v(1, 2, 3, 2, 1)
668 >>> v2 = v1.remove(1)
669 >>> v2
670 pvector([2, 3, 2, 1])
671 >>> v2.remove(1)
672 pvector([2, 3, 2])
673 """
676_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], [])
677PVector.register(PythonPVector)
678Sequence.register(PVector)
679Hashable.register(PVector)
681def python_pvector(iterable=()):
682 """
683 Create a new persistent vector containing the elements in iterable.
685 >>> v1 = pvector([1, 2, 3])
686 >>> v1
687 pvector([1, 2, 3])
688 """
689 return _EMPTY_PVECTOR.extend(iterable)
691try:
692 # Use the C extension as underlying trie implementation if it is available
693 import os
694 if os.environ.get('PYRSISTENT_NO_C_EXTENSION'):
695 pvector = python_pvector
696 else:
697 from pvectorc import pvector
698 PVector.register(type(pvector()))
699except ImportError:
700 pvector = python_pvector
703def v(*elements):
704 """
705 Create a new persistent vector containing all parameters to this function.
707 >>> v1 = v(1, 2, 3)
708 >>> v1
709 pvector([1, 2, 3])
710 """
711 return pvector(elements)