Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.11/site-packages/pyrsistent/_pvector.py: 62%
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
Shortcuts on this page
r m x toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1from abc import abstractmethod, ABCMeta
2from collections.abc import Sequence, Hashable
3from numbers import Integral
4import operator
5from typing import TypeVar, Generic
7from pyrsistent._transformations import transform
9T_co = TypeVar('T_co', covariant=True)
12def _bitcount(val):
13 return bin(val).count("1")
15BRANCH_FACTOR = 32
16BIT_MASK = BRANCH_FACTOR - 1
17SHIFT = _bitcount(BIT_MASK)
20def compare_pvector(v, other, operator):
21 return operator(v.tolist(), other.tolist() if isinstance(other, PVector) else other)
24def _index_or_slice(index, stop):
25 if stop is None:
26 return index
28 return slice(index, stop)
31class PythonPVector(object):
32 """
33 Support structure for PVector that implements structural sharing for vectors using a trie.
34 """
35 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '__weakref__')
37 def __new__(cls, count, shift, root, tail):
38 self = super(PythonPVector, cls).__new__(cls)
39 self._count = count
40 self._shift = shift
41 self._root = root
42 self._tail = tail
44 # Derived attribute stored for performance
45 self._tail_offset = self._count - len(self._tail)
46 return self
48 def __len__(self):
49 return self._count
51 def __getitem__(self, index):
52 if isinstance(index, slice):
53 # There are more conditions than the below where it would be OK to
54 # return ourselves, implement those...
55 if index.start is None and index.stop is None and index.step is None:
56 return self
58 # This is a bit nasty realizing the whole structure as a list before
59 # slicing it but it is the fastest way I've found to date, and it's easy :-)
60 return _EMPTY_PVECTOR.extend(self.tolist()[index])
62 if index < 0:
63 index += self._count
65 return PythonPVector._node_for(self, index)[index & BIT_MASK]
67 def __add__(self, other):
68 return self.extend(other)
70 def __repr__(self):
71 return 'pvector({0})'.format(str(self.tolist()))
73 def __str__(self):
74 return self.__repr__()
76 def __iter__(self):
77 # This is kind of lazy and will produce some memory overhead but it is the fasted method
78 # by far of those tried since it uses the speed of the built in python list directly.
79 return iter(self.tolist())
81 def __ne__(self, other):
82 return not self.__eq__(other)
84 def __eq__(self, other):
85 return self is other or (hasattr(other, '__len__') and self._count == len(other)) and compare_pvector(self, other, operator.eq)
87 def __gt__(self, other):
88 return compare_pvector(self, other, operator.gt)
90 def __lt__(self, other):
91 return compare_pvector(self, other, operator.lt)
93 def __ge__(self, other):
94 return compare_pvector(self, other, operator.ge)
96 def __le__(self, other):
97 return compare_pvector(self, other, operator.le)
99 def __mul__(self, times):
100 if times <= 0 or self is _EMPTY_PVECTOR:
101 return _EMPTY_PVECTOR
103 if times == 1:
104 return self
106 return _EMPTY_PVECTOR.extend(times * self.tolist())
108 __rmul__ = __mul__
110 def _fill_list(self, node, shift, the_list):
111 if shift:
112 shift -= SHIFT
113 for n in node:
114 self._fill_list(n, shift, the_list)
115 else:
116 the_list.extend(node)
118 def tolist(self):
119 """
120 The fastest way to convert the vector into a python list.
121 """
122 the_list = []
123 self._fill_list(self._root, self._shift, the_list)
124 the_list.extend(self._tail)
125 return the_list
127 def _totuple(self):
128 """
129 Returns the content as a python tuple.
130 """
131 return tuple(self.tolist())
133 def __hash__(self):
134 # Taking the easy way out again...
135 return hash(self._totuple())
137 def transform(self, *transformations):
138 return transform(self, transformations)
140 def __reduce__(self):
141 # Pickling support
142 return pvector, (self.tolist(),)
144 def mset(self, *args):
145 if len(args) % 2:
146 raise TypeError("mset expected an even number of arguments")
148 evolver = self.evolver()
149 for i in range(0, len(args), 2):
150 evolver[args[i]] = args[i+1]
152 return evolver.persistent()
154 class Evolver(object):
155 __slots__ = ('_count', '_shift', '_root', '_tail', '_tail_offset', '_dirty_nodes',
156 '_extra_tail', '_cached_leafs', '_orig_pvector')
158 def __init__(self, v):
159 self._reset(v)
161 def __getitem__(self, index):
162 if not isinstance(index, Integral):
163 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
165 if index < 0:
166 index += self._count + len(self._extra_tail)
168 if self._count <= index < self._count + len(self._extra_tail):
169 return self._extra_tail[index - self._count]
171 return PythonPVector._node_for(self, index)[index & BIT_MASK]
173 def _reset(self, v):
174 self._count = v._count
175 self._shift = v._shift
176 self._root = v._root
177 self._tail = v._tail
178 self._tail_offset = v._tail_offset
179 self._dirty_nodes = {}
180 self._cached_leafs = {}
181 self._extra_tail = []
182 self._orig_pvector = v
184 def append(self, element):
185 self._extra_tail.append(element)
186 return self
188 def extend(self, iterable):
189 self._extra_tail.extend(iterable)
190 return self
192 def set(self, index, val):
193 self[index] = val
194 return self
196 def __setitem__(self, index, val):
197 if not isinstance(index, Integral):
198 raise TypeError("'%s' object cannot be interpreted as an index" % type(index).__name__)
200 if index < 0:
201 index += self._count + len(self._extra_tail)
203 if 0 <= index < self._count:
204 node = self._cached_leafs.get(index >> SHIFT)
205 if node:
206 node[index & BIT_MASK] = val
207 elif index >= self._tail_offset:
208 if id(self._tail) not in self._dirty_nodes:
209 self._tail = list(self._tail)
210 self._dirty_nodes[id(self._tail)] = True
211 self._cached_leafs[index >> SHIFT] = self._tail
212 self._tail[index & BIT_MASK] = val
213 else:
214 self._root = self._do_set(self._shift, self._root, index, val)
215 elif self._count <= index < self._count + len(self._extra_tail):
216 self._extra_tail[index - self._count] = val
217 elif index == self._count + len(self._extra_tail):
218 self._extra_tail.append(val)
219 else:
220 raise IndexError("Index out of range: %s" % (index,))
222 def _do_set(self, level, node, i, val):
223 if id(node) in self._dirty_nodes:
224 ret = node
225 else:
226 ret = list(node)
227 self._dirty_nodes[id(ret)] = True
229 if level == 0:
230 ret[i & BIT_MASK] = val
231 self._cached_leafs[i >> SHIFT] = ret
232 else:
233 sub_index = (i >> level) & BIT_MASK # >>>
234 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
236 return ret
238 def delete(self, index):
239 del self[index]
240 return self
242 def __delitem__(self, key):
243 if self._orig_pvector:
244 # All structural sharing bets are off, base evolver on _extra_tail only
245 l = PythonPVector(self._count, self._shift, self._root, self._tail).tolist()
246 l.extend(self._extra_tail)
247 self._reset(_EMPTY_PVECTOR)
248 self._extra_tail = l
250 del self._extra_tail[key]
252 def persistent(self):
253 result = self._orig_pvector
254 if self.is_dirty():
255 result = PythonPVector(self._count, self._shift, self._root, self._tail).extend(self._extra_tail)
256 self._reset(result)
258 return result
260 def __len__(self):
261 return self._count + len(self._extra_tail)
263 def is_dirty(self):
264 return bool(self._dirty_nodes or self._extra_tail)
266 def evolver(self):
267 return PythonPVector.Evolver(self)
269 def set(self, i, val):
270 # This method could be implemented by a call to mset() but doing so would cause
271 # a ~5 X performance penalty on PyPy (considered the primary platform for this implementation
272 # of PVector) so we're keeping this implementation for now.
274 if not isinstance(i, Integral):
275 raise TypeError("'%s' object cannot be interpreted as an index" % type(i).__name__)
277 if i < 0:
278 i += self._count
280 if 0 <= i < self._count:
281 if i >= self._tail_offset:
282 new_tail = list(self._tail)
283 new_tail[i & BIT_MASK] = val
284 return PythonPVector(self._count, self._shift, self._root, new_tail)
286 return PythonPVector(self._count, self._shift, self._do_set(self._shift, self._root, i, val), self._tail)
288 if i == self._count:
289 return self.append(val)
291 raise IndexError("Index out of range: %s" % (i,))
293 def _do_set(self, level, node, i, val):
294 ret = list(node)
295 if level == 0:
296 ret[i & BIT_MASK] = val
297 else:
298 sub_index = (i >> level) & BIT_MASK # >>>
299 ret[sub_index] = self._do_set(level - SHIFT, node[sub_index], i, val)
301 return ret
303 @staticmethod
304 def _node_for(pvector_like, i):
305 if 0 <= i < pvector_like._count:
306 if i >= pvector_like._tail_offset:
307 return pvector_like._tail
309 node = pvector_like._root
310 for level in range(pvector_like._shift, 0, -SHIFT):
311 node = node[(i >> level) & BIT_MASK] # >>>
313 return node
315 raise IndexError("Index out of range: %s" % (i,))
317 def _create_new_root(self):
318 new_shift = self._shift
320 # Overflow root?
321 if (self._count >> SHIFT) > (1 << self._shift): # >>>
322 new_root = [self._root, self._new_path(self._shift, self._tail)]
323 new_shift += SHIFT
324 else:
325 new_root = self._push_tail(self._shift, self._root, self._tail)
327 return new_root, new_shift
329 def append(self, val):
330 if len(self._tail) < BRANCH_FACTOR:
331 new_tail = list(self._tail)
332 new_tail.append(val)
333 return PythonPVector(self._count + 1, self._shift, self._root, new_tail)
335 # Full tail, push into tree
336 new_root, new_shift = self._create_new_root()
337 return PythonPVector(self._count + 1, new_shift, new_root, [val])
339 def _new_path(self, level, node):
340 if level == 0:
341 return node
343 return [self._new_path(level - SHIFT, node)]
345 def _mutating_insert_tail(self):
346 self._root, self._shift = self._create_new_root()
347 self._tail = []
349 def _mutating_fill_tail(self, offset, sequence):
350 max_delta_len = BRANCH_FACTOR - len(self._tail)
351 delta = sequence[offset:offset + max_delta_len]
352 self._tail.extend(delta)
353 delta_len = len(delta)
354 self._count += delta_len
355 return offset + delta_len
357 def _mutating_extend(self, sequence):
358 offset = 0
359 sequence_len = len(sequence)
360 while offset < sequence_len:
361 offset = self._mutating_fill_tail(offset, sequence)
362 if len(self._tail) == BRANCH_FACTOR:
363 self._mutating_insert_tail()
365 self._tail_offset = self._count - len(self._tail)
367 def extend(self, obj):
368 # Mutates the new vector directly for efficiency but that's only an
369 # implementation detail, once it is returned it should be considered immutable
370 l = obj.tolist() if isinstance(obj, PythonPVector) else list(obj)
371 if l:
372 new_vector = self.append(l[0])
373 new_vector._mutating_extend(l[1:])
374 return new_vector
376 return self
378 def _push_tail(self, level, parent, tail_node):
379 """
380 if parent is leaf, insert node,
381 else does it map to an existing child? ->
382 node_to_insert = push node one more level
383 else alloc new path
385 return node_to_insert placed in copy of parent
386 """
387 ret = list(parent)
389 if level == SHIFT:
390 ret.append(tail_node)
391 return ret
393 sub_index = ((self._count - 1) >> level) & BIT_MASK # >>>
394 if len(parent) > sub_index:
395 ret[sub_index] = self._push_tail(level - SHIFT, parent[sub_index], tail_node)
396 return ret
398 ret.append(self._new_path(level - SHIFT, tail_node))
399 return ret
401 def index(self, value, *args, **kwargs):
402 return self.tolist().index(value, *args, **kwargs)
404 def count(self, value):
405 return self.tolist().count(value)
407 def delete(self, index, stop=None):
408 l = self.tolist()
409 del l[_index_or_slice(index, stop)]
410 return _EMPTY_PVECTOR.extend(l)
412 def remove(self, value):
413 l = self.tolist()
414 l.remove(value)
415 return _EMPTY_PVECTOR.extend(l)
417class PVector(Generic[T_co],metaclass=ABCMeta):
418 """
419 Persistent vector implementation. Meant as a replacement for the cases where you would normally
420 use a Python list.
422 Do not instantiate directly, instead use the factory functions :py:func:`v` and :py:func:`pvector` to
423 create an instance.
425 Heavily influenced by the persistent vector available in Clojure. Initially this was more or
426 less just a port of the Java code for the Clojure vector. It has since been modified and to
427 some extent optimized for usage in Python.
429 The vector is organized as a trie, any mutating method will return a new vector that contains the changes. No
430 updates are done to the original vector. Structural sharing between vectors are applied where possible to save
431 space and to avoid making complete copies.
433 This structure corresponds most closely to the built in list type and is intended as a replacement. Where the
434 semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
435 for example assignments.
437 The PVector implements the Sequence protocol and is Hashable.
439 Inserts are amortized O(1). Random access is log32(n) where n is the size of the vector.
441 The following are examples of some common operations on persistent vectors:
443 >>> p = v(1, 2, 3)
444 >>> p2 = p.append(4)
445 >>> p3 = p2.extend([5, 6, 7])
446 >>> p
447 pvector([1, 2, 3])
448 >>> p2
449 pvector([1, 2, 3, 4])
450 >>> p3
451 pvector([1, 2, 3, 4, 5, 6, 7])
452 >>> p3[5]
453 6
454 >>> p.set(1, 99)
455 pvector([1, 99, 3])
456 >>>
457 """
459 @abstractmethod
460 def __len__(self):
461 """
462 >>> len(v(1, 2, 3))
463 3
464 """
466 @abstractmethod
467 def __getitem__(self, index):
468 """
469 Get value at index. Full slicing support.
471 >>> v1 = v(5, 6, 7, 8)
472 >>> v1[2]
473 7
474 >>> v1[1:3]
475 pvector([6, 7])
476 """
478 @abstractmethod
479 def __add__(self, other):
480 """
481 >>> v1 = v(1, 2)
482 >>> v2 = v(3, 4)
483 >>> v1 + v2
484 pvector([1, 2, 3, 4])
485 """
487 @abstractmethod
488 def __mul__(self, times):
489 """
490 >>> v1 = v(1, 2)
491 >>> 3 * v1
492 pvector([1, 2, 1, 2, 1, 2])
493 """
495 @abstractmethod
496 def __hash__(self):
497 """
498 >>> v1 = v(1, 2, 3)
499 >>> v2 = v(1, 2, 3)
500 >>> hash(v1) == hash(v2)
501 True
502 """
504 @abstractmethod
505 def evolver(self):
506 """
507 Create a new evolver for this pvector. The evolver acts as a mutable view of the vector
508 with "transaction like" semantics. No part of the underlying vector i updated, it is still
509 fully immutable. Furthermore multiple evolvers created from the same pvector do not
510 interfere with each other.
512 You may want to use an evolver instead of working directly with the pvector in the
513 following cases:
515 * Multiple updates are done to the same vector and the intermediate results are of no
516 interest. In this case using an evolver may be a more efficient and easier to work with.
517 * You need to pass a vector into a legacy function or a function that you have no control
518 over which performs in place mutations of lists. In this case pass an evolver instance
519 instead and then create a new pvector from the evolver once the function returns.
521 The following example illustrates a typical workflow when working with evolvers. It also
522 displays most of the API (which i kept small by design, you should not be tempted to
523 use evolvers in excess ;-)).
525 Create the evolver and perform various mutating updates to it:
527 >>> v1 = v(1, 2, 3, 4, 5)
528 >>> e = v1.evolver()
529 >>> e[1] = 22
530 >>> _ = e.append(6)
531 >>> _ = e.extend([7, 8, 9])
532 >>> e[8] += 1
533 >>> len(e)
534 9
536 The underlying pvector remains the same:
538 >>> v1
539 pvector([1, 2, 3, 4, 5])
541 The changes are kept in the evolver. An updated pvector can be created using the
542 persistent() function on the evolver.
544 >>> v2 = e.persistent()
545 >>> v2
546 pvector([1, 22, 3, 4, 5, 6, 7, 8, 10])
548 The new pvector will share data with the original pvector in the same way that would have
549 been done if only using operations on the pvector.
550 """
552 @abstractmethod
553 def mset(self, *args):
554 """
555 Return a new vector with elements in specified positions replaced by values (multi set).
557 Elements on even positions in the argument list are interpreted as indexes while
558 elements on odd positions are considered values.
560 >>> v1 = v(1, 2, 3)
561 >>> v1.mset(0, 11, 2, 33)
562 pvector([11, 2, 33])
563 """
565 @abstractmethod
566 def set(self, i, val):
567 """
568 Return a new vector with element at position i replaced with val. The original vector remains unchanged.
570 Setting a value one step beyond the end of the vector is equal to appending. Setting beyond that will
571 result in an IndexError.
573 >>> v1 = v(1, 2, 3)
574 >>> v1.set(1, 4)
575 pvector([1, 4, 3])
576 >>> v1.set(3, 4)
577 pvector([1, 2, 3, 4])
578 >>> v1.set(-1, 4)
579 pvector([1, 2, 4])
580 """
582 @abstractmethod
583 def append(self, val):
584 """
585 Return a new vector with val appended.
587 >>> v1 = v(1, 2)
588 >>> v1.append(3)
589 pvector([1, 2, 3])
590 """
592 @abstractmethod
593 def extend(self, obj):
594 """
595 Return a new vector with all values in obj appended to it. Obj may be another
596 PVector or any other Iterable.
598 >>> v1 = v(1, 2, 3)
599 >>> v1.extend([4, 5])
600 pvector([1, 2, 3, 4, 5])
601 """
603 @abstractmethod
604 def index(self, value, *args, **kwargs):
605 """
606 Return first index of value. Additional indexes may be supplied to limit the search to a
607 sub range of the vector.
609 >>> v1 = v(1, 2, 3, 4, 3)
610 >>> v1.index(3)
611 2
612 >>> v1.index(3, 3, 5)
613 4
614 """
616 @abstractmethod
617 def count(self, value):
618 """
619 Return the number of times that value appears in the vector.
621 >>> v1 = v(1, 4, 3, 4)
622 >>> v1.count(4)
623 2
624 """
626 @abstractmethod
627 def transform(self, *transformations):
628 """
629 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
630 consists of two parts. One match expression that specifies which elements to transform
631 and one transformation function that performs the actual transformation.
633 >>> from pyrsistent import freeze, ny
634 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
635 ... {'author': 'Steve', 'content': 'A slightly longer article'}],
636 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
637 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
638 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
639 >>> very_short_news.articles[0].content
640 'A short article'
641 >>> very_short_news.articles[1].content
642 'A slightly long...'
644 When nothing has been transformed the original data structure is kept
646 >>> short_news is news_paper
647 True
648 >>> very_short_news is news_paper
649 False
650 >>> very_short_news.articles[0] is news_paper.articles[0]
651 True
652 """
654 @abstractmethod
655 def delete(self, index, stop=None):
656 """
657 Delete a portion of the vector by index or range.
659 >>> v1 = v(1, 2, 3, 4, 5)
660 >>> v1.delete(1)
661 pvector([1, 3, 4, 5])
662 >>> v1.delete(1, 3)
663 pvector([1, 4, 5])
664 """
666 @abstractmethod
667 def remove(self, value):
668 """
669 Remove the first occurrence of a value from the vector.
671 >>> v1 = v(1, 2, 3, 2, 1)
672 >>> v2 = v1.remove(1)
673 >>> v2
674 pvector([2, 3, 2, 1])
675 >>> v2.remove(1)
676 pvector([2, 3, 2])
677 """
680_EMPTY_PVECTOR = PythonPVector(0, SHIFT, [], [])
681PVector.register(PythonPVector)
682Sequence.register(PVector)
683Hashable.register(PVector)
685def python_pvector(iterable=()):
686 """
687 Create a new persistent vector containing the elements in iterable.
689 >>> v1 = pvector([1, 2, 3])
690 >>> v1
691 pvector([1, 2, 3])
692 """
693 return _EMPTY_PVECTOR.extend(iterable)
695try:
696 # Use the C extension as underlying trie implementation if it is available
697 import os
698 if os.environ.get('PYRSISTENT_NO_C_EXTENSION'):
699 pvector = python_pvector
700 else:
701 from pvectorc import pvector
702 PVector.register(type(pvector()))
703except ImportError:
704 pvector = python_pvector
707def v(*elements):
708 """
709 Create a new persistent vector containing all parameters to this function.
711 >>> v1 = v(1, 2, 3)
712 >>> v1
713 pvector([1, 2, 3])
714 """
715 return pvector(elements)