1from collections.abc import Mapping, Hashable
2from itertools import chain
3from typing import Generic, TypeVar
4
5from pyrsistent._pvector import pvector
6from pyrsistent._transformations import transform
7
8KT = TypeVar('KT')
9VT_co = TypeVar('VT_co', covariant=True)
10class PMapView:
11 """View type for the persistent map/dict type `PMap`.
12
13 Provides an equivalent of Python's built-in `dict_values` and `dict_items`
14 types that result from expreessions such as `{}.values()` and
15 `{}.items()`. The equivalent for `{}.keys()` is absent because the keys are
16 instead represented by a `PSet` object, which can be created in `O(1)` time.
17
18 The `PMapView` class is overloaded by the `PMapValues` and `PMapItems`
19 classes which handle the specific case of values and items, respectively
20
21 Parameters
22 ----------
23 m : mapping
24 The mapping/dict-like object of which a view is to be created. This
25 should generally be a `PMap` object.
26 """
27 # The public methods that use the above.
28 def __init__(self, m):
29 # Make sure this is a persistnt map
30 if not isinstance(m, PMap):
31 # We can convert mapping objects into pmap objects, I guess (but why?)
32 if isinstance(m, Mapping):
33 m = pmap(m)
34 else:
35 raise TypeError("PViewMap requires a Mapping object")
36 object.__setattr__(self, '_map', m)
37
38 def __len__(self):
39 return len(self._map)
40
41 def __setattr__(self, k, v):
42 raise TypeError("%s is immutable" % (type(self),))
43
44 def __reversed__(self):
45 raise TypeError("Persistent maps are not reversible")
46
47class PMapValues(PMapView):
48 """View type for the values of the persistent map/dict type `PMap`.
49
50 Provides an equivalent of Python's built-in `dict_values` type that result
51 from expreessions such as `{}.values()`. See also `PMapView`.
52
53 Parameters
54 ----------
55 m : mapping
56 The mapping/dict-like object of which a view is to be created. This
57 should generally be a `PMap` object.
58 """
59 def __iter__(self):
60 return self._map.itervalues()
61
62 def __contains__(self, arg):
63 return arg in self._map.itervalues()
64
65 # The str and repr methods imitate the dict_view style currently.
66 def __str__(self):
67 return f"pmap_values({list(iter(self))})"
68
69 def __repr__(self):
70 return f"pmap_values({list(iter(self))})"
71
72 def __eq__(self, x):
73 # For whatever reason, dict_values always seem to return False for ==
74 # (probably it's not implemented), so we mimic that.
75 if x is self: return True
76 else: return False
77
78class PMapItems(PMapView):
79 """View type for the items of the persistent map/dict type `PMap`.
80
81 Provides an equivalent of Python's built-in `dict_items` type that result
82 from expreessions such as `{}.items()`. See also `PMapView`.
83
84 Parameters
85 ----------
86 m : mapping
87 The mapping/dict-like object of which a view is to be created. This
88 should generally be a `PMap` object.
89 """
90 def __iter__(self):
91 return self._map.iteritems()
92
93 def __contains__(self, arg):
94 try: (k,v) = arg
95 except Exception: return False
96 return k in self._map and self._map[k] == v
97
98 # The str and repr methods mitate the dict_view style currently.
99 def __str__(self):
100 return f"pmap_items({list(iter(self))})"
101
102 def __repr__(self):
103 return f"pmap_items({list(iter(self))})"
104
105 def __eq__(self, x):
106 if x is self: return True
107 elif not isinstance(x, type(self)): return False
108 else: return self._map == x._map
109
110class PMap(Generic[KT, VT_co]):
111 """
112 Persistent map/dict. Tries to follow the same naming conventions as the built in dict where feasible.
113
114 Do not instantiate directly, instead use the factory functions :py:func:`m` or :py:func:`pmap` to
115 create an instance.
116
117 Was originally written as a very close copy of the Clojure equivalent but was later rewritten to closer
118 re-assemble the python dict. This means that a sparse vector (a PVector) of buckets is used. The keys are
119 hashed and the elements inserted at position hash % len(bucket_vector). Whenever the map size exceeds 2/3 of
120 the containing vectors size the map is reallocated to a vector of double the size. This is done to avoid
121 excessive hash collisions.
122
123 This structure corresponds most closely to the built in dict type and is intended as a replacement. Where the
124 semantics are the same (more or less) the same function names have been used but for some cases it is not possible,
125 for example assignments and deletion of values.
126
127 PMap implements the Mapping protocol and is Hashable. It also supports dot-notation for
128 element access.
129
130 Random access and insert is log32(n) where n is the size of the map.
131
132 The following are examples of some common operations on persistent maps
133
134 >>> m1 = m(a=1, b=3)
135 >>> m2 = m1.set('c', 3)
136 >>> m3 = m2.remove('a')
137 >>> m1 == {'a': 1, 'b': 3}
138 True
139 >>> m2 == {'a': 1, 'b': 3, 'c': 3}
140 True
141 >>> m3 == {'b': 3, 'c': 3}
142 True
143 >>> m3['c']
144 3
145 >>> m3.c
146 3
147 """
148 __slots__ = ('_size', '_buckets', '__weakref__', '_cached_hash')
149
150 def __new__(cls, size, buckets):
151 self = super(PMap, cls).__new__(cls)
152 self._size = size
153 self._buckets = buckets
154 return self
155
156 @staticmethod
157 def _get_bucket(buckets, key):
158 index = hash(key) % len(buckets)
159 bucket = buckets[index]
160 return index, bucket
161
162 @staticmethod
163 def _getitem(buckets, key):
164 _, bucket = PMap._get_bucket(buckets, key)
165 if bucket:
166 for k, v in bucket:
167 if k == key:
168 return v
169
170 raise KeyError(key)
171
172 def __getitem__(self, key):
173 return PMap._getitem(self._buckets, key)
174
175 @staticmethod
176 def _contains(buckets, key):
177 _, bucket = PMap._get_bucket(buckets, key)
178 if bucket:
179 for k, _ in bucket:
180 if k == key:
181 return True
182
183 return False
184
185 return False
186
187 def __contains__(self, key):
188 return self._contains(self._buckets, key)
189
190 get = Mapping.get
191
192 def __iter__(self):
193 return self.iterkeys()
194
195 # If this method is not defined, then reversed(pmap) will attempt to reverse
196 # the map using len() and getitem, usually resulting in a mysterious
197 # KeyError.
198 def __reversed__(self):
199 raise TypeError("Persistent maps are not reversible")
200
201 def __getattr__(self, key):
202 try:
203 return self[key]
204 except KeyError as e:
205 raise AttributeError(
206 "{0} has no attribute '{1}'".format(type(self).__name__, key)
207 ) from e
208
209 def iterkeys(self):
210 for k, _ in self.iteritems():
211 yield k
212
213 # These are more efficient implementations compared to the original
214 # methods that are based on the keys iterator and then calls the
215 # accessor functions to access the value for the corresponding key
216 def itervalues(self):
217 for _, v in self.iteritems():
218 yield v
219
220 def iteritems(self):
221 for bucket in self._buckets:
222 if bucket:
223 for k, v in bucket:
224 yield k, v
225
226 def values(self):
227 return PMapValues(self)
228
229 def keys(self):
230 from ._pset import PSet
231 return PSet(self)
232
233 def items(self):
234 return PMapItems(self)
235
236 def __len__(self):
237 return self._size
238
239 def __repr__(self):
240 return 'pmap({0})'.format(str(dict(self)))
241
242 def __eq__(self, other):
243 if self is other:
244 return True
245 if not isinstance(other, Mapping):
246 return NotImplemented
247 if len(self) != len(other):
248 return False
249 if isinstance(other, PMap):
250 if (hasattr(self, '_cached_hash') and hasattr(other, '_cached_hash')
251 and self._cached_hash != other._cached_hash):
252 return False
253 if self._buckets == other._buckets:
254 return True
255 return dict(self.iteritems()) == dict(other.iteritems())
256 elif isinstance(other, dict):
257 return dict(self.iteritems()) == other
258 return dict(self.iteritems()) == dict(other.items())
259
260 __ne__ = Mapping.__ne__
261
262 def __lt__(self, other):
263 raise TypeError('PMaps are not orderable')
264
265 __le__ = __lt__
266 __gt__ = __lt__
267 __ge__ = __lt__
268
269 def __str__(self):
270 return self.__repr__()
271
272 def __hash__(self):
273 if not hasattr(self, '_cached_hash'):
274 self._cached_hash = hash(frozenset(self.iteritems()))
275 return self._cached_hash
276
277 def set(self, key, val):
278 """
279 Return a new PMap with key and val inserted.
280
281 >>> m1 = m(a=1, b=2)
282 >>> m2 = m1.set('a', 3)
283 >>> m3 = m1.set('c' ,4)
284 >>> m1 == {'a': 1, 'b': 2}
285 True
286 >>> m2 == {'a': 3, 'b': 2}
287 True
288 >>> m3 == {'a': 1, 'b': 2, 'c': 4}
289 True
290 """
291 return self.evolver().set(key, val).persistent()
292
293 def remove(self, key):
294 """
295 Return a new PMap without the element specified by key. Raises KeyError if the element
296 is not present.
297
298 >>> m1 = m(a=1, b=2)
299 >>> m1.remove('a')
300 pmap({'b': 2})
301 """
302 return self.evolver().remove(key).persistent()
303
304 def discard(self, key):
305 """
306 Return a new PMap without the element specified by key. Returns reference to itself
307 if element is not present.
308
309 >>> m1 = m(a=1, b=2)
310 >>> m1.discard('a')
311 pmap({'b': 2})
312 >>> m1 is m1.discard('c')
313 True
314 """
315 try:
316 return self.remove(key)
317 except KeyError:
318 return self
319
320 def update(self, *maps):
321 """
322 Return a new PMap with the items in Mappings inserted. If the same key is present in multiple
323 maps the rightmost (last) value is inserted.
324
325 >>> m1 = m(a=1, b=2)
326 >>> m1.update(m(a=2, c=3), {'a': 17, 'd': 35}) == {'a': 17, 'b': 2, 'c': 3, 'd': 35}
327 True
328 """
329 return self.update_with(lambda l, r: r, *maps)
330
331 def update_with(self, update_fn, *maps):
332 """
333 Return a new PMap with the items in Mappings maps inserted. If the same key is present in multiple
334 maps the values will be merged using merge_fn going from left to right.
335
336 >>> from operator import add
337 >>> m1 = m(a=1, b=2)
338 >>> m1.update_with(add, m(a=2)) == {'a': 3, 'b': 2}
339 True
340
341 The reverse behaviour of the regular merge. Keep the leftmost element instead of the rightmost.
342
343 >>> m1 = m(a=1)
344 >>> m1.update_with(lambda l, r: l, m(a=2), {'a':3})
345 pmap({'a': 1})
346 """
347 evolver = self.evolver()
348 for map in maps:
349 for key, value in map.items():
350 evolver.set(key, update_fn(evolver[key], value) if key in evolver else value)
351
352 return evolver.persistent()
353
354 def __add__(self, other):
355 return self.update(other)
356
357 __or__ = __add__
358
359 def __reduce__(self):
360 # Pickling support
361 return pmap, (dict(self),)
362
363 def transform(self, *transformations):
364 """
365 Transform arbitrarily complex combinations of PVectors and PMaps. A transformation
366 consists of two parts. One match expression that specifies which elements to transform
367 and one transformation function that performs the actual transformation.
368
369 >>> from pyrsistent import freeze, ny
370 >>> news_paper = freeze({'articles': [{'author': 'Sara', 'content': 'A short article'},
371 ... {'author': 'Steve', 'content': 'A slightly longer article'}],
372 ... 'weather': {'temperature': '11C', 'wind': '5m/s'}})
373 >>> short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:25] + '...' if len(c) > 25 else c)
374 >>> very_short_news = news_paper.transform(['articles', ny, 'content'], lambda c: c[:15] + '...' if len(c) > 15 else c)
375 >>> very_short_news.articles[0].content
376 'A short article'
377 >>> very_short_news.articles[1].content
378 'A slightly long...'
379
380 When nothing has been transformed the original data structure is kept
381
382 >>> short_news is news_paper
383 True
384 >>> very_short_news is news_paper
385 False
386 >>> very_short_news.articles[0] is news_paper.articles[0]
387 True
388 """
389 return transform(self, transformations)
390
391 def copy(self):
392 return self
393
394 class _Evolver(object):
395 __slots__ = ('_buckets_evolver', '_size', '_original_pmap')
396
397 def __init__(self, original_pmap):
398 self._original_pmap = original_pmap
399 self._buckets_evolver = original_pmap._buckets.evolver()
400 self._size = original_pmap._size
401
402 def __getitem__(self, key):
403 return PMap._getitem(self._buckets_evolver, key)
404
405 def __setitem__(self, key, val):
406 self.set(key, val)
407
408 def set(self, key, val):
409 kv = (key, val)
410 index, bucket = PMap._get_bucket(self._buckets_evolver, key)
411 reallocation_required = len(self._buckets_evolver) < 0.67 * self._size
412 if bucket:
413 for k, v in bucket:
414 if k == key:
415 if v is not val:
416 # Use `not (k2 == k)` rather than `!=` to avoid relying on a well implemented `__ne__`, see #268.
417 new_bucket = [(k2, v2) if not (k2 == k) else (k2, val) for k2, v2 in bucket]
418 self._buckets_evolver[index] = new_bucket
419
420 return self
421
422 # Only check and perform reallocation if not replacing an existing value.
423 # This is a performance tweak, see #247.
424 if reallocation_required:
425 self._reallocate()
426 return self.set(key, val)
427
428 new_bucket = [kv]
429 new_bucket.extend(bucket)
430 self._buckets_evolver[index] = new_bucket
431 self._size += 1
432 else:
433 if reallocation_required:
434 self._reallocate()
435 return self.set(key, val)
436
437 self._buckets_evolver[index] = [kv]
438 self._size += 1
439
440 return self
441
442 def _reallocate(self):
443 new_size = 2 * len(self._buckets_evolver)
444 new_list = new_size * [None]
445 buckets = self._buckets_evolver.persistent()
446 for k, v in chain.from_iterable(x for x in buckets if x):
447 index = hash(k) % new_size
448 if new_list[index]:
449 new_list[index].append((k, v))
450 else:
451 new_list[index] = [(k, v)]
452
453 # A reallocation should always result in a dirty buckets evolver to avoid
454 # possible loss of elements when doing the reallocation.
455 self._buckets_evolver = pvector().evolver()
456 self._buckets_evolver.extend(new_list)
457
458 def is_dirty(self):
459 return self._buckets_evolver.is_dirty()
460
461 def persistent(self):
462 if self.is_dirty():
463 self._original_pmap = PMap(self._size, self._buckets_evolver.persistent())
464
465 return self._original_pmap
466
467 def __len__(self):
468 return self._size
469
470 def __contains__(self, key):
471 return PMap._contains(self._buckets_evolver, key)
472
473 def __delitem__(self, key):
474 self.remove(key)
475
476 def remove(self, key):
477 index, bucket = PMap._get_bucket(self._buckets_evolver, key)
478
479 if bucket:
480 # Use `not (k == key)` rather than `!=` to avoid relying on a well implemented `__ne__`, see #268.
481 new_bucket = [(k, v) for (k, v) in bucket if not (k == key)]
482 size_diff = len(bucket) - len(new_bucket)
483 if size_diff > 0:
484 self._buckets_evolver[index] = new_bucket if new_bucket else None
485 self._size -= size_diff
486 return self
487
488 raise KeyError('{0}'.format(key))
489
490 def evolver(self):
491 """
492 Create a new evolver for this pmap. For a discussion on evolvers in general see the
493 documentation for the pvector evolver.
494
495 Create the evolver and perform various mutating updates to it:
496
497 >>> m1 = m(a=1, b=2)
498 >>> e = m1.evolver()
499 >>> e['c'] = 3
500 >>> len(e)
501 3
502 >>> del e['a']
503
504 The underlying pmap remains the same:
505
506 >>> m1 == {'a': 1, 'b': 2}
507 True
508
509 The changes are kept in the evolver. An updated pmap can be created using the
510 persistent() function on the evolver.
511
512 >>> m2 = e.persistent()
513 >>> m2 == {'b': 2, 'c': 3}
514 True
515
516 The new pmap will share data with the original pmap in the same way that would have
517 been done if only using operations on the pmap.
518 """
519 return self._Evolver(self)
520
521Mapping.register(PMap)
522Hashable.register(PMap)
523
524
525def _turbo_mapping(initial, pre_size):
526 if pre_size:
527 size = pre_size
528 else:
529 try:
530 size = 2 * len(initial) or 8
531 except Exception:
532 # Guess we can't figure out the length. Give up on length hinting,
533 # we can always reallocate later.
534 size = 8
535
536 buckets = size * [None]
537
538 if not isinstance(initial, Mapping):
539 # Make a dictionary of the initial data if it isn't already,
540 # that will save us some job further down since we can assume no
541 # key collisions
542 initial = dict(initial)
543
544 for k, v in initial.items():
545 h = hash(k)
546 index = h % size
547 bucket = buckets[index]
548
549 if bucket:
550 bucket.append((k, v))
551 else:
552 buckets[index] = [(k, v)]
553
554 return PMap(len(initial), pvector().extend(buckets))
555
556
557_EMPTY_PMAP = _turbo_mapping({}, 0)
558
559
560def pmap(initial={}, pre_size=0):
561 """
562 Create new persistent map, inserts all elements in initial into the newly created map.
563 The optional argument pre_size may be used to specify an initial size of the underlying bucket vector. This
564 may have a positive performance impact in the cases where you know beforehand that a large number of elements
565 will be inserted into the map eventually since it will reduce the number of reallocations required.
566
567 >>> pmap({'a': 13, 'b': 14}) == {'a': 13, 'b': 14}
568 True
569 """
570 if not initial and pre_size == 0:
571 return _EMPTY_PMAP
572
573 return _turbo_mapping(initial, pre_size)
574
575
576def m(**kwargs):
577 """
578 Creates a new persistent map. Inserts all key value arguments into the newly created map.
579
580 >>> m(a=13, b=14) == {'a': 13, 'b': 14}
581 True
582 """
583 return pmap(kwargs)