1from toolz.itertoolz import getter, cons, pluck
2from itertools import tee, starmap
3
4
5# See #166: https://github.com/pytoolz/toolz/issues/166
6# See #173: https://github.com/pytoolz/toolz/pull/173
7class EqualityHashKey(object):
8 """ Create a hash key that uses equality comparisons between items.
9
10 This may be used to create hash keys for otherwise unhashable types:
11
12 >>> from toolz import curry
13 >>> EqualityHashDefault = curry(EqualityHashKey, None)
14 >>> set(map(EqualityHashDefault, [[], (), [1], [1]])) # doctest: +SKIP
15 {=[]=, =()=, =[1]=}
16
17 **Caution:** adding N ``EqualityHashKey`` items to a hash container
18 may require O(N**2) operations, not O(N) as for typical hashable types.
19 Therefore, a suitable key function such as ``tuple`` or ``frozenset``
20 is usually preferred over using ``EqualityHashKey`` if possible.
21
22 The ``key`` argument to ``EqualityHashKey`` should be a function or
23 index that returns a hashable object that effectively distinguishes
24 unequal items. This helps avoid the poor scaling that occurs when
25 using the default key. For example, the above example can be improved
26 by using a key function that distinguishes items by length or type:
27
28 >>> EqualityHashLen = curry(EqualityHashKey, len)
29 >>> EqualityHashType = curry(EqualityHashKey, type) # this works too
30 >>> set(map(EqualityHashLen, [[], (), [1], [1]])) # doctest: +SKIP
31 {=[]=, =()=, =[1]=}
32
33 ``EqualityHashKey`` is convenient to use when a suitable key function
34 is complicated or unavailable. For example, the following returns all
35 unique values based on equality:
36
37 >>> from toolz import unique
38 >>> vals = [[], [], (), [1], [1], [2], {}, {}, {}]
39 >>> list(unique(vals, key=EqualityHashDefault))
40 [[], (), [1], [2], {}]
41
42 **Warning:** don't change the equality value of an item already in a hash
43 container. Unhashable types are unhashable for a reason. For example:
44
45 >>> L1 = [1] ; L2 = [2]
46 >>> s = set(map(EqualityHashDefault, [L1, L2]))
47 >>> s # doctest: +SKIP
48 {=[1]=, =[2]=}
49
50 >>> L1[0] = 2 # Don't do this! ``s`` now has duplicate items!
51 >>> s # doctest: +SKIP
52 {=[2]=, =[2]=}
53
54 Although this may appear problematic, immutable data types is a common
55 idiom in functional programming, and``EqualityHashKey`` easily allows
56 the same idiom to be used by convention rather than strict requirement.
57
58 See Also:
59 identity
60 """
61 __slots__ = ['item', 'key']
62 _default_hashkey = '__default__hashkey__'
63
64 def __init__(self, key, item):
65 if key is None:
66 self.key = self._default_hashkey
67 elif not callable(key):
68 self.key = getter(key)
69 else:
70 self.key = key
71 self.item = item
72
73 def __hash__(self):
74 if self.key == self._default_hashkey:
75 val = self.key
76 else:
77 val = self.key(self.item)
78 return hash(val)
79
80 def __eq__(self, other):
81 try:
82 return (self._default_hashkey == other._default_hashkey and
83 self.item == other.item)
84 except AttributeError:
85 return False
86
87 def __ne__(self, other):
88 return not self.__eq__(other)
89
90 def __str__(self):
91 return '=%s=' % str(self.item)
92
93 def __repr__(self):
94 return '=%s=' % repr(self.item)
95
96
97# See issue #293: https://github.com/pytoolz/toolz/issues/239
98def unzip(seq):
99 """Inverse of ``zip``
100
101 >>> a, b = unzip([('a', 1), ('b', 2)])
102 >>> list(a)
103 ['a', 'b']
104 >>> list(b)
105 [1, 2]
106
107 Unlike the naive implementation ``def unzip(seq): zip(*seq)`` this
108 implementation can handle an infinite sequence ``seq``.
109
110 Caveats:
111
112 * The implementation uses ``tee``, and so can use a significant amount
113 of auxiliary storage if the resulting iterators are consumed at
114 different times.
115
116 * The inner sequence cannot be infinite. In Python 3 ``zip(*seq)`` can be
117 used if ``seq`` is a finite sequence of infinite sequences.
118
119 """
120
121 seq = iter(seq)
122
123 # Check how many iterators we need
124 try:
125 first = tuple(next(seq))
126 except StopIteration:
127 return tuple()
128
129 # and create them
130 niters = len(first)
131 seqs = tee(cons(first, seq), niters)
132
133 return tuple(starmap(pluck, enumerate(seqs)))