1import numpy as np
2from numpy import partition, argpartition
3
4__all__ = ["rankdata", "nanrankdata", "partition", "argpartition", "push"]
5
6
7def rankdata(a, axis=None):
8 "Slow rankdata function used for unaccelerated dtypes."
9 return _rank(scipy_rankdata, a, axis)
10
11
12def nanrankdata(a, axis=None):
13 "Slow nanrankdata function used for unaccelerated dtypes."
14 return _rank(_nanrankdata_1d, a, axis)
15
16
17def _rank(func1d, a, axis):
18 a = np.asarray(a)
19 if axis is None:
20 a = a.ravel()
21 axis = 0
22 if a.size == 0:
23 y = a.astype(np.float64, copy=True)
24 else:
25 y = np.apply_along_axis(func1d, axis, a)
26 if a.dtype != np.float64:
27 y = y.astype(np.float64)
28 return y
29
30
31def _nanrankdata_1d(a):
32 y = np.empty(a.shape, dtype=np.float64)
33 y.fill(np.nan)
34 idx = ~np.isnan(a)
35 y[idx] = scipy_rankdata(a[idx])
36 return y
37
38
39def push(a, n=None, axis=-1):
40 "Slow push used for unaccelerated dtypes."
41 if n is None:
42 n = np.inf
43 y = np.array(a)
44 ndim = y.ndim
45 if axis != -1 or axis != ndim - 1:
46 y = np.rollaxis(y, axis, ndim)
47 if ndim == 1:
48 y = y[None, :]
49 elif ndim == 0:
50 return y
51 fidx = ~np.isnan(y)
52 recent = np.empty(y.shape[:-1])
53 count = np.empty(y.shape[:-1])
54 recent.fill(np.nan)
55 count.fill(np.nan)
56 with np.errstate(invalid="ignore"):
57 for i in range(y.shape[-1]):
58 idx = (i - count) > n
59 recent[idx] = np.nan
60 idx = ~fidx[..., i]
61 y[idx, i] = recent[idx]
62 idx = fidx[..., i]
63 count[idx] = i
64 recent[idx] = y[idx, i]
65 if axis != -1 or axis != ndim - 1:
66 y = np.rollaxis(y, ndim - 1, axis)
67 if ndim == 1:
68 return y[0]
69 return y
70
71
72# ---------------------------------------------------------------------------
73#
74# SciPy
75#
76# Local copy of SciPy's rankdata to avoid a SciPy dependency. The SciPy
77# license is included in the Bottleneck license file, which is distributed
78# with Bottleneck.
79#
80# Code taken from scipy master branch on Aug 31, 2016.
81
82
83def scipy_rankdata(a, method="average"):
84 """
85 rankdata(a, method='average')
86 Assign ranks to data, dealing with ties appropriately.
87 Ranks begin at 1. The `method` argument controls how ranks are assigned
88 to equal values. See [1]_ for further discussion of ranking methods.
89 Parameters
90 ----------
91 a : array_like
92 The array of values to be ranked. The array is first flattened.
93 method : str, optional
94 The method used to assign ranks to tied elements.
95 The options are 'average', 'min', 'max', 'dense' and 'ordinal'.
96 'average':
97 The average of the ranks that would have been assigned to
98 all the tied values is assigned to each value.
99 'min':
100 The minimum of the ranks that would have been assigned to all
101 the tied values is assigned to each value. (This is also
102 referred to as "competition" ranking.)
103 'max':
104 The maximum of the ranks that would have been assigned to all
105 the tied values is assigned to each value.
106 'dense':
107 Like 'min', but the rank of the next highest element is assigned
108 the rank immediately after those assigned to the tied elements.
109 'ordinal':
110 All values are given a distinct rank, corresponding to the order
111 that the values occur in `a`.
112 The default is 'average'.
113 Returns
114 -------
115 ranks : ndarray
116 An array of length equal to the size of `a`, containing rank
117 scores.
118 References
119 ----------
120 .. [1] "Ranking", http://en.wikipedia.org/wiki/Ranking
121 Examples
122 --------
123 >>> from scipy.stats import rankdata
124 >>> rankdata([0, 2, 3, 2])
125 array([ 1. , 2.5, 4. , 2.5])
126 >>> rankdata([0, 2, 3, 2], method='min')
127 array([ 1, 2, 4, 2])
128 >>> rankdata([0, 2, 3, 2], method='max')
129 array([ 1, 3, 4, 3])
130 >>> rankdata([0, 2, 3, 2], method='dense')
131 array([ 1, 2, 3, 2])
132 >>> rankdata([0, 2, 3, 2], method='ordinal')
133 array([ 1, 2, 4, 3])
134 """
135 if method not in ("average", "min", "max", "dense", "ordinal"):
136 raise ValueError('unknown method "{0}"'.format(method))
137
138 a = np.ravel(np.asarray(a))
139 algo = "mergesort" if method == "ordinal" else "quicksort"
140 sorter = np.argsort(a, kind=algo)
141
142 inv = np.empty(sorter.size, dtype=np.intp)
143 inv[sorter] = np.arange(sorter.size, dtype=np.intp)
144
145 if method == "ordinal":
146 return inv + 1
147
148 a = a[sorter]
149 obs = np.r_[True, a[1:] != a[:-1]]
150 dense = obs.cumsum()[inv]
151
152 if method == "dense":
153 return dense
154
155 # cumulative counts of each unique value
156 count = np.r_[np.nonzero(obs)[0], len(obs)]
157
158 if method == "max":
159 return count[dense]
160
161 if method == "min":
162 return count[dense - 1] + 1
163
164 # average method
165 return 0.5 * (count[dense] + count[dense - 1] + 1)