1from __future__ import annotations
2
3import numpy as np
4
5from pandas._typing import (
6 ArrayLike,
7 Scalar,
8 npt,
9)
10from pandas.compat.numpy import np_percentile_argname
11
12from pandas.core.dtypes.missing import (
13 isna,
14 na_value_for_dtype,
15)
16
17
def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Dispatch a quantile computation based on the container type of `values`.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to compute quantiles over.
    qs : np.ndarray[float64]
        Quantiles to compute.
    interpolation : str
        Interpolation method, forwarded unchanged.

    Returns
    -------
    np.ndarray or ExtensionArray
        For ndarray input, the result of ``quantile_with_mask``; otherwise
        whatever ``values._quantile`` returns.
    """
    if not isinstance(values, np.ndarray):
        # Anything that is not an ndarray is delegated to its own _quantile.
        return values._quantile(qs, interpolation)

    # ndarray: derive the NA sentinel and the missing-value mask from the data
    # itself, then defer to the masked implementation.
    na_fill = na_value_for_dtype(values.dtype, compat=False)
    na_mask = isna(values)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation)
40
41
def quantile_with_mask(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    fill_value,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> np.ndarray:
    """
    Compute the quantiles of the given values for each quantile in `qs`.

    Parameters
    ----------
    values : np.ndarray
        1D or 2D data.
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values); must have the same shape as `values`.
        For ExtensionArray, this is computed before calling
        _values_for_factorize
    fill_value : Scalar
        The value to interpret fill NA entries with
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : np.ndarray[float64]
        Quantiles as fractions; they are multiplied by 100 before being
        handed to the percentile routine.
    interpolation : str
        Type of interpolation

    Returns
    -------
    np.ndarray
        For 2D input, shape ``(len(values), len(qs))``.
        For 1D input, shape ``(len(qs),)``.

    Notes
    -----
    1D input is promoted with np.atleast_2d, processed as a single row and
    the first row of the 2D result is returned.

    Quantile is computed along axis=1.
    """
    assert values.shape == mask.shape
    if values.ndim == 1:
        # unsqueeze, operate, re-squeeze
        res_values = quantile_with_mask(
            np.atleast_2d(values),
            np.atleast_2d(mask),
            fill_value,
            qs,
            interpolation,
        )
        return res_values[0]

    assert values.ndim == 2

    if values.shape[1] == 0:
        # Nothing to take quantiles of: every row gets `fill_value` for every
        # requested quantile, giving a len(values) x len(qs) result.
        flat = np.array([fill_value] * len(qs))
        return np.repeat(flat, len(values)).reshape(len(values), len(qs))

    result = _nanpercentile(
        values,
        qs * 100.0,
        na_value=fill_value,
        mask=mask,
        interpolation=interpolation,
    )

    # np.asarray instead of np.array(..., copy=False): on NumPy >= 2.0 the
    # latter raises if a copy turns out to be required, whereas asarray keeps
    # the intended "copy only if needed" semantics on every NumPy version.
    # _nanpercentile yields one row per quantile; transpose so that rows line
    # up with the rows of `values`.
    return np.asarray(result).T
107
108
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation: str,
) -> Scalar | np.ndarray:
    """
    One-dimensional np.percentile that ignores masked entries.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        True where the corresponding entry of `values` is to be skipped
    qs : np.ndarray[float64]
        percentiles to compute, passed straight to np.percentile
    na_value : scalar
        value used to fill the result when no unmasked entries remain
    interpolation : str
        interpolation method, passed to np.percentile under the keyword
        named by ``np_percentile_argname``

    Returns
    -------
    quantiles : scalar or array
    """
    present = values[~mask]

    if not len(present):
        # Deliberately no dtype=values.dtype: na_value may be np.nan while
        # values are int64 (see test_quantile_empty).
        # np.full is the fast spelling of np.array([na_value] * len(qs)).
        return np.full(len(qs), na_value)

    # The keyword that selects the interpolation method is only known at
    # runtime, hence the dict; static checkers cannot match an overload.
    interp_kwarg = {np_percentile_argname: interpolation}
    return np.percentile(  # type: ignore[call-overload]
        present,
        qs,
        **interp_kwarg,
    )
151
152
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
) -> np.ndarray:
    """
    Wrapper for np.percentile that skips missing values.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str

    Returns
    -------
    quantiles : np.ndarray
        One row per entry of `qs`, one column per row of `values`.
    """
    if values.dtype.kind in "mM":
        # need to cast to integer to avoid rounding errors in numpy
        result = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        # have float result at this point, not i8
        return result.astype(values.dtype)

    if not mask.any():
        # Nothing is missing: a single vectorised call handles every row.
        return np.percentile(  # type: ignore[call-overload]
            values,
            qs,
            axis=1,
            **{np_percentile_argname: interpolation},
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape

    # Each row can have a different number of valid entries, so rows are
    # processed one at a time (iterating a 2D array yields its rows).
    per_row = [
        _nanpercentile_1d(row, row_mask, qs, na_value, interpolation=interpolation)
        for row, row_mask in zip(values, mask)
    ]

    # np.asarray rather than np.array(..., copy=False): building an array
    # from a list always needs a copy, and NumPy >= 2.0 raises ValueError
    # for copy=False whenever a copy cannot be avoided.
    if values.dtype.kind == "f":
        # preserve itemsize
        return np.asarray(per_row, dtype=values.dtype).T

    result = np.asarray(per_row).T
    if (
        result.dtype != values.dtype
        and not mask.all()
        and (result == result.astype(values.dtype, copy=False)).all()
    ):
        # mask.all() will never get cast back to int
        # e.g. values is integer dtype and result is floating dtype,
        # only cast back to integer dtype if result values are all-integer.
        result = result.astype(values.dtype, copy=False)
    return result