1from __future__ import annotations
2
3from typing import TYPE_CHECKING
4
5import numpy as np
6
7from pandas.core.dtypes.missing import (
8 isna,
9 na_value_for_dtype,
10)
11
12if TYPE_CHECKING:
13 from pandas._typing import (
14 ArrayLike,
15 Scalar,
16 npt,
17 )
18
19
def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Compute quantiles of `values`, dispatching on the container type.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to compute the quantiles over.
    qs : np.ndarray[float64]
        Quantiles to compute.
    interpolation : str
        Interpolation method, forwarded unchanged to the implementation.

    Returns
    -------
    np.ndarray or ExtensionArray
        For anything that is not an ndarray, whatever ``values._quantile``
        returns; otherwise the result of ``quantile_with_mask``.
    """
    if not isinstance(values, np.ndarray):
        # Non-ndarray inputs provide their own quantile implementation.
        return values._quantile(qs, interpolation)

    # ndarray path: derive the NA mask and the dtype-appropriate NA value,
    # then hand off to the mask-aware implementation.
    na_mask = isna(values)
    na_fill = na_value_for_dtype(values.dtype, compat=False)
    return quantile_with_mask(values, na_mask, na_fill, qs, interpolation)
42
43
def quantile_with_mask(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    fill_value,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> np.ndarray:
    """
    Compute the quantiles in `qs` over `values`, ignoring masked entries.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values); must have the same shape as `values`.
        For ExtensionArray, this is computed before calling
        _values_for_factorize
    fill_value : Scalar
        The value used to fill result entries that have no data.
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : np.ndarray[float64]
        Quantiles as fractions; they are multiplied by 100 before being
        passed on as percentiles.
    interpolation : str
        Type of interpolation

    Returns
    -------
    np.ndarray

    Notes
    -----
    The computation is done on 2D data, with the quantile taken along
    axis=1. 1D input is promoted to a single row with np.atleast_2d and the
    result for that row is returned.

    When `values` has no columns, the result is a
    ``(len(values), len(qs))`` array filled with `fill_value`.
    """
    assert values.shape == mask.shape

    if values.ndim == 1:
        # Solve the single-row 2D problem, then unwrap that row again.
        row_result = quantile_with_mask(
            np.atleast_2d(values),
            np.atleast_2d(mask),
            fill_value,
            qs,
            interpolation,
        )
        return row_result[0]

    assert values.ndim == 2

    if values.shape[1] == 0:
        # Nothing to reduce over: build a len(values) x len(qs) block that
        # holds only the NA fill value.
        n_rows = len(values)
        n_qs = len(qs)
        na_row = np.array([fill_value] * n_qs)
        return np.repeat(na_row, n_rows).reshape(n_rows, n_qs)

    percentiles = _nanpercentile(
        values,
        qs * 100.0,
        na_value=fill_value,
        mask=mask,
        interpolation=interpolation,
    )
    # Transpose so that rows of the result line up with rows of `values`.
    return np.asarray(percentiles).T
109
110
def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation: str,
) -> Scalar | np.ndarray:
    """
    1-dimensional wrapper around np.percentile that drops masked entries.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        locations in values that should be considered missing
    qs : np.ndarray[float64] of quantile indices to find
        Passed straight to np.percentile.
    na_value : scalar
        value to return for empty or all-null values
    interpolation : str
        Forwarded to np.percentile as ``method``.

    Returns
    -------
    quantiles : scalar or array
        An array of length ``len(qs)`` filled with `na_value` when nothing
        is left after masking, otherwise the np.percentile result.
    """
    # Keep only the entries that are not flagged as missing.
    valid = values[~mask]

    if len(valid) > 0:
        return np.percentile(
            valid,
            qs,
            # error: No overload variant of "percentile" matches argument
            # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
            # , "Dict[str, str]"  [call-overload]
            method=interpolation,  # type: ignore[call-overload]
        )

    # Can't pass dtype=values.dtype here bc we might have na_value=np.nan
    # with values.dtype=int64 see test_quantile_empty
    # equiv: 'np.array([na_value] * len(qs))' but much faster
    return np.full(len(qs), na_value)
153
154
def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
):
    """
    Row-wise wrapper around np.percentile that skips masked entries.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
        Passed straight to np.percentile.
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        Forwarded to np.percentile as ``method``.

    Returns
    -------
    quantiles : scalar or array
        Percentiles computed along axis=1 of `values`.
    """
    if values.dtype.kind in "mM":
        # need to cast to integer to avoid rounding errors in numpy
        as_i8 = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return as_i8.astype(values.dtype)

    if not mask.any():
        # Nothing is missing: one vectorized call handles every row.
        return np.percentile(
            values,
            qs,
            axis=1,
            # error: No overload variant of "percentile" matches argument types
            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
            # "int", "Dict[str, str]"  [call-overload]
            method=interpolation,  # type: ignore[call-overload]
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape

    # Each row can have a different number of valid entries, so the rows
    # are reduced one at a time.
    per_row = [
        _nanpercentile_1d(row, row_mask, qs, na_value, interpolation=interpolation)
        for row, row_mask in zip(values, mask)
    ]

    if values.dtype.kind == "f":
        # preserve itemsize
        return np.asarray(per_row, dtype=values.dtype).T

    result = np.asarray(per_row).T

    if result.dtype == values.dtype:
        return result
    if mask.all():
        # mask.all() will never get cast back to int
        return result
    if (result == result.astype(values.dtype, copy=False)).all():
        # e.g. values is integer dtype and result is floating dtype,
        # only cast back to integer dtype if result values are all-integer.
        result = result.astype(values.dtype, copy=False)
    return result