Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/array_algos/quantile.py: 19%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

47 statements  

1from __future__ import annotations 

2 

3from typing import TYPE_CHECKING 

4 

5import numpy as np 

6 

7from pandas.core.dtypes.missing import ( 

8 isna, 

9 na_value_for_dtype, 

10) 

11 

12if TYPE_CHECKING: 

13 from pandas._typing import ( 

14 ArrayLike, 

15 Scalar, 

16 npt, 

17 ) 

18 

19 

def quantile_compat(
    values: ArrayLike, qs: npt.NDArray[np.float64], interpolation: str
) -> ArrayLike:
    """
    Compute quantiles of `values`, dispatching on the container type.

    Parameters
    ----------
    values : np.ndarray or ExtensionArray
        Data to compute the quantiles over.
    qs : np.ndarray[float64]
        The quantiles to compute.
    interpolation : str
        Interpolation method; forwarded unchanged to the implementation.

    Returns
    -------
    np.ndarray or ExtensionArray
    """
    if not isinstance(values, np.ndarray):
        # Anything that is not an ndarray is expected to provide its own
        # ``_quantile`` implementation, so simply delegate to it.
        return values._quantile(qs, interpolation)

    # ndarray path: the NA fill value is derived from the dtype and the
    # missing-value mask from the data itself.
    na_fill = na_value_for_dtype(values.dtype, compat=False)
    missing = isna(values)
    return quantile_with_mask(values, missing, na_fill, qs, interpolation)

42 

43 

def quantile_with_mask(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    fill_value,
    qs: npt.NDArray[np.float64],
    interpolation: str,
) -> np.ndarray:
    """
    Compute the quantiles in `qs` over `values`, skipping masked entries.

    Parameters
    ----------
    values : np.ndarray
        For ExtensionArray, this is _values_for_factorize()[0]
    mask : np.ndarray[bool]
        mask = isna(values)
        For ExtensionArray, this is computed before calling
        _values_for_factorize
    fill_value : Scalar
        The value used for results that have no data to aggregate.
        For ExtensionArray, this is _values_for_factorize()[1]
    qs : np.ndarray[float64]
        Quantiles as fractions; they are scaled by 100 before being handed
        to the percentile helper.
    interpolation : str
        Type of interpolation

    Returns
    -------
    np.ndarray

    Notes
    -----
    The computation is carried out on 2D data, along axis=1. 1D input is
    promoted to a single-row 2D array, processed, and the extra axis is
    dropped again from the result. For ExtensionArray callers the 2D form
    means np.atleast_2d has been called on _values_for_factorize()[0].
    """
    assert values.shape == mask.shape

    if values.ndim == 1:
        # unsqueeze, operate, re-squeeze
        row_result = quantile_with_mask(
            np.atleast_2d(values),
            np.atleast_2d(mask),
            fill_value,
            qs,
            interpolation,
        )
        return row_result[0]

    assert values.ndim == 2

    if values.shape[1] == 0:
        # Nothing to aggregate along axis=1: build a
        # len(values) x len(qs) array holding only the fill value.
        n_rows = len(values)
        n_qs = len(qs)
        na_row = np.array([fill_value] * n_qs)
        return np.repeat(na_row, n_rows).reshape(n_rows, n_qs)

    percentiles = _nanpercentile(
        values,
        qs * 100.0,
        na_value=fill_value,
        mask=mask,
        interpolation=interpolation,
    )
    # Transpose the helper's output before handing it back to the caller.
    return np.asarray(percentiles).T

109 

110 

def _nanpercentile_1d(
    values: np.ndarray,
    mask: npt.NDArray[np.bool_],
    qs: npt.NDArray[np.float64],
    na_value: Scalar,
    interpolation: str,
) -> Scalar | np.ndarray:
    """
    One-dimensional wrapper around np.percentile that ignores masked entries.

    Parameters
    ----------
    values : array over which to find quantiles
    mask : ndarray[bool]
        locations in values that should be considered missing
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    interpolation : str
        passed to np.percentile as its ``method`` argument

    Returns
    -------
    quantiles : scalar or array
    """
    # mask is Union[ExtensionArray, ndarray]
    valid = values[~mask]

    if len(valid) > 0:
        return np.percentile(
            valid,
            qs,
            # error: No overload variant of "percentile" matches argument
            # types "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]"
            # , "Dict[str, str]"  [call-overload]
            method=interpolation,  # type: ignore[call-overload]
        )

    # Nothing survived the mask, so every requested quantile is na_value.
    # Can't pass dtype=values.dtype here bc we might have na_value=np.nan
    # with values.dtype=int64 see test_quantile_empty
    # equiv: 'np.array([na_value] * len(qs))' but much faster
    return np.full(len(qs), na_value)

153 

154 

def _nanpercentile(
    values: np.ndarray,
    qs: npt.NDArray[np.float64],
    *,
    na_value,
    mask: npt.NDArray[np.bool_],
    interpolation: str,
):
    """
    Two-dimensional wrapper around np.percentile that ignores masked entries.

    Parameters
    ----------
    values : np.ndarray[ndim=2] over which to find quantiles
    qs : np.ndarray[float64] of quantile indices to find
    na_value : scalar
        value to return for empty or all-null values
    mask : np.ndarray[bool]
        locations in values that should be considered missing
    interpolation : str
        passed to np.percentile as its ``method`` argument

    Returns
    -------
    quantiles : scalar or array
    """
    if values.dtype.kind in "mM":
        # need to cast to integer to avoid rounding errors in numpy
        as_int = _nanpercentile(
            values.view("i8"),
            qs=qs,
            na_value=na_value.view("i8"),
            mask=mask,
            interpolation=interpolation,
        )

        # Note: we have to do `astype` and not view because in general we
        #  have float result at this point, not i8
        return as_int.astype(values.dtype)

    if not mask.any():
        # Nothing is missing: a single percentile call along axis=1 suffices.
        return np.percentile(
            values,
            qs,
            axis=1,
            # error: No overload variant of "percentile" matches argument types
            # "ndarray[Any, Any]", "ndarray[Any, dtype[floating[_64Bit]]]",
            # "int", "Dict[str, str]"  [call-overload]
            method=interpolation,  # type: ignore[call-overload]
        )

    # Caller is responsible for ensuring mask shape match
    assert mask.shape == values.shape

    # Handle each row separately so its own missing entries can be dropped.
    per_row = [
        _nanpercentile_1d(row, row_mask, qs, na_value, interpolation=interpolation)
        for row, row_mask in zip(values, mask)
    ]

    if values.dtype.kind == "f":
        # preserve itemsize
        return np.asarray(per_row, dtype=values.dtype).T

    out = np.asarray(per_row).T
    if out.dtype == values.dtype or mask.all():
        # mask.all() will never get cast back to int
        return out

    # e.g. values is integer dtype and result is floating dtype,
    # only cast back to integer dtype if result values are all-integer.
    if (out == out.astype(values.dtype, copy=False)).all():
        out = out.astype(values.dtype, copy=False)
    return out