Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/numeric.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

149 statements  

1from __future__ import annotations 

2 

3import numbers 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Callable, 

8) 

9 

10import numpy as np 

11 

12from pandas._libs import ( 

13 lib, 

14 missing as libmissing, 

15) 

16from pandas.errors import AbstractMethodError 

17from pandas.util._decorators import cache_readonly 

18 

19from pandas.core.dtypes.common import ( 

20 is_integer_dtype, 

21 is_string_dtype, 

22 pandas_dtype, 

23) 

24 

25from pandas.core.arrays.masked import ( 

26 BaseMaskedArray, 

27 BaseMaskedDtype, 

28) 

29 

30if TYPE_CHECKING: 

31 from collections.abc import Mapping 

32 

33 import pyarrow 

34 

35 from pandas._typing import ( 

36 Dtype, 

37 DtypeObj, 

38 Self, 

39 npt, 

40 ) 

41 

42 

class NumericDtype(BaseMaskedDtype):
    """
    Shared base for masked numeric extension dtypes.

    ``_default_np_dtype`` and ``_checker`` are declared here without values,
    and ``_get_dtype_mapping`` / ``_safe_cast`` raise ``AbstractMethodError``,
    so concrete subclasses are expected to provide all four.
    """

    _default_np_dtype: np.dtype
    _checker: Callable[[Any], bool]  # is_foo_dtype

    def __repr__(self) -> str:
        return f"{self.name}Dtype()"

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether ``self.kind`` is ``"i"``."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether ``self.kind`` is ``"u"``."""
        return self.kind == "u"

    @property
    def _is_numeric(self) -> bool:
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Raises
        ------
        TypeError
            If the arrow type differs from the expected one, is not null, and
            its pandas equivalent is not of kind "i", "u" or "f".
        """
        import pyarrow

        from pandas.core.arrays.arrow._arrow_utils import (
            pyarrow_array_to_numpy_and_mask,
        )

        array_class = self.construct_array_type()
        expected_type = pyarrow.from_numpy_dtype(self.type)

        type_matches = array.type.equals(expected_type) or pyarrow.types.is_null(
            array.type
        )
        if not type_matches:
            # test_from_arrow_type_error raise for string, but allow
            #  through itemsize conversion GH#31896
            round_trip_dtype = pandas_dtype(array.type.to_pandas_dtype())
            if round_trip_dtype.kind not in "iuf":
                # Could allow "c" or potentially disallow float<->int conversion,
                #  but at the moment we specifically test that uint<->int works
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )

            array = array.cast(expected_type)

        if isinstance(array, pyarrow.ChunkedArray):
            # TODO the zero-chunk special case can be removed when requiring
            #  pyarrow >= 10.0, which fixed combine_chunks for empty arrays
            #  https://github.com/apache/arrow/pull/13757
            array = (
                pyarrow.array([], type=array.type)
                if array.num_chunks == 0
                else array.combine_chunks()
            )

        data, validity = pyarrow_array_to_numpy_and_mask(
            array, dtype=self.numpy_dtype
        )
        # The helper's second result is inverted before being handed to the
        # array constructor as its mask.
        return array_class(data.copy(), ~validity, copy=False)

    @classmethod
    def _get_dtype_mapping(cls) -> Mapping[np.dtype, NumericDtype]:
        """Return the numpy-dtype -> NumericDtype lookup; abstract here."""
        raise AbstractMethodError(cls)

    @classmethod
    def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype:
        """
        Convert a string representation or a numpy dtype to NumericDtype.

        A ``NumericDtype`` instance is returned unchanged.

        Raises
        ------
        ValueError
            If the dtype is not a key of ``cls._get_dtype_mapping()``.
        """
        if isinstance(dtype, NumericDtype):
            return dtype

        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt", "Float")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        lookup = cls._get_dtype_mapping()
        try:
            return lookup[np.dtype(dtype)]
        except KeyError as err:
            raise ValueError(f"invalid dtype specified {dtype}") from err

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless.
        Abstract here: always raises ``AbstractMethodError``.
        """
        raise AbstractMethodError(cls)

133 

134 

def _coerce_to_data_and_mask(
    values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
):
    """
    Coerce ``values`` to a 1D numpy data array plus a boolean mask.

    Parameters
    ----------
    values : list-like or masked array
        Input to convert. An instance of ``dtype_cls.construct_array_type()``
        is unpacked into its ``_data`` / ``_mask`` directly.
    dtype : NumericDtype, str, np.dtype or None
        Requested dtype. When None and ``values`` has a ``dtype`` accepted by
        ``dtype_cls._checker``, that dtype is used.
    copy : bool
        Whether to copy the input.
    dtype_cls : type[NumericDtype]
        Dtype class supplying ``_checker``, ``_standardize_dtype``,
        ``construct_array_type`` and ``_safe_cast``.
    default_dtype : np.dtype
        Numpy dtype used when no dtype was given or inferred.

    Returns
    -------
    tuple
        ``(values, mask, dtype, inferred_type)``. For masked-array input the
        third item is the standardized dtype (or None); otherwise it is a
        numpy dtype. ``inferred_type`` is None unless ``lib.infer_dtype`` ran.

    Raises
    ------
    TypeError
        If the values cannot be converted, or values/mask are not 1D.
    """
    checker = dtype_cls._checker

    if dtype is None and hasattr(values, "dtype") and checker(values.dtype):
        dtype = values.dtype

    if dtype is not None:
        dtype = dtype_cls._standardize_dtype(dtype)

    cls = dtype_cls.construct_array_type()
    if isinstance(values, cls):
        # Already a masked array of the right class: reuse its buffers.
        data, na_mask = values._data, values._mask
        if dtype is not None:
            data = data.astype(dtype.numpy_dtype, copy=False)

        if copy:
            data = data.copy()
            na_mask = na_mask.copy()
        return data, na_mask, dtype, None

    original = values
    data = np.array(values, copy=copy) if copy else np.asarray(values)

    inferred_type = None
    if data.dtype == object or is_string_dtype(data.dtype):
        inferred_type = lib.infer_dtype(data, skipna=True)
        if inferred_type == "boolean" and dtype is None:
            name = dtype_cls.__name__.strip("_")
            raise TypeError(f"{data.dtype} cannot be converted to {name}")

    elif data.dtype.kind == "b" and checker(dtype):
        data = (
            np.array(data, dtype=default_dtype, copy=copy)
            if copy
            else np.asarray(data, dtype=default_dtype)
        )

    elif data.dtype.kind not in "iuf":
        name = dtype_cls.__name__.strip("_")
        raise TypeError(f"{data.dtype} cannot be converted to {name}")

    if data.ndim != 1:
        raise TypeError("values must be a 1D list-like")

    # No mask exists yet on this path (masked-array input returned above),
    # so it is always computed here.
    if data.dtype.kind in "iu":
        # fastpath
        na_mask = np.zeros(len(data), dtype=np.bool_)
    else:
        na_mask = libmissing.is_numeric_na(data)

    if na_mask.ndim != 1:
        raise TypeError("mask must be a 1D list-like")

    # infer dtype if needed
    dtype = default_dtype if dtype is None else dtype.numpy_dtype

    if is_integer_dtype(dtype) and data.dtype.kind == "f" and len(data) > 0:
        if na_mask.all():
            data = np.ones(data.shape, dtype=dtype)
        else:
            largest = np.nanargmax(data)
            if int(data[largest]) != original[largest]:
                # We have ints that lost precision during the cast.
                inferred_type = lib.infer_dtype(original, skipna=True)
                lossless_int = (
                    inferred_type not in ["floating", "mixed-integer-float"]
                    and not na_mask.any()
                )
                data = np.asarray(original, dtype=dtype if lossless_int else "object")

    # we copy as need to coerce here
    if na_mask.any():
        data = data.copy()
        data[na_mask] = cls._internal_fill_value

    if inferred_type in ("string", "unicode"):
        # casts from str are always safe since they raise
        # a ValueError if the str cannot be parsed into a float
        data = data.astype(dtype, copy=copy)
    else:
        data = dtype_cls._safe_cast(data, dtype, copy=False)

    return data, na_mask, dtype, inferred_type

232 

233 

234class NumericArray(BaseMaskedArray): 

235 """ 

236 Base class for IntegerArray and FloatingArray. 

237 """ 

238 

239 _dtype_cls: type[NumericDtype] 

240 

241 def __init__( 

242 self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False 

243 ) -> None: 

244 checker = self._dtype_cls._checker 

245 if not (isinstance(values, np.ndarray) and checker(values.dtype)): 

246 descr = ( 

247 "floating" 

248 if self._dtype_cls.kind == "f" # type: ignore[comparison-overlap] 

249 else "integer" 

250 ) 

251 raise TypeError( 

252 f"values should be {descr} numpy array. Use " 

253 "the 'pd.array' function instead" 

254 ) 

255 if values.dtype == np.float16: 

256 # If we don't raise here, then accessing self.dtype would raise 

257 raise TypeError("FloatingArray does not support np.float16 dtype.") 

258 

259 super().__init__(values, mask, copy=copy) 

260 

261 @cache_readonly 

262 def dtype(self) -> NumericDtype: 

263 mapping = self._dtype_cls._get_dtype_mapping() 

264 return mapping[self._data.dtype] 

265 

266 @classmethod 

267 def _coerce_to_array( 

268 cls, value, *, dtype: DtypeObj, copy: bool = False 

269 ) -> tuple[np.ndarray, np.ndarray]: 

270 dtype_cls = cls._dtype_cls 

271 default_dtype = dtype_cls._default_np_dtype 

272 values, mask, _, _ = _coerce_to_data_and_mask( 

273 value, dtype, copy, dtype_cls, default_dtype 

274 ) 

275 return values, mask 

276 

277 @classmethod 

278 def _from_sequence_of_strings( 

279 cls, strings, *, dtype: Dtype | None = None, copy: bool = False 

280 ) -> Self: 

281 from pandas.core.tools.numeric import to_numeric 

282 

283 scalars = to_numeric(strings, errors="raise", dtype_backend="numpy_nullable") 

284 return cls._from_sequence(scalars, dtype=dtype, copy=copy) 

285 

286 _HANDLED_TYPES = (np.ndarray, numbers.Number)