Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/numeric.py: 26%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

152 statements  

1from __future__ import annotations 

2 

3import numbers 

4from typing import ( 

5 TYPE_CHECKING, 

6 Any, 

7 Callable, 

8 Mapping, 

9 TypeVar, 

10) 

11 

12import numpy as np 

13 

14from pandas._libs import ( 

15 lib, 

16 missing as libmissing, 

17) 

18from pandas._typing import ( 

19 Dtype, 

20 DtypeObj, 

21 npt, 

22) 

23from pandas.errors import AbstractMethodError 

24from pandas.util._decorators import cache_readonly 

25 

26from pandas.core.dtypes.common import ( 

27 is_bool_dtype, 

28 is_float_dtype, 

29 is_integer_dtype, 

30 is_object_dtype, 

31 is_string_dtype, 

32 pandas_dtype, 

33) 

34 

35from pandas.core.arrays.masked import ( 

36 BaseMaskedArray, 

37 BaseMaskedDtype, 

38) 

39 

40if TYPE_CHECKING: 

41 import pyarrow 

42 

43 

44T = TypeVar("T", bound="NumericArray") 

45 

46 

class NumericDtype(BaseMaskedDtype):
    """
    Shared dtype base for the masked numeric arrays.

    Subclasses are expected to supply ``_default_np_dtype`` and ``_checker``
    and to implement ``_str_to_dtype_mapping`` and ``_safe_cast``; the latter
    two raise ``AbstractMethodError`` here.
    """

    _default_np_dtype: np.dtype
    _checker: Callable[[Any], bool]  # is_foo_dtype

    def __repr__(self) -> str:
        dtype_name = self.name
        return f"{dtype_name}Dtype()"

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether the dtype kind is ``"i"``."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether the dtype kind is ``"u"``."""
        return self.kind == "u"

    @property
    def _is_numeric(self) -> bool:
        """Always True for this family of dtypes."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Raises
        ------
        TypeError
            If the arrow type differs from the expected one and its pandas
            dtype kind is not one of "i", "u" or "f".
        """
        import pyarrow

        from pandas.core.arrays.arrow._arrow_utils import (
            pyarrow_array_to_numpy_and_mask,
        )

        array_class = self.construct_array_type()
        expected_type = pyarrow.from_numpy_dtype(self.type)

        if not array.type.equals(expected_type):
            # test_from_arrow_type_error raise for string, but allow
            #  through itemsize conversion GH#31896
            roundtrip_dtype = pandas_dtype(array.type.to_pandas_dtype())
            if roundtrip_dtype.kind not in ("i", "u", "f"):
                # Could allow "c" or potentially disallow float<->int conversion,
                #  but at the moment we specifically test that uint<->int works
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )
            array = array.cast(expected_type)

        # A plain Array is handled as a single chunk; otherwise we were
        # handed a pyarrow.ChunkedArray.
        chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

        def _convert_chunk(chunk):
            data, validity = pyarrow_array_to_numpy_and_mask(
                chunk, dtype=self.numpy_dtype
            )
            # The helper's second return value is inverted to form the
            # array's mask.
            return array_class(data.copy(), ~validity, copy=False)

        converted = [_convert_chunk(chunk) for chunk in chunks]

        if len(converted) == 1:
            # avoid additional copy in _concat_same_type
            return converted[0]
        if converted:
            return array_class._concat_same_type(converted)
        # No chunks at all: build an empty array of the right dtype.
        return array_class(
            np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_)
        )

    @classmethod
    def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]:
        """Return the mapping from numpy dtype string to dtype instance."""
        raise AbstractMethodError(cls)

    @classmethod
    def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype:
        """
        Convert a string representation or a numpy dtype to NumericDtype.

        Raises
        ------
        ValueError
            If the numpy dtype string is not a key of ``_str_to_dtype_mapping``.
        """
        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt", "Float")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        if isinstance(dtype, NumericDtype):
            return dtype

        mapping = cls._str_to_dtype_mapping()
        try:
            return mapping[str(np.dtype(dtype))]
        except KeyError as err:
            raise ValueError(f"invalid dtype specified {dtype}") from err

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless.
        """
        raise AbstractMethodError(cls)

146 

147 

def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):
    """
    Coerce list-like ``values`` into a 1D numeric ndarray plus a boolean mask.

    Parameters
    ----------
    values : list-like
        Input data; may already be an instance of the array type returned by
        ``dtype_cls.construct_array_type()``.
    mask : ndarray or None
        Mask to use. When None it is computed from ``values``.
    dtype : NumericDtype, str, np.dtype or None
        Requested dtype; standardized via ``dtype_cls._standardize_dtype``.
    copy : bool
        Whether to copy the inputs.
    dtype_cls : type
        Dtype class providing ``_checker``, ``_standardize_dtype``,
        ``construct_array_type`` and ``_safe_cast`` (presumably a
        NumericDtype subclass -- confirm against callers).
    default_dtype : np.dtype
        Numpy dtype used when ``dtype`` is None.

    Returns
    -------
    tuple
        ``(values, mask, dtype, inferred_type)``. On the early-return path
        (``values`` already of the array type) ``dtype`` is the standardized
        dtype or None; otherwise it is ``default_dtype`` or ``dtype.type``.

    Raises
    ------
    TypeError
        If the values cannot be converted, or values/mask are not 1D.
    """
    checker = dtype_cls._checker

    inferred_type = None

    if dtype is None and hasattr(values, "dtype"):
        if checker(values.dtype):
            dtype = values.dtype

    if dtype is not None:
        dtype = dtype_cls._standardize_dtype(dtype)

    cls = dtype_cls.construct_array_type()
    if isinstance(values, cls):
        # Already the target array type: reuse its data and mask.
        values, mask = values._data, values._mask
        if dtype is not None:
            values = values.astype(dtype.numpy_dtype, copy=False)

        if copy:
            values = values.copy()
            mask = mask.copy()
        return values, mask, dtype, inferred_type

    original = values
    # ``np.array(..., copy=False)`` means "never copy" under NumPy 2 and
    # raises for inputs such as lists; ``np.asarray`` copies only when
    # needed on every NumPy version.
    if copy:
        values = np.array(values, copy=True)
    else:
        values = np.asarray(values)

    if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):
        inferred_type = lib.infer_dtype(values, skipna=True)
        if inferred_type == "boolean" and dtype is None:
            name = dtype_cls.__name__.strip("_")
            raise TypeError(f"{values.dtype} cannot be converted to {name}")

    elif is_bool_dtype(values) and checker(dtype):
        if copy:
            values = np.array(values, dtype=default_dtype, copy=True)
        else:
            values = np.asarray(values, dtype=default_dtype)

    elif not (is_integer_dtype(values) or is_float_dtype(values)):
        name = dtype_cls.__name__.strip("_")
        raise TypeError(f"{values.dtype} cannot be converted to {name}")

    if values.ndim != 1:
        raise TypeError("values must be a 1D list-like")

    if mask is None:
        if is_integer_dtype(values):
            # fastpath
            mask = np.zeros(len(values), dtype=np.bool_)
        else:
            mask = libmissing.is_numeric_na(values)
    else:
        # Kept as an assert so the exception type seen by callers is unchanged.
        assert len(mask) == len(values)

    if mask.ndim != 1:
        raise TypeError("mask must be a 1D list-like")

    # infer dtype if needed
    if dtype is None:
        dtype = default_dtype
    else:
        dtype = dtype.type

    if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:
        if mask.all():
            values = np.ones(values.shape, dtype=dtype)
        else:
            idx = np.nanargmax(values)
            if int(values[idx]) != original[idx]:
                # We have ints that lost precision during the cast.
                inferred_type = lib.infer_dtype(original, skipna=True)
                if (
                    inferred_type not in ["floating", "mixed-integer-float"]
                    and not mask.any()
                ):
                    values = np.asarray(original, dtype=dtype)
                else:
                    values = np.asarray(original, dtype="object")

    # we copy as need to coerce here
    if mask.any():
        values = values.copy()
        values[mask] = cls._internal_fill_value
    if inferred_type in ("string", "unicode"):
        # casts from str are always safe since they raise
        # a ValueError if the str cannot be parsed into a float
        values = values.astype(dtype, copy=copy)
    else:
        values = dtype_cls._safe_cast(values, dtype, copy=False)

    return values, mask, dtype, inferred_type

236 

237 

class NumericArray(BaseMaskedArray):
    """
    Base class for IntegerArray and FloatingArray.
    """

    # Dtype class supplying ``_checker``, ``_default_np_dtype`` and
    # ``_str_to_dtype_mapping`` for this array type.
    _dtype_cls: type[NumericDtype]

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        """
        Validate ``values`` and delegate construction to the parent class.

        Raises
        ------
        TypeError
            If ``values`` is not an ndarray accepted by the dtype class's
            ``_checker``, or if its dtype is ``np.float16``.
        """
        checker = self._dtype_cls._checker
        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
            # ``kind`` read from the dtype *class* can yield a descriptor
            # object rather than a string, so comparing it to "f" would never
            # match. The class-level default numpy dtype carries the kind as
            # a plain str.
            default_np_dtype = getattr(self._dtype_cls, "_default_np_dtype", None)
            is_floating = default_np_dtype is not None and default_np_dtype.kind == "f"
            descr = "floating" if is_floating else "integer"
            raise TypeError(
                f"values should be {descr} numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.dtype == np.float16:
            # If we don't raise here, then accessing self.dtype would raise
            raise TypeError("FloatingArray does not support np.float16 dtype.")

        super().__init__(values, mask, copy=copy)

    @cache_readonly
    def dtype(self) -> NumericDtype:
        """Dtype looked up from the string form of the data's numpy dtype."""
        mapping = self._dtype_cls._str_to_dtype_mapping()
        return mapping[str(self._data.dtype)]

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Coerce ``value`` to a ``(values, mask)`` pair.

        Delegates to ``_coerce_to_data_and_mask`` with no pre-existing mask
        and the dtype class's default numpy dtype.
        """
        dtype_cls = cls._dtype_cls
        default_dtype = dtype_cls._default_np_dtype
        mask = None
        values, mask, _, _ = _coerce_to_data_and_mask(
            value, mask, dtype, copy, dtype_cls, default_dtype
        )
        return values, mask

    @classmethod
    def _from_sequence_of_strings(
        cls: type[T], strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> T:
        """
        Build an array from strings by parsing them with ``to_numeric``
        (``errors="raise"``) and passing the result to ``_from_sequence``.
        """
        # Imported at call time rather than at module import.
        from pandas.core.tools.numeric import to_numeric

        scalars = to_numeric(strings, errors="raise", dtype_backend="numpy_nullable")
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)

290 

291 _HANDLED_TYPES = (np.ndarray, numbers.Number)