1from __future__ import annotations
2
3import numbers
4from typing import (
5 TYPE_CHECKING,
6 Any,
7 Callable,
8)
9
10import numpy as np
11
12from pandas._libs import (
13 lib,
14 missing as libmissing,
15)
16from pandas.errors import AbstractMethodError
17from pandas.util._decorators import cache_readonly
18
19from pandas.core.dtypes.common import (
20 is_integer_dtype,
21 is_string_dtype,
22 pandas_dtype,
23)
24
25from pandas.core.arrays.masked import (
26 BaseMaskedArray,
27 BaseMaskedDtype,
28)
29
30if TYPE_CHECKING:
31 from collections.abc import Mapping
32
33 import pyarrow
34
35 from pandas._typing import (
36 Dtype,
37 DtypeObj,
38 Self,
39 npt,
40 )
41
42
class NumericDtype(BaseMaskedDtype):
    """
    Shared dtype logic for the masked numeric extension arrays.

    Concrete subclasses are expected to supply ``_default_np_dtype`` and
    ``_checker`` and to implement ``_get_dtype_mapping`` and ``_safe_cast``,
    both of which raise ``AbstractMethodError`` here.
    """

    _default_np_dtype: np.dtype
    _checker: Callable[[Any], bool]  # is_foo_dtype

    def __repr__(self) -> str:
        """Return ``"<name>Dtype()"`` built from this dtype's ``name``."""
        dtype_name = self.name
        return f"{dtype_name}Dtype()"

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether this dtype's ``kind`` is ``"i"``."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether this dtype's ``kind`` is ``"u"``."""
        return self.kind == "u"

    @property
    def _is_numeric(self) -> bool:
        """Always True for numeric dtypes."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Input whose arrow type differs from this dtype's is cast first,
        provided its pandas dtype has kind ``"i"``, ``"u"`` or ``"f"``.
        Null-typed arrow input skips both the kind check and the cast.

        Raises
        ------
        TypeError
            If the arrow type differs from the expected one and is neither
            null-typed nor integer/unsigned/floating.
        """
        import pyarrow

        from pandas.core.arrays.arrow._arrow_utils import (
            pyarrow_array_to_numpy_and_mask,
        )

        result_cls = self.construct_array_type()
        expected_type = pyarrow.from_numpy_dtype(self.type)

        needs_cast = not (
            array.type.equals(expected_type) or pyarrow.types.is_null(array.type)
        )
        if needs_cast:
            # test_from_arrow_type_error raise for string, but allow
            # through itemsize conversion GH#31896
            source_dtype = pandas_dtype(array.type.to_pandas_dtype())
            if source_dtype.kind not in "iuf":
                # Could allow "c" or potentially disallow float<->int conversion,
                # but at the moment we specifically test that uint<->int works
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )

            array = array.cast(expected_type)

        if isinstance(array, pyarrow.ChunkedArray):
            # TODO the zero-chunk special case can be removed when requiring
            # pyarrow >= 10.0, which fixed combine_chunks for empty arrays
            # https://github.com/apache/arrow/pull/13757
            array = (
                array.combine_chunks()
                if array.num_chunks != 0
                else pyarrow.array([], type=array.type)
            )

        np_values, arrow_mask = pyarrow_array_to_numpy_and_mask(
            array, dtype=self.numpy_dtype
        )
        # The helper's mask is inverted before it is handed to the array
        # constructor; the values are copied, so the constructor need not copy.
        return result_cls(np_values.copy(), ~arrow_mask, copy=False)

    @classmethod
    def _get_dtype_mapping(cls) -> Mapping[np.dtype, NumericDtype]:
        """Return the numpy-dtype -> NumericDtype lookup; subclasses implement."""
        raise AbstractMethodError(cls)

    @classmethod
    def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype:
        """
        Convert a string representation or a numpy dtype to NumericDtype.

        A NumericDtype instance is returned unchanged; anything else is passed
        through ``np.dtype`` and looked up in ``_get_dtype_mapping()``.

        Raises
        ------
        ValueError
            If the resulting numpy dtype is not a key of the mapping.
        """
        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt", "Float")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        if isinstance(dtype, NumericDtype):
            return dtype

        lookup = cls._get_dtype_mapping()
        try:
            return lookup[np.dtype(dtype)]
        except KeyError as err:
            raise ValueError(f"invalid dtype specified {dtype}") from err

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless.
        Subclasses implement this; the base version always raises.
        """
        raise AbstractMethodError(cls)
133
134
def _coerce_to_data_and_mask(
    values, dtype, copy: bool, dtype_cls: type[NumericDtype], default_dtype: np.dtype
):
    """
    Turn ``values`` into a 1D numpy data array plus a boolean mask.

    Parameters
    ----------
    values : array-like, or an instance of ``dtype_cls``'s array type
    dtype : object or None
        Requested dtype, standardized via ``dtype_cls._standardize_dtype``.
        When None, ``values.dtype`` is adopted if ``dtype_cls._checker``
        accepts it.
    copy : bool
        Whether the input should be copied.
    dtype_cls : type[NumericDtype]
        Supplies ``_checker``, ``_standardize_dtype``, ``construct_array_type``
        and ``_safe_cast``.
    default_dtype : np.dtype
        Used when no dtype could be determined, and as the cast target for
        boolean input when ``_checker`` accepts the requested dtype.

    Returns
    -------
    tuple
        ``(data, mask, dtype, inferred_type)``. When ``values`` is already an
        instance of the array type, ``dtype`` is the standardized dtype (or
        None) and ``inferred_type`` is None. Otherwise ``dtype`` is a numpy
        dtype and ``inferred_type`` is the ``lib.infer_dtype`` result when
        inference was run, else None.

    Raises
    ------
    TypeError
        If the input cannot be converted or is not one-dimensional.
    """
    accepts_dtype = dtype_cls._checker

    if dtype is None and hasattr(values, "dtype") and accepts_dtype(values.dtype):
        dtype = values.dtype

    if dtype is not None:
        dtype = dtype_cls._standardize_dtype(dtype)

    array_cls = dtype_cls.construct_array_type()
    if isinstance(values, array_cls):
        # Already a masked array of the right family: reuse its buffers.
        data, na_mask = values._data, values._mask
        if dtype is not None:
            data = data.astype(dtype.numpy_dtype, copy=False)
        if copy:
            data, na_mask = data.copy(), na_mask.copy()
        return data, na_mask, dtype, None

    raw = values
    data = np.array(values, copy=copy) if copy else np.asarray(values)
    na_mask = None
    inferred_type = None

    if data.dtype == object or is_string_dtype(data.dtype):
        inferred_type = lib.infer_dtype(data, skipna=True)
        if dtype is None and inferred_type == "boolean":
            target_name = dtype_cls.__name__.strip("_")
            raise TypeError(f"{data.dtype} cannot be converted to {target_name}")

    elif data.dtype.kind == "b" and accepts_dtype(dtype):
        # Boolean input with an accepted explicit dtype: go via the default dtype.
        data = (
            np.array(data, dtype=default_dtype, copy=copy)
            if copy
            else np.asarray(data, dtype=default_dtype)
        )

    elif data.dtype.kind not in "iuf":
        target_name = dtype_cls.__name__.strip("_")
        raise TypeError(f"{data.dtype} cannot be converted to {target_name}")

    if data.ndim != 1:
        raise TypeError("values must be a 1D list-like")

    if na_mask is not None:
        assert len(na_mask) == len(data)
    elif data.dtype.kind in "iu":
        # fastpath
        na_mask = np.zeros(len(data), dtype=np.bool_)
    else:
        na_mask = libmissing.is_numeric_na(data)

    if na_mask.ndim != 1:
        raise TypeError("mask must be a 1D list-like")

    # infer dtype if needed
    np_dtype = default_dtype if dtype is None else dtype.numpy_dtype

    if is_integer_dtype(np_dtype) and data.dtype.kind == "f" and len(data) > 0:
        if na_mask.all():
            data = np.ones(data.shape, dtype=np_dtype)
        else:
            top = np.nanargmax(data)
            if int(data[top]) != raw[top]:
                # We have ints that lost precision during the cast.
                inferred_type = lib.infer_dtype(raw, skipna=True)
                can_reparse_as_int = (
                    inferred_type not in ["floating", "mixed-integer-float"]
                    and not na_mask.any()
                )
                data = np.asarray(
                    raw, dtype=np_dtype if can_reparse_as_int else "object"
                )

    # we copy as need to coerce here
    if na_mask.any():
        data = data.copy()
        data[na_mask] = array_cls._internal_fill_value

    if inferred_type in ("string", "unicode"):
        # casts from str are always safe since they raise
        # a ValueError if the str cannot be parsed into a float
        data = data.astype(np_dtype, copy=copy)
    else:
        data = dtype_cls._safe_cast(data, np_dtype, copy=False)

    return data, na_mask, np_dtype, inferred_type
232
233
class NumericArray(BaseMaskedArray):
    """
    Base class for IntegerArray and FloatingArray.
    """

    # The NumericDtype subclass whose _checker / _default_np_dtype /
    # _get_dtype_mapping drive validation and dtype lookup for this array.
    _dtype_cls: type[NumericDtype]

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        """
        Validate ``values`` and hand off to ``BaseMaskedArray.__init__``.

        Raises
        ------
        TypeError
            If ``values`` is not a numpy array whose dtype passes
            ``_dtype_cls._checker``, or if its dtype is ``np.float16``.
        """
        checker = self._dtype_cls._checker
        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
            # ``_dtype_cls`` is the dtype *class*. Reading ``kind`` from it
            # yields the property object rather than a string, so comparing
            # that against "f" was always False and the message always said
            # "integer". The class-level default numpy dtype carries the
            # kind we actually want.
            descr = (
                "floating"
                if self._dtype_cls._default_np_dtype.kind == "f"
                else "integer"
            )
            raise TypeError(
                f"values should be {descr} numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.dtype == np.float16:
            # If we don't raise here, then accessing self.dtype would raise
            raise TypeError("FloatingArray does not support np.float16 dtype.")

        super().__init__(values, mask, copy=copy)

    @cache_readonly
    def dtype(self) -> NumericDtype:
        """The NumericDtype mapped to the numpy dtype of the underlying data."""
        mapping = self._dtype_cls._get_dtype_mapping()
        return mapping[self._data.dtype]

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Coerce ``value`` to a ``(values, mask)`` pair.

        Delegates to ``_coerce_to_data_and_mask`` with this array's dtype
        class and its default numpy dtype, dropping the extra return items.
        """
        dtype_cls = cls._dtype_cls
        default_dtype = dtype_cls._default_np_dtype
        values, mask, _, _ = _coerce_to_data_and_mask(
            value, dtype, copy, dtype_cls, default_dtype
        )
        return values, mask

    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> Self:
        """
        Build an array from strings by parsing them with ``to_numeric``
        (``errors="raise"``, ``dtype_backend="numpy_nullable"``) and passing
        the result to ``_from_sequence``.
        """
        # Imported here rather than at module level, as in the original.
        from pandas.core.tools.numeric import to_numeric

        scalars = to_numeric(strings, errors="raise", dtype_backend="numpy_nullable")
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)