Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/numeric.py: 26%

1from __future__ import annotations

3import numbers

4from typing import (

5 TYPE_CHECKING,

6 Any,

7 Callable,

8 Mapping,

9 TypeVar,

10)

12import numpy as np

14from pandas._libs import (

15 lib,

16 missing as libmissing,

17)

18from pandas._typing import (

19 Dtype,

20 DtypeObj,

21 npt,

22)

23from pandas.errors import AbstractMethodError

24from pandas.util._decorators import cache_readonly

26from pandas.core.dtypes.common import (

27 is_bool_dtype,

28 is_float_dtype,

29 is_integer_dtype,

30 is_object_dtype,

31 is_string_dtype,

32 pandas_dtype,

33)

35from pandas.core.arrays.masked import (

36 BaseMaskedArray,

37 BaseMaskedDtype,

38)

40if TYPE_CHECKING:

41 import pyarrow

44T = TypeVar("T", bound="NumericArray")

class NumericDtype(BaseMaskedDtype):
    """
    Shared base class for the masked numeric extension dtypes.

    Concrete subclasses are expected to provide ``_default_np_dtype`` and
    ``_checker`` and to override ``_str_to_dtype_mapping`` and ``_safe_cast``,
    both of which raise ``AbstractMethodError`` here.
    """

    # numpy dtype used when no explicit dtype has been requested
    _default_np_dtype: np.dtype
    # predicate deciding whether a dtype belongs to this family
    _checker: Callable[[Any], bool]  # is_foo_dtype

    def __repr__(self) -> str:
        """Return ``"<name>Dtype()"`` built from ``self.name``."""
        return "{}Dtype()".format(self.name)

    @cache_readonly
    def is_signed_integer(self) -> bool:
        """Whether ``self.kind`` is ``"i"``."""
        return self.kind == "i"

    @cache_readonly
    def is_unsigned_integer(self) -> bool:
        """Whether ``self.kind`` is ``"u"``."""
        return self.kind == "u"

    @property
    def _is_numeric(self) -> bool:
        """Always ``True`` for this dtype family."""
        return True

    def __from_arrow__(
        self, array: pyarrow.Array | pyarrow.ChunkedArray
    ) -> BaseMaskedArray:
        """
        Construct IntegerArray/FloatingArray from pyarrow Array/ChunkedArray.

        Raises
        ------
        TypeError
            If the arrow type differs from this dtype's arrow type and its
            pandas round-trip dtype is not of kind "i", "u" or "f".
        """
        import pyarrow

        from pandas.core.arrays.arrow._arrow_utils import (
            pyarrow_array_to_numpy_and_mask,
        )

        masked_cls = self.construct_array_type()
        expected_type = pyarrow.from_numpy_dtype(self.type)

        if not array.type.equals(expected_type):
            # test_from_arrow_type_error raise for string, but allow
            #  through itemsize conversion GH#31896
            roundtrip_dtype = pandas_dtype(array.type.to_pandas_dtype())
            if roundtrip_dtype.kind not in ("i", "u", "f"):
                # Could allow "c" or potentially disallow float<->int conversion,
                #  but at the moment we specifically test that uint<->int works
                raise TypeError(
                    f"Expected array of {self} type, got {array.type} instead"
                )

            array = array.cast(expected_type)

        # A plain pyarrow.Array is handled as a single chunk; anything else is
        # treated as a pyarrow.ChunkedArray.
        chunks = [array] if isinstance(array, pyarrow.Array) else array.chunks

        pieces = []
        for chunk in chunks:
            np_data, arrow_mask = pyarrow_array_to_numpy_and_mask(
                chunk, dtype=self.numpy_dtype
            )
            # The helper's mask is inverted before being handed to the masked
            # array (presumably it marks valid entries -- confirm in helper).
            pieces.append(masked_cls(np_data.copy(), ~arrow_mask, copy=False))

        if len(pieces) == 1:
            # avoid additional copy in _concat_same_type
            return pieces[0]
        if pieces:
            return masked_cls._concat_same_type(pieces)
        # no chunks at all: build an empty array of the right dtype
        return masked_cls(
            np.array([], dtype=self.numpy_dtype), np.array([], dtype=np.bool_)
        )

    @classmethod
    def _str_to_dtype_mapping(cls) -> Mapping[str, NumericDtype]:
        """Return the numpy-dtype-name -> dtype mapping; must be overridden."""
        raise AbstractMethodError(cls)

    @classmethod
    def _standardize_dtype(cls, dtype: NumericDtype | str | np.dtype) -> NumericDtype:
        """
        Convert a string representation or a numpy dtype to NumericDtype.

        Raises
        ------
        ValueError
            If the numpy dtype name is not a key of ``_str_to_dtype_mapping()``.
        """
        # Already standardized: hand it back untouched.
        if isinstance(dtype, NumericDtype):
            return dtype

        if isinstance(dtype, str) and dtype.startswith(("Int", "UInt", "Float")):
            # Avoid DeprecationWarning from NumPy about np.dtype("Int64")
            # https://github.com/numpy/numpy/pull/7476
            dtype = dtype.lower()

        lookup = cls._str_to_dtype_mapping()
        try:
            return lookup[str(np.dtype(dtype))]
        except KeyError as err:
            raise ValueError(f"invalid dtype specified {dtype}") from err

    @classmethod
    def _safe_cast(cls, values: np.ndarray, dtype: np.dtype, copy: bool) -> np.ndarray:
        """
        Safely cast the values to the given dtype.

        "safe" in this context means the casting is lossless.
        """
        raise AbstractMethodError(cls)

146

147

148def _coerce_to_data_and_mask(values, mask, dtype, copy, dtype_cls, default_dtype):

149 checker = dtype_cls._checker

150

151 inferred_type = None

152

153 if dtype is None and hasattr(values, "dtype"):

154 if checker(values.dtype):

155 dtype = values.dtype

156

157 if dtype is not None:

158 dtype = dtype_cls._standardize_dtype(dtype)

159

160 cls = dtype_cls.construct_array_type()

161 if isinstance(values, cls):

162 values, mask = values._data, values._mask

163 if dtype is not None:

164 values = values.astype(dtype.numpy_dtype, copy=False)

165

166 if copy:

167 values = values.copy()

168 mask = mask.copy()

169 return values, mask, dtype, inferred_type

170

171 original = values

172 values = np.array(values, copy=copy)

173 inferred_type = None

174 if is_object_dtype(values.dtype) or is_string_dtype(values.dtype):

175 inferred_type = lib.infer_dtype(values, skipna=True)

176 if inferred_type == "boolean" and dtype is None:

177 name = dtype_cls.__name__.strip("_")

178 raise TypeError(f"{values.dtype} cannot be converted to {name}")

179

180 elif is_bool_dtype(values) and checker(dtype):

181 values = np.array(values, dtype=default_dtype, copy=copy)

182

183 elif not (is_integer_dtype(values) or is_float_dtype(values)):

184 name = dtype_cls.__name__.strip("_")

185 raise TypeError(f"{values.dtype} cannot be converted to {name}")

186

187 if values.ndim != 1:

188 raise TypeError("values must be a 1D list-like")

189

190 if mask is None:

191 if is_integer_dtype(values):

192 # fastpath

193 mask = np.zeros(len(values), dtype=np.bool_)

194 else:

195 mask = libmissing.is_numeric_na(values)

196 else:

197 assert len(mask) == len(values)

198

199 if mask.ndim != 1:

200 raise TypeError("mask must be a 1D list-like")

201

202 # infer dtype if needed

203 if dtype is None:

204 dtype = default_dtype

205 else:

206 dtype = dtype.type

207

208 if is_integer_dtype(dtype) and is_float_dtype(values.dtype) and len(values) > 0:

209 if mask.all():

210 values = np.ones(values.shape, dtype=dtype)

211 else:

212 idx = np.nanargmax(values)

213 if int(values[idx]) != original[idx]:

214 # We have ints that lost precision during the cast.

215 inferred_type = lib.infer_dtype(original, skipna=True)

216 if (

217 inferred_type not in ["floating", "mixed-integer-float"]

218 and not mask.any()

219 ):

220 values = np.array(original, dtype=dtype, copy=False)

221 else:

222 values = np.array(original, dtype="object", copy=False)

223

224 # we copy as need to coerce here

225 if mask.any():

226 values = values.copy()

227 values[mask] = cls._internal_fill_value

228 if inferred_type in ("string", "unicode"):

229 # casts from str are always safe since they raise

230 # a ValueError if the str cannot be parsed into a float

231 values = values.astype(dtype, copy=copy)

232 else:

233 values = dtype_cls._safe_cast(values, dtype, copy=False)

234

235 return values, mask, dtype, inferred_type

236

237

class NumericArray(BaseMaskedArray):
    """
    Base class for IntegerArray and FloatingArray.
    """

    # Dtype class paired with this array class; supplies ``_checker``,
    # ``_default_np_dtype`` and ``_str_to_dtype_mapping``.
    _dtype_cls: type[NumericDtype]

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        """
        Validate ``values`` and delegate to the parent constructor.

        Raises
        ------
        TypeError
            If ``values`` is not a numpy array accepted by
            ``_dtype_cls._checker``, or if its dtype is ``np.float16``.
        """
        dtype_cls = self._dtype_cls
        checker = dtype_cls._checker
        if not (isinstance(values, np.ndarray) and checker(values.dtype)):
            # ``dtype_cls`` is a class, so ``dtype_cls.kind`` would be looked up
            # on the class rather than on a dtype instance and would not compare
            # equal to "f". Read the kind from the default numpy dtype instead;
            # if that attribute is missing, keep the "integer" wording.
            default_np_dtype = getattr(dtype_cls, "_default_np_dtype", None)
            is_floating = getattr(default_np_dtype, "kind", None) == "f"
            descr = "floating" if is_floating else "integer"
            raise TypeError(
                f"values should be {descr} numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.dtype == np.float16:
            # If we don't raise here, then accessing self.dtype would raise
            raise TypeError("FloatingArray does not support np.float16 dtype.")

        super().__init__(values, mask, copy=copy)

    @cache_readonly
    def dtype(self) -> NumericDtype:
        """Dtype looked up from the name of the underlying data's numpy dtype."""
        mapping = self._dtype_cls._str_to_dtype_mapping()
        return mapping[str(self._data.dtype)]

    @classmethod
    def _coerce_to_array(
        cls, value, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        """
        Coerce ``value`` to a ``(values, mask)`` pair.

        Delegates to ``_coerce_to_data_and_mask`` with no pre-existing mask and
        discards the dtype / inferred-type parts of its result.
        """
        dtype_cls = cls._dtype_cls
        default_dtype = dtype_cls._default_np_dtype
        mask = None
        values, mask, _, _ = _coerce_to_data_and_mask(
            value, mask, dtype, copy, dtype_cls, default_dtype
        )
        return values, mask

    @classmethod
    def _from_sequence_of_strings(
        cls: type[T], strings, *, dtype: Dtype | None = None, copy: bool = False
    ) -> T:
        """
        Build an array from ``strings`` by parsing them with ``to_numeric``
        (``errors="raise"``) and passing the result to ``_from_sequence``.
        """
        # Imported here rather than at module level, as in the original.
        from pandas.core.tools.numeric import to_numeric

        scalars = to_numeric(strings, errors="raise", dtype_backend="numpy_nullable")
        return cls._from_sequence(scalars, dtype=dtype, copy=copy)

    _HANDLED_TYPES = (np.ndarray, numbers.Number)