1from __future__ import annotations
2
3import numpy as np
4
5from pandas._libs import lib
6from pandas._libs.tslibs import (
7 get_unit_from_dtype,
8 is_supported_unit,
9)
10from pandas._typing import (
11 AxisInt,
12 Dtype,
13 NpDtype,
14 Scalar,
15 npt,
16)
17from pandas.compat.numpy import function as nv
18
19from pandas.core.dtypes.astype import astype_array
20from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
21from pandas.core.dtypes.common import (
22 is_dtype_equal,
23 pandas_dtype,
24)
25from pandas.core.dtypes.dtypes import PandasDtype
26from pandas.core.dtypes.missing import isna
27
28from pandas.core import (
29 arraylike,
30 nanops,
31 ops,
32)
33from pandas.core.arraylike import OpsMixin
34from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
35from pandas.core.construction import ensure_wrapped_if_datetimelike
36from pandas.core.strings.object_array import ObjectStringArrayMixin
37
38
class PandasArray(
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCPandasArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # High priority so NumPy defers to our reflected dunder ops in mixed
    # ndarray <op> PandasArray expressions.
    __array_priority__ = 1000
    _ndarray: np.ndarray
    _dtype: PandasDtype
    _internal_fill_value = np.nan

    # ------------------------------------------------------------------------
    # Constructors

    def __init__(self, values: np.ndarray | PandasArray, copy: bool = False) -> None:
        """
        Wrap a NumPy ndarray (or unwrap another PandasArray).

        Raises
        ------
        ValueError
            If `values` is not an ndarray, or is 0-dimensional.
        """
        if isinstance(values, type(self)):
            # Unwrap so we always store a bare ndarray in _ndarray.
            values = values._ndarray
        if not isinstance(values, np.ndarray):
            raise ValueError(
                f"'values' must be a NumPy array, not {type(values).__name__}"
            )

        if values.ndim == 0:
            # Technically we support 2, but do not advertise that fact.
            raise ValueError("PandasArray must be 1-dimensional.")

        if copy:
            values = values.copy()

        dtype = PandasDtype(values.dtype)
        super().__init__(values, dtype)

    @classmethod
    def _from_sequence(
        cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
    ) -> PandasArray:
        """
        Construct a new PandasArray from a sequence of scalars.

        `dtype` may be a PandasDtype (unwrapped to its underlying numpy
        dtype) or anything `np.asarray` accepts.
        """
        if isinstance(dtype, PandasDtype):
            dtype = dtype._dtype

        # error: Argument "dtype" to "asarray" has incompatible type
        # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
        # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
        # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
        # _DTypeDict, Tuple[Any, Any]]]"
        result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
        if (
            result.ndim > 1
            and not hasattr(scalars, "dtype")
            and (dtype is None or dtype == object)
        ):
            # e.g. list-of-tuples: np.asarray would build a 2-D array, but we
            # want a 1-D object array holding the tuples themselves.
            result = construct_1d_object_array_from_listlike(scalars)

        if copy and result is scalars:
            # np.asarray returned the input unchanged; honor the copy request.
            result = result.copy()
        return cls(result)

    def _from_backing_data(self, arr: np.ndarray) -> PandasArray:
        """Wrap a backing ndarray in a new array of the same (sub)class."""
        return type(self)(arr)

    # ------------------------------------------------------------------------
    # Data

    @property
    def dtype(self) -> PandasDtype:
        """The PandasDtype wrapping this array's numpy dtype."""
        return self._dtype

    # ------------------------------------------------------------------------
    # NumPy Array Interface

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """Return the underlying ndarray (converted to `dtype` if given)."""
        return np.asarray(self._ndarray, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in PandasArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())

        # First give our own dunder ops (via OpsMixin) a chance to handle
        # the operation, so e.g. masked/EA-aware arithmetic applies.
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # Try pandas' own reductions (which respect NA semantics) before
            # falling back to the raw ufunc reduction below.
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(x._ndarray if isinstance(x, PandasArray) else x for x in inputs)
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, PandasArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)

    # ------------------------------------------------------------------------
    # Pandas ExtensionArray Interface

    def astype(self, dtype, copy: bool = True):
        """
        Cast to `dtype`, returning `self` unchanged (or a copy) when the
        dtype already matches.
        """
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        result = astype_array(self._ndarray, dtype=dtype, copy=copy)
        return result

    def isna(self) -> np.ndarray:
        """Boolean ndarray mask of missing values."""
        return isna(self._ndarray)

    def _validate_scalar(self, fill_value):
        """Normalize a fill value, mapping None to this dtype's NA value."""
        if fill_value is None:
            # Primarily for subclasses
            fill_value = self.dtype.na_value
        return fill_value

    def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
        """
        Return (values, na_sentinel) for factorization; integer/unsigned/bool
        dtypes cannot hold NaN so they get no sentinel.
        """
        if self.dtype.kind in ["i", "u", "b"]:
            fv = None
        else:
            fv = np.nan
        return self._ndarray, fv

    # ------------------------------------------------------------------------
    # Reductions
    #
    # These accept the NumPy reduction signature (out/keepdims/dtype/...) so
    # that np.func(arr) dispatches here, but nv.validate_* rejects any
    # non-default values for the NumPy-only arguments.  The actual work is
    # delegated to the NA-aware implementations in pandas.core.nanops.

    def any(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return whether any element is truthy (NA-aware via nanops)."""
        nv.validate_any((), {"out": out, "keepdims": keepdims})
        result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def all(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return whether all elements are truthy (NA-aware via nanops)."""
        nv.validate_all((), {"out": out, "keepdims": keepdims})
        result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def min(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Return the minimum, skipping NA values when `skipna`."""
        nv.validate_min((), kwargs)
        result = nanops.nanmin(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def max(
        self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
    ) -> Scalar:
        """Return the maximum, skipping NA values when `skipna`."""
        nv.validate_max((), kwargs)
        result = nanops.nanmax(
            values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
        )
        return self._wrap_reduction_result(axis, result)

    def sum(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Return the sum; NA if fewer than `min_count` valid values."""
        nv.validate_sum((), kwargs)
        result = nanops.nansum(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def prod(
        self,
        *,
        axis: AxisInt | None = None,
        skipna: bool = True,
        min_count: int = 0,
        **kwargs,
    ) -> Scalar:
        """Return the product; NA if fewer than `min_count` valid values."""
        nv.validate_prod((), kwargs)
        result = nanops.nanprod(
            self._ndarray, axis=axis, skipna=skipna, min_count=min_count
        )
        return self._wrap_reduction_result(axis, result)

    def mean(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the mean, skipping NA values when `skipna`."""
        nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
        result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def median(
        self,
        *,
        axis: AxisInt | None = None,
        out=None,
        overwrite_input: bool = False,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the median, skipping NA values when `skipna`."""
        nv.validate_median(
            (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
        )
        result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def std(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the standard deviation with `ddof` delta degrees of freedom."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
        )
        result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def var(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the variance with `ddof` delta degrees of freedom."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
        )
        result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def sem(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        ddof: int = 1,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the standard error of the mean."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
        )
        result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
        return self._wrap_reduction_result(axis, result)

    def kurt(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the kurtosis, skipping NA values when `skipna`."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
        )
        result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    def skew(
        self,
        *,
        axis: AxisInt | None = None,
        dtype: NpDtype | None = None,
        out=None,
        keepdims: bool = False,
        skipna: bool = True,
    ):
        """Return the skewness, skipping NA values when `skipna`."""
        nv.validate_stat_ddof_func(
            (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
        )
        result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
        return self._wrap_reduction_result(axis, result)

    # ------------------------------------------------------------------------
    # Additional Methods

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert to an ndarray, optionally substituting `na_value` for
        missing entries.  A copy is made only when needed (NA substitution,
        dtype conversion) or when `copy=True`.
        """
        mask = self.isna()
        if na_value is not lib.no_default and mask.any():
            # Copy before writing so we never mutate our own backing array.
            result = self._ndarray.copy()
            result[mask] = na_value
        else:
            result = self._ndarray

        result = np.asarray(result, dtype=dtype)

        if copy and result is self._ndarray:
            # np.asarray did not copy; honor the caller's copy request.
            result = result.copy()

        return result

    # ------------------------------------------------------------------------
    # Ops

    def __invert__(self) -> PandasArray:
        return type(self)(~self._ndarray)

    def __neg__(self) -> PandasArray:
        return type(self)(-self._ndarray)

    def __pos__(self) -> PandasArray:
        return type(self)(+self._ndarray)

    def __abs__(self) -> PandasArray:
        return type(self)(abs(self._ndarray))

    def _cmp_method(self, other, op):
        """
        Shared implementation for comparison *and* arithmetic dunder ops
        (see `_arith_method = _cmp_method` below): unwrap/prepare `other`,
        apply the array op on the ndarray, and re-box ndarray results.
        """
        if isinstance(other, PandasArray):
            other = other._ndarray

        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)
        # Suppress numpy floating-point warnings (div-by-zero etc.); pandas
        # ops define their own semantics for those cases.
        with np.errstate(all="ignore"):
            result = pd_op(self._ndarray, other)

        if op is divmod or op is ops.rdivmod:
            a, b = result
            if isinstance(a, np.ndarray):
                # for e.g. op vs TimedeltaArray, we may already
                # have an ExtensionArray, in which case we do not wrap
                return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
            return a, b

        if isinstance(result, np.ndarray):
            # for e.g. multiplication vs TimedeltaArray, we may already
            # have an ExtensionArray, in which case we do not wrap
            return self._wrap_ndarray_result(result)
        return result

    _arith_method = _cmp_method

    def _wrap_ndarray_result(self, result: np.ndarray):
        # If we have timedelta64[ns] result, return a TimedeltaArray instead
        # of a PandasArray
        if result.dtype.kind == "m" and is_supported_unit(
            get_unit_from_dtype(result.dtype)
        ):
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._simple_new(result, dtype=result.dtype)
        return type(self)(result)

    # ------------------------------------------------------------------------
    # String methods interface
    # Sentinel used by ObjectStringArrayMixin for missing string values.
    _str_na_value = np.nan