1from __future__ import annotations
2
3from typing import (
4 TYPE_CHECKING,
5 Literal,
6)
7
8import numpy as np
9
10from pandas._libs import lib
11from pandas._libs.tslibs import is_supported_dtype
12from pandas.compat.numpy import function as nv
13
14from pandas.core.dtypes.astype import astype_array
15from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
16from pandas.core.dtypes.common import pandas_dtype
17from pandas.core.dtypes.dtypes import NumpyEADtype
18from pandas.core.dtypes.missing import isna
19
20from pandas.core import (
21 arraylike,
22 missing,
23 nanops,
24 ops,
25)
26from pandas.core.arraylike import OpsMixin
27from pandas.core.arrays._mixins import NDArrayBackedExtensionArray
28from pandas.core.construction import ensure_wrapped_if_datetimelike
29from pandas.core.strings.object_array import ObjectStringArrayMixin
30
31if TYPE_CHECKING:
32 from pandas._typing import (
33 AxisInt,
34 Dtype,
35 FillnaOptions,
36 InterpolateOptions,
37 NpDtype,
38 Scalar,
39 Self,
40 npt,
41 )
42
43 from pandas import Index
44
45
# error: Definition of "_concat_same_type" in base class "NDArrayBacked" is
# incompatible with definition in base class "ExtensionArray"
class NumpyExtensionArray(  # type: ignore[misc]
    OpsMixin,
    NDArrayBackedExtensionArray,
    ObjectStringArrayMixin,
):
    """
    A pandas ExtensionArray for NumPy data.

    This is mostly for internal compatibility, and is not especially
    useful on its own.

    Parameters
    ----------
    values : ndarray
        The NumPy ndarray to wrap. Must be 1-dimensional.
    copy : bool, default False
        Whether to copy `values`.

    Attributes
    ----------
    None

    Methods
    -------
    None

    Examples
    --------
    >>> pd.arrays.NumpyExtensionArray(np.array([0, 1, 2, 3]))
    <NumpyExtensionArray>
    [0, 1, 2, 3]
    Length: 4, dtype: int64
    """

    # If you're wondering why pd.Series(cls) doesn't put the array in an
    # ExtensionBlock, search for `ABCNumpyExtensionArray`. We check for
    # that _typ to ensure that users don't unnecessarily use EAs inside
    # pandas internals, which turns off things like block consolidation.
    _typ = "npy_extension"
    # High priority so NumPy defers to our __array_ufunc__ in mixed
    # ndarray/EA expressions.
    __array_priority__ = 1000
    # Backing 1-D (technically also 2-D) ndarray holding the data.
    _ndarray: np.ndarray
    # EA dtype wrapper around the backing ndarray's numpy dtype.
    _dtype: NumpyEADtype
    # Sentinel written into the backing ndarray for missing entries.
    _internal_fill_value = np.nan
92 # ------------------------------------------------------------------------
93 # Constructors
94
95 def __init__(
96 self, values: np.ndarray | NumpyExtensionArray, copy: bool = False
97 ) -> None:
98 if isinstance(values, type(self)):
99 values = values._ndarray
100 if not isinstance(values, np.ndarray):
101 raise ValueError(
102 f"'values' must be a NumPy array, not {type(values).__name__}"
103 )
104
105 if values.ndim == 0:
106 # Technically we support 2, but do not advertise that fact.
107 raise ValueError("NumpyExtensionArray must be 1-dimensional.")
108
109 if copy:
110 values = values.copy()
111
112 dtype = NumpyEADtype(values.dtype)
113 super().__init__(values, dtype)
114
115 @classmethod
116 def _from_sequence(
117 cls, scalars, *, dtype: Dtype | None = None, copy: bool = False
118 ) -> NumpyExtensionArray:
119 if isinstance(dtype, NumpyEADtype):
120 dtype = dtype._dtype
121
122 # error: Argument "dtype" to "asarray" has incompatible type
123 # "Union[ExtensionDtype, str, dtype[Any], dtype[floating[_64Bit]], Type[object],
124 # None]"; expected "Union[dtype[Any], None, type, _SupportsDType, str,
125 # Union[Tuple[Any, int], Tuple[Any, Union[int, Sequence[int]]], List[Any],
126 # _DTypeDict, Tuple[Any, Any]]]"
127 result = np.asarray(scalars, dtype=dtype) # type: ignore[arg-type]
128 if (
129 result.ndim > 1
130 and not hasattr(scalars, "dtype")
131 and (dtype is None or dtype == object)
132 ):
133 # e.g. list-of-tuples
134 result = construct_1d_object_array_from_listlike(scalars)
135
136 if copy and result is scalars:
137 result = result.copy()
138 return cls(result)
139
    def _from_backing_data(self, arr: np.ndarray) -> NumpyExtensionArray:
        # Re-wrap a raw backing ndarray in a new instance of this (sub)class.
        return type(self)(arr)
142
143 # ------------------------------------------------------------------------
144 # Data
145
    @property
    def dtype(self) -> NumpyEADtype:
        """The NumpyEADtype wrapping the backing ndarray's dtype."""
        return self._dtype
149
150 # ------------------------------------------------------------------------
151 # NumPy Array Interface
152
153 def __array__(
154 self, dtype: NpDtype | None = None, copy: bool | None = None
155 ) -> np.ndarray:
156 return np.asarray(self._ndarray, dtype=dtype)
157
    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        NumPy ufunc protocol hook: try the dunder-op / pandas dispatch
        helpers first, else apply the ufunc to the unwrapped ndarrays and
        re-box array results.
        """
        # Lightly modified version of
        # https://numpy.org/doc/stable/reference/generated/numpy.lib.mixins.NDArrayOperatorsMixin.html
        # The primary modification is not boxing scalar return values
        # in NumpyExtensionArray, since pandas' ExtensionArrays are 1-d.
        out = kwargs.get("out", ())

        # First chance: route binary ufuncs to the matching dunder op
        # (e.g. np.add -> __add__) so pandas' op machinery applies.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_unary
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                # e.g. tests.series.test_ufunc.TestNumpyReductions
                return result

        # Defer to the implementation of the ufunc on unwrapped values.
        inputs = tuple(
            x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in inputs
        )
        if out:
            kwargs["out"] = tuple(
                x._ndarray if isinstance(x, NumpyExtensionArray) else x for x in out
            )
        result = getattr(ufunc, method)(*inputs, **kwargs)

        if ufunc.nout > 1:
            # multiple return values; re-box array-like results
            return tuple(type(self)(x) for x in result)
        elif method == "at":
            # no return value
            return None
        elif method == "reduce":
            if isinstance(result, np.ndarray):
                # e.g. test_np_reduce_2d
                return type(self)(result)

            # e.g. test_np_max_nested_tuples
            return result
        else:
            # one return value; re-box array-like results
            return type(self)(result)
211
212 # ------------------------------------------------------------------------
213 # Pandas ExtensionArray Interface
214
215 def astype(self, dtype, copy: bool = True):
216 dtype = pandas_dtype(dtype)
217
218 if dtype == self.dtype:
219 if copy:
220 return self.copy()
221 return self
222
223 result = astype_array(self._ndarray, dtype=dtype, copy=copy)
224 return result
225
    def isna(self) -> np.ndarray:
        """Boolean ndarray marking missing entries, per ``pandas.isna``."""
        return isna(self._ndarray)
228
229 def _validate_scalar(self, fill_value):
230 if fill_value is None:
231 # Primarily for subclasses
232 fill_value = self.dtype.na_value
233 return fill_value
234
235 def _values_for_factorize(self) -> tuple[np.ndarray, float | None]:
236 if self.dtype.kind in "iub":
237 fv = None
238 else:
239 fv = np.nan
240 return self._ndarray, fv
241
242 # Base EA class (and all other EA classes) don't have limit_area keyword
243 # This can be removed here as well when the interpolate ffill/bfill method
244 # deprecation is enforced
245 def _pad_or_backfill(
246 self,
247 *,
248 method: FillnaOptions,
249 limit: int | None = None,
250 limit_area: Literal["inside", "outside"] | None = None,
251 copy: bool = True,
252 ) -> Self:
253 """
254 ffill or bfill along axis=0.
255 """
256 if copy:
257 out_data = self._ndarray.copy()
258 else:
259 out_data = self._ndarray
260
261 meth = missing.clean_fill_method(method)
262 missing.pad_or_backfill_inplace(
263 out_data.T,
264 method=meth,
265 axis=0,
266 limit=limit,
267 limit_area=limit_area,
268 )
269
270 if not copy:
271 return self
272 return type(self)._simple_new(out_data, dtype=self.dtype)
273
274 def interpolate(
275 self,
276 *,
277 method: InterpolateOptions,
278 axis: int,
279 index: Index,
280 limit,
281 limit_direction,
282 limit_area,
283 copy: bool,
284 **kwargs,
285 ) -> Self:
286 """
287 See NDFrame.interpolate.__doc__.
288 """
289 # NB: we return type(self) even if copy=False
290 if not copy:
291 out_data = self._ndarray
292 else:
293 out_data = self._ndarray.copy()
294
295 # TODO: assert we have floating dtype?
296 missing.interpolate_2d_inplace(
297 out_data,
298 method=method,
299 axis=axis,
300 index=index,
301 limit=limit,
302 limit_direction=limit_direction,
303 limit_area=limit_area,
304 **kwargs,
305 )
306 if not copy:
307 return self
308 return type(self)._simple_new(out_data, dtype=self.dtype)
309
310 # ------------------------------------------------------------------------
311 # Reductions
312
313 def any(
314 self,
315 *,
316 axis: AxisInt | None = None,
317 out=None,
318 keepdims: bool = False,
319 skipna: bool = True,
320 ):
321 nv.validate_any((), {"out": out, "keepdims": keepdims})
322 result = nanops.nanany(self._ndarray, axis=axis, skipna=skipna)
323 return self._wrap_reduction_result(axis, result)
324
325 def all(
326 self,
327 *,
328 axis: AxisInt | None = None,
329 out=None,
330 keepdims: bool = False,
331 skipna: bool = True,
332 ):
333 nv.validate_all((), {"out": out, "keepdims": keepdims})
334 result = nanops.nanall(self._ndarray, axis=axis, skipna=skipna)
335 return self._wrap_reduction_result(axis, result)
336
337 def min(
338 self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
339 ) -> Scalar:
340 nv.validate_min((), kwargs)
341 result = nanops.nanmin(
342 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
343 )
344 return self._wrap_reduction_result(axis, result)
345
346 def max(
347 self, *, axis: AxisInt | None = None, skipna: bool = True, **kwargs
348 ) -> Scalar:
349 nv.validate_max((), kwargs)
350 result = nanops.nanmax(
351 values=self._ndarray, axis=axis, mask=self.isna(), skipna=skipna
352 )
353 return self._wrap_reduction_result(axis, result)
354
355 def sum(
356 self,
357 *,
358 axis: AxisInt | None = None,
359 skipna: bool = True,
360 min_count: int = 0,
361 **kwargs,
362 ) -> Scalar:
363 nv.validate_sum((), kwargs)
364 result = nanops.nansum(
365 self._ndarray, axis=axis, skipna=skipna, min_count=min_count
366 )
367 return self._wrap_reduction_result(axis, result)
368
369 def prod(
370 self,
371 *,
372 axis: AxisInt | None = None,
373 skipna: bool = True,
374 min_count: int = 0,
375 **kwargs,
376 ) -> Scalar:
377 nv.validate_prod((), kwargs)
378 result = nanops.nanprod(
379 self._ndarray, axis=axis, skipna=skipna, min_count=min_count
380 )
381 return self._wrap_reduction_result(axis, result)
382
383 def mean(
384 self,
385 *,
386 axis: AxisInt | None = None,
387 dtype: NpDtype | None = None,
388 out=None,
389 keepdims: bool = False,
390 skipna: bool = True,
391 ):
392 nv.validate_mean((), {"dtype": dtype, "out": out, "keepdims": keepdims})
393 result = nanops.nanmean(self._ndarray, axis=axis, skipna=skipna)
394 return self._wrap_reduction_result(axis, result)
395
396 def median(
397 self,
398 *,
399 axis: AxisInt | None = None,
400 out=None,
401 overwrite_input: bool = False,
402 keepdims: bool = False,
403 skipna: bool = True,
404 ):
405 nv.validate_median(
406 (), {"out": out, "overwrite_input": overwrite_input, "keepdims": keepdims}
407 )
408 result = nanops.nanmedian(self._ndarray, axis=axis, skipna=skipna)
409 return self._wrap_reduction_result(axis, result)
410
411 def std(
412 self,
413 *,
414 axis: AxisInt | None = None,
415 dtype: NpDtype | None = None,
416 out=None,
417 ddof: int = 1,
418 keepdims: bool = False,
419 skipna: bool = True,
420 ):
421 nv.validate_stat_ddof_func(
422 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="std"
423 )
424 result = nanops.nanstd(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
425 return self._wrap_reduction_result(axis, result)
426
427 def var(
428 self,
429 *,
430 axis: AxisInt | None = None,
431 dtype: NpDtype | None = None,
432 out=None,
433 ddof: int = 1,
434 keepdims: bool = False,
435 skipna: bool = True,
436 ):
437 nv.validate_stat_ddof_func(
438 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="var"
439 )
440 result = nanops.nanvar(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
441 return self._wrap_reduction_result(axis, result)
442
443 def sem(
444 self,
445 *,
446 axis: AxisInt | None = None,
447 dtype: NpDtype | None = None,
448 out=None,
449 ddof: int = 1,
450 keepdims: bool = False,
451 skipna: bool = True,
452 ):
453 nv.validate_stat_ddof_func(
454 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="sem"
455 )
456 result = nanops.nansem(self._ndarray, axis=axis, skipna=skipna, ddof=ddof)
457 return self._wrap_reduction_result(axis, result)
458
459 def kurt(
460 self,
461 *,
462 axis: AxisInt | None = None,
463 dtype: NpDtype | None = None,
464 out=None,
465 keepdims: bool = False,
466 skipna: bool = True,
467 ):
468 nv.validate_stat_ddof_func(
469 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="kurt"
470 )
471 result = nanops.nankurt(self._ndarray, axis=axis, skipna=skipna)
472 return self._wrap_reduction_result(axis, result)
473
474 def skew(
475 self,
476 *,
477 axis: AxisInt | None = None,
478 dtype: NpDtype | None = None,
479 out=None,
480 keepdims: bool = False,
481 skipna: bool = True,
482 ):
483 nv.validate_stat_ddof_func(
484 (), {"dtype": dtype, "out": out, "keepdims": keepdims}, fname="skew"
485 )
486 result = nanops.nanskew(self._ndarray, axis=axis, skipna=skipna)
487 return self._wrap_reduction_result(axis, result)
488
489 # ------------------------------------------------------------------------
490 # Additional Methods
491
492 def to_numpy(
493 self,
494 dtype: npt.DTypeLike | None = None,
495 copy: bool = False,
496 na_value: object = lib.no_default,
497 ) -> np.ndarray:
498 mask = self.isna()
499 if na_value is not lib.no_default and mask.any():
500 result = self._ndarray.copy()
501 result[mask] = na_value
502 else:
503 result = self._ndarray
504
505 result = np.asarray(result, dtype=dtype)
506
507 if copy and result is self._ndarray:
508 result = result.copy()
509
510 return result
511
512 # ------------------------------------------------------------------------
513 # Ops
514
515 def __invert__(self) -> NumpyExtensionArray:
516 return type(self)(~self._ndarray)
517
518 def __neg__(self) -> NumpyExtensionArray:
519 return type(self)(-self._ndarray)
520
521 def __pos__(self) -> NumpyExtensionArray:
522 return type(self)(+self._ndarray)
523
524 def __abs__(self) -> NumpyExtensionArray:
525 return type(self)(abs(self._ndarray))
526
    def _cmp_method(self, other, op):
        # Shared implementation for comparison and (via the _arith_method
        # alias below) arithmetic dunder ops: unwrap `other`, dispatch to
        # the pandas array op, then re-wrap ndarray results.
        if isinstance(other, NumpyExtensionArray):
            other = other._ndarray

        # Prepare a scalar `other` for the op given our 1-D shape.
        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)
        result = pd_op(self._ndarray, other)

        if op is divmod or op is ops.rdivmod:
            # divmod returns a pair; wrap each half separately.
            a, b = result
            if isinstance(a, np.ndarray):
                # for e.g. op vs TimedeltaArray, we may already
                # have an ExtensionArray, in which case we do not wrap
                return self._wrap_ndarray_result(a), self._wrap_ndarray_result(b)
            return a, b

        if isinstance(result, np.ndarray):
            # for e.g. multiplication vs TimedeltaArray, we may already
            # have an ExtensionArray, in which case we do not wrap
            return self._wrap_ndarray_result(result)
        return result

    _arith_method = _cmp_method
551
    def _wrap_ndarray_result(self, result: np.ndarray):
        """Box an op result; supported timedelta64 dtypes become TimedeltaArray."""
        # If we have timedelta64[ns] result, return a TimedeltaArray instead
        # of a NumpyExtensionArray
        if result.dtype.kind == "m" and is_supported_dtype(result.dtype):
            # Local import to avoid a circular dependency at module load.
            from pandas.core.arrays import TimedeltaArray

            return TimedeltaArray._simple_new(result, dtype=result.dtype)
        return type(self)(result)
560
561 # ------------------------------------------------------------------------
562 # String methods interface
563 _str_na_value = np.nan