from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    Literal,
    overload,
)
import warnings

import numpy as np

from pandas._libs import (
    lib,
    missing as libmissing,
)
from pandas._libs.tslibs import is_supported_dtype
from pandas._typing import (
    ArrayLike,
    AstypeArg,
    AxisInt,
    DtypeObj,
    FillnaOptions,
    InterpolateOptions,
    NpDtype,
    PositionalIndexer,
    Scalar,
    ScalarIndexer,
    Self,
    SequenceIndexer,
    Shape,
    npt,
)
from pandas.compat import (
    IS64,
    is_platform_windows,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import (
    is_bool,
    is_integer_dtype,
    is_list_like,
    is_scalar,
    is_string_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
    notna,
)

from pandas.core import (
    algorithms as algos,
    arraylike,
    missing,
    nanops,
    ops,
)
from pandas.core.algorithms import (
    factorize_array,
    isin,
    map_array,
    mode,
    take,
)
from pandas.core.array_algos import (
    masked_accumulations,
    masked_reductions,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays._utils import to_numpy_dtype_inference
from pandas.core.arrays.base import ExtensionArray
from pandas.core.construction import (
    array as pd_array,
    ensure_wrapped_if_datetimelike,
    extract_array,
)
from pandas.core.indexers import check_array_indexer
from pandas.core.ops import invalid_comparison
from pandas.core.util.hashing import hash_array

if TYPE_CHECKING:
    from collections.abc import (
        Iterator,
        Sequence,
    )
    from pandas import Series
    from pandas.core.arrays import BooleanArray
    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
    )
    from pandas.core.arrays import FloatingArray

from pandas.compat.numpy import function as nv


class BaseMaskedArray(OpsMixin, ExtensionArray):
    """
    Base class for masked arrays (which use _data and _mask to store the data).

    Backed by NumPy arrays.
    """

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value: Scalar
    # our underlying data and mask are each ndarrays
    _data: np.ndarray
    _mask: npt.NDArray[np.bool_]

    # Fill values used for any/all
    _truthy_value = Scalar  # bool(_truthy_value) = True
    _falsey_value = Scalar  # bool(_falsey_value) = False

    @classmethod
    def _simple_new(cls, values: np.ndarray, mask: npt.NDArray[np.bool_]) -> Self:
        result = BaseMaskedArray.__new__(cls)
        result._data = values
        result._mask = mask
        return result

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        # values is supposed to already be validated in the subclass
        if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
            raise TypeError(
                "mask should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.shape != mask.shape:
            raise ValueError("values.shape must match mask.shape")

        if copy:
            values = values.copy()
            mask = mask.copy()

        self._data = values
        self._mask = mask

    @classmethod
    def _from_sequence(cls, scalars, *, dtype=None, copy: bool = False) -> Self:
        values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
        return cls(values, mask)

    @classmethod
    @doc(ExtensionArray._empty)
    def _empty(cls, shape: Shape, dtype: ExtensionDtype):
        values = np.empty(shape, dtype=dtype.type)
        values.fill(cls._internal_fill_value)
        mask = np.ones(shape, dtype=bool)
        result = cls(values, mask)
        if not isinstance(result, cls) or dtype != result.dtype:
            raise NotImplementedError(
                f"Default 'empty' implementation is invalid for dtype='{dtype}'"
            )
        return result

    def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
        # NEP 51: https://github.com/numpy/numpy/pull/22449
        return str

    @property
    def dtype(self) -> BaseMaskedDtype:
        raise AbstractMethodError(self)

    @overload
    def __getitem__(self, item: ScalarIndexer) -> Any:
        ...

    @overload
    def __getitem__(self, item: SequenceIndexer) -> Self:
        ...

    def __getitem__(self, item: PositionalIndexer) -> Self | Any:
        item = check_array_indexer(self, item)

        newmask = self._mask[item]
        if is_bool(newmask):
            # This is a scalar indexing
            if newmask:
                return self.dtype.na_value
            return self._data[item]

        return self._simple_new(self._data[item], newmask)

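    # Illustrative example (not from the pandas source; behavior assumed from the
    # indexing logic above). A scalar index hits the mask and yields the NA
    # sentinel; a sequence index returns a new masked array:
    #
    #   arr = pd.array([1, 2, None], dtype="Int64")
    #   arr[2]    # <NA>            (mask is True at position 2)
    #   arr[1:]   # IntegerArray [2, <NA>], dtype: Int64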
    def _pad_or_backfill(
        self,
        *,
        method: FillnaOptions,
        limit: int | None = None,
        limit_area: Literal["inside", "outside"] | None = None,
        copy: bool = True,
    ) -> Self:
        mask = self._mask

        if mask.any():
            func = missing.get_fill_func(method, ndim=self.ndim)

            npvalues = self._data.T
            new_mask = mask.T
            if copy:
                npvalues = npvalues.copy()
                new_mask = new_mask.copy()
            elif limit_area is not None:
                mask = mask.copy()
            func(npvalues, limit=limit, mask=new_mask)

            if limit_area is not None and not mask.all():
                mask = mask.T
                neg_mask = ~mask
                first = neg_mask.argmax()
                last = len(neg_mask) - neg_mask[::-1].argmax() - 1
                if limit_area == "inside":
                    new_mask[:first] |= mask[:first]
                    new_mask[last + 1 :] |= mask[last + 1 :]
                elif limit_area == "outside":
                    new_mask[first + 1 : last] |= mask[first + 1 : last]

            if copy:
                return self._simple_new(npvalues.T, new_mask.T)
            else:
                return self
        else:
            if copy:
                new_values = self.copy()
            else:
                new_values = self
        return new_values

    @doc(ExtensionArray.fillna)
    def fillna(
        self, value=None, method=None, limit: int | None = None, copy: bool = True
    ) -> Self:
        value, method = validate_fillna_kwargs(value, method)

        mask = self._mask

        value = missing.check_value_size(value, mask, len(self))

        if mask.any():
            if method is not None:
                func = missing.get_fill_func(method, ndim=self.ndim)
                npvalues = self._data.T
                new_mask = mask.T
                if copy:
                    npvalues = npvalues.copy()
                    new_mask = new_mask.copy()
                func(npvalues, limit=limit, mask=new_mask)
                return self._simple_new(npvalues.T, new_mask.T)
            else:
                # fill with value
                if copy:
                    new_values = self.copy()
                else:
                    new_values = self[:]
                new_values[mask] = value
        else:
            if copy:
                new_values = self.copy()
            else:
                new_values = self[:]
        return new_values

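    # Illustrative example (not from the pandas source; behavior assumed from the
    # value-filling path above). Masked positions are overwritten and unmasked:
    #
    #   arr = pd.array([1, None, 3], dtype="Int64")
    #   arr.fillna(0)   # IntegerArray [1, 0, 3], dtype: Int64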
    @classmethod
    def _coerce_to_array(
        cls, values, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        raise AbstractMethodError(cls)

    def _validate_setitem_value(self, value):
        """
        Check if we have a scalar that we can cast losslessly.

        Raises
        ------
        TypeError
        """
        kind = self.dtype.kind
        # TODO: get this all from np_can_hold_element?
        if kind == "b":
            if lib.is_bool(value):
                return value

        elif kind == "f":
            if lib.is_integer(value) or lib.is_float(value):
                return value

        else:
            if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()):
                return value
            # TODO: unsigned checks

        # Note: without the "str" here, the f-string rendering raises in
        # py38 builds.
        raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")

    def __setitem__(self, key, value) -> None:
        key = check_array_indexer(self, key)

        if is_scalar(value):
            if is_valid_na_for_dtype(value, self.dtype):
                self._mask[key] = True
            else:
                value = self._validate_setitem_value(value)
                self._data[key] = value
                self._mask[key] = False
            return

        value, mask = self._coerce_to_array(value, dtype=self.dtype)

        self._data[key] = value
        self._mask[key] = mask

    def __contains__(self, key) -> bool:
        if isna(key) and key is not self.dtype.na_value:
            # GH#52840
            if self._data.dtype.kind == "f" and lib.is_float(key):
                return bool((np.isnan(self._data) & ~self._mask).any())

        return bool(super().__contains__(key))

    def __iter__(self) -> Iterator:
        if self.ndim == 1:
            if not self._hasna:
                for val in self._data:
                    yield val
            else:
                na_value = self.dtype.na_value
                for isna_, val in zip(self._mask, self._data):
                    if isna_:
                        yield na_value
                    else:
                        yield val
        else:
            for i in range(len(self)):
                yield self[i]

    def __len__(self) -> int:
        return len(self._data)

    @property
    def shape(self) -> Shape:
        return self._data.shape

    @property
    def ndim(self) -> int:
        return self._data.ndim

    def swapaxes(self, axis1, axis2) -> Self:
        data = self._data.swapaxes(axis1, axis2)
        mask = self._mask.swapaxes(axis1, axis2)
        return self._simple_new(data, mask)

    def delete(self, loc, axis: AxisInt = 0) -> Self:
        data = np.delete(self._data, loc, axis=axis)
        mask = np.delete(self._mask, loc, axis=axis)
        return self._simple_new(data, mask)

    def reshape(self, *args, **kwargs) -> Self:
        data = self._data.reshape(*args, **kwargs)
        mask = self._mask.reshape(*args, **kwargs)
        return self._simple_new(data, mask)

    def ravel(self, *args, **kwargs) -> Self:
        # TODO: need to make sure we have the same order for data/mask
        data = self._data.ravel(*args, **kwargs)
        mask = self._mask.ravel(*args, **kwargs)
        return type(self)(data, mask)

    @property
    def T(self) -> Self:
        return self._simple_new(self._data.T, self._mask.T)

    def round(self, decimals: int = 0, *args, **kwargs):
        """
        Round each value in the array to the given number of decimals.

        Parameters
        ----------
        decimals : int, default 0
            Number of decimal places to round to. If decimals is negative,
            it specifies the number of positions to the left of the decimal point.
        *args, **kwargs
            Additional arguments and keywords have no effect but might be
            accepted for compatibility with NumPy.

        Returns
        -------
        NumericArray
            Rounded values of the NumericArray.

        See Also
        --------
        numpy.around : Round values of an np.array.
        DataFrame.round : Round values of a DataFrame.
        Series.round : Round values of a Series.
        """
        if self.dtype.kind == "b":
            return self
        nv.validate_round(args, kwargs)
        values = np.round(self._data, decimals=decimals, **kwargs)

        # Usually we'll get same type as self, but ndarray[bool] casts to float
        return self._maybe_mask_result(values, self._mask.copy())

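    # Illustrative example (not from the pandas source; behavior assumed from the
    # implementation above). The mask is preserved while the stored values round:
    #
    #   arr = pd.array([1.234, None], dtype="Float64")
    #   arr.round(1)    # FloatingArray [1.2, <NA>], dtype: Float64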
    # ------------------------------------------------------------------
    # Unary Methods

    def __invert__(self) -> Self:
        return self._simple_new(~self._data, self._mask.copy())

    def __neg__(self) -> Self:
        return self._simple_new(-self._data, self._mask.copy())

    def __pos__(self) -> Self:
        return self.copy()

    def __abs__(self) -> Self:
        return self._simple_new(abs(self._data), self._mask.copy())

    # ------------------------------------------------------------------

    def _values_for_json(self) -> np.ndarray:
        return np.asarray(self, dtype=object)

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert to a NumPy Array.

        By default converts to an object-dtype NumPy array. Specify the `dtype` and
        `na_value` keywords to customize the conversion.

        Parameters
        ----------
        dtype : dtype, default object
            The numpy dtype to convert to.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            the array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary. A no-copy
            conversion is typically only possible when no missing values
            are present and `dtype` is the equivalent numpy dtype.
        na_value : scalar, optional
            Scalar missing value indicator to use in numpy array. Defaults
            to the native missing value indicator of this array (pd.NA).

        Returns
        -------
        numpy.ndarray

        Examples
        --------
        An object-dtype is the default result

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a.to_numpy()
        array([True, False, <NA>], dtype=object)

        When no missing values are present, an equivalent dtype can be used.

        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
        array([ True, False])
        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
        array([1, 2])

        However, requesting such a dtype will raise a ValueError if
        missing values are present and the default missing value :attr:`NA`
        is used.

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a
        <BooleanArray>
        [True, False, <NA>]
        Length: 3, dtype: boolean

        >>> a.to_numpy(dtype="bool")
        Traceback (most recent call last):
        ...
        ValueError: cannot convert to bool numpy array in presence of missing values

        Specify a valid `na_value` instead

        >>> a.to_numpy(dtype="bool", na_value=False)
        array([ True, False, False])
        """
        hasna = self._hasna
        dtype, na_value = to_numpy_dtype_inference(self, dtype, na_value, hasna)
        if dtype is None:
            dtype = object

        if hasna:
            if (
                dtype != object
                and not is_string_dtype(dtype)
                and na_value is libmissing.NA
            ):
                raise ValueError(
                    f"cannot convert to '{dtype}'-dtype NumPy array "
                    "with missing values. Specify an appropriate 'na_value' "
                    "for this dtype."
                )
            # don't pass copy to astype -> always need a copy since we are mutating
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                data = self._data.astype(dtype)
            data[self._mask] = na_value
        else:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                data = self._data.astype(dtype, copy=copy)
        return data

    @doc(ExtensionArray.tolist)
    def tolist(self):
        if self.ndim > 1:
            return [x.tolist() for x in self]
        dtype = None if self._hasna else self._data.dtype
        return self.to_numpy(dtype=dtype, na_value=libmissing.NA).tolist()

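    # Illustrative example (not from the pandas source; behavior assumed from the
    # tolist implementation above). Missing entries round-trip through object
    # dtype and come back as pd.NA:
    #
    #   pd.array([1, None], dtype="Int64").tolist()   # [1, <NA>]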
    @overload
    def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
        ...

    @overload
    def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
        ...

    @overload
    def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
        ...

    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
        dtype = pandas_dtype(dtype)

        if dtype == self.dtype:
            if copy:
                return self.copy()
            return self

        # if we are astyping to another nullable masked dtype, we can fastpath
        if isinstance(dtype, BaseMaskedDtype):
            # TODO deal with NaNs for FloatingArray case
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                # TODO: Is rounding what we want long term?
                data = self._data.astype(dtype.numpy_dtype, copy=copy)
            # mask is copied depending on whether the data was copied, and
            # not directly depending on the `copy` keyword
            mask = self._mask if data is self._data else self._mask.copy()
            cls = dtype.construct_array_type()
            return cls(data, mask, copy=False)

        if isinstance(dtype, ExtensionDtype):
            eacls = dtype.construct_array_type()
            return eacls._from_sequence(self, dtype=dtype, copy=copy)

        na_value: float | np.datetime64 | lib.NoDefault

        # coerce
        if dtype.kind == "f":
            # In astype, we consider dtype=float to also mean na_value=np.nan
            na_value = np.nan
        elif dtype.kind == "M":
            na_value = np.datetime64("NaT")
        else:
            na_value = lib.no_default

        # to_numpy will also raise, but we get somewhat nicer exception messages here
        if dtype.kind in "iu" and self._hasna:
            raise ValueError("cannot convert NA to integer")
        if dtype.kind == "b" and self._hasna:
            # careful: astype_nansafe converts np.nan to True
            raise ValueError("cannot convert float NaN to bool")

        data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
        return data

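    # Illustrative example (not from the pandas source; behavior assumed from the
    # astype paths above). Masked dtypes keep the mask, while casting to a float
    # numpy dtype fills missing entries with np.nan:
    #
    #   arr = pd.array([1, None], dtype="Int64")
    #   arr.astype("Float64")   # FloatingArray [1.0, <NA>], dtype: Float64
    #   arr.astype("float64")   # np.ndarray [1.0, nan]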
    __array_priority__ = 1000  # higher than ndarray so ops dispatch to us

    def __array__(
        self, dtype: NpDtype | None = None, copy: bool | None = None
    ) -> np.ndarray:
        """
        The array interface; return my values.

        We return an object array here to preserve our scalar values.
        """
        return self.to_numpy(dtype=dtype)

    _HANDLED_TYPES: tuple[type, ...]

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # For MaskedArray inputs, we apply the ufunc to ._data
        # and mask the result.

        out = kwargs.get("out", ())

        for x in inputs + out:
            if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)):
                return NotImplemented

        # for binary ops, use our custom dunder methods
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_with_out
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        mask = np.zeros(len(self), dtype=bool)
        inputs2 = []
        for x in inputs:
            if isinstance(x, BaseMaskedArray):
                mask |= x._mask
                inputs2.append(x._data)
            else:
                inputs2.append(x)

        def reconstruct(x: np.ndarray):
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.
            from pandas.core.arrays import (
                BooleanArray,
                FloatingArray,
                IntegerArray,
            )

            if x.dtype.kind == "b":
                m = mask.copy()
                return BooleanArray(x, m)
            elif x.dtype.kind in "iu":
                m = mask.copy()
                return IntegerArray(x, m)
            elif x.dtype.kind == "f":
                m = mask.copy()
                if x.dtype == np.float16:
                    # reached in e.g. np.sqrt on BooleanArray
                    # we don't support float16
                    x = x.astype(np.float32)
                return FloatingArray(x, m)
            else:
                x[mask] = np.nan
            return x

        result = getattr(ufunc, method)(*inputs2, **kwargs)
        if ufunc.nout > 1:
            # e.g. np.divmod
            return tuple(reconstruct(x) for x in result)
        elif method == "reduce":
            # e.g. np.add.reduce; test_ufunc_reduce_raises
            if self._mask.any():
                return self._na_value
            return result
        else:
            return reconstruct(result)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow as pa

        return pa.array(self._data, mask=self._mask, type=type)

    @property
    def _hasna(self) -> bool:
        # Note: this is expensive right now! The hope is that we can
        # make this faster by having an optional mask, without having to
        # change source code using it.

        # error: Incompatible return value type (got "bool_", expected "bool")
        return self._mask.any()  # type: ignore[return-value]

    def _propagate_mask(
        self, mask: npt.NDArray[np.bool_] | None, other
    ) -> npt.NDArray[np.bool_]:
        if mask is None:
            mask = self._mask.copy()  # TODO: need test for BooleanArray needing a copy
            if other is libmissing.NA:
                # GH#45421 don't alter inplace
                mask = mask | True
            elif is_list_like(other) and len(other) == len(mask):
                mask = mask | isna(other)
        else:
            mask = self._mask | mask
        # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]",
        # expected "ndarray[Any, dtype[bool_]]")
        return mask  # type: ignore[return-value]

    def _arith_method(self, other, op):
        op_name = op.__name__
        omask = None

        if (
            not hasattr(other, "dtype")
            and is_list_like(other)
            and len(other) == len(self)
        ):
            # Try inferring masked dtype instead of casting to object
            other = pd_array(other)
            other = extract_array(other, extract_numpy=True)

        if isinstance(other, BaseMaskedArray):
            other, omask = other._data, other._mask

        elif is_list_like(other):
            if not isinstance(other, ExtensionArray):
                other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")

        # We wrap the non-masked arithmetic logic used for numpy dtypes
        # in Series/Index arithmetic ops.
        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)

        if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
            # Avoid DeprecationWarning: In future, it will be an error
            # for 'np.bool_' scalars to be interpreted as an index
            # e.g. test_array_scalar_like_equivalence
            other = bool(other)

        mask = self._propagate_mask(omask, other)

        if other is libmissing.NA:
            result = np.ones_like(self._data)
            if self.dtype.kind == "b":
                if op_name in {
                    "floordiv",
                    "rfloordiv",
                    "pow",
                    "rpow",
                    "truediv",
                    "rtruediv",
                }:
                    # GH#41165 Try to match non-masked Series behavior
                    # This is still imperfect GH#46043
                    raise NotImplementedError(
                        f"operator '{op_name}' not implemented for bool dtypes"
                    )
                if op_name in {"mod", "rmod"}:
                    dtype = "int8"
                else:
                    dtype = "bool"
                result = result.astype(dtype)
            elif "truediv" in op_name and self.dtype.kind != "f":
                # The actual data here doesn't matter since the mask
                # will be all-True, but since this is division, we want
                # to end up with floating dtype.
                result = result.astype(np.float64)
        else:
            # Make sure we do this before the "pow" mask checks
            # to get an expected exception message on shape mismatch.
            if self.dtype.kind in "iu" and op_name in ["floordiv", "mod"]:
                # TODO(GH#30188) ATM we don't match the behavior of non-masked
                # types with respect to floordiv-by-zero
                pd_op = op

            with np.errstate(all="ignore"):
                result = pd_op(self._data, other)

        if op_name == "pow":
            # 1 ** x is 1.
            mask = np.where((self._data == 1) & ~self._mask, False, mask)
            # x ** 0 is 1.
            if omask is not None:
                mask = np.where((other == 0) & ~omask, False, mask)
            elif other is not libmissing.NA:
                mask = np.where(other == 0, False, mask)

        elif op_name == "rpow":
            # 1 ** x is 1.
            if omask is not None:
                mask = np.where((other == 1) & ~omask, False, mask)
            elif other is not libmissing.NA:
                mask = np.where(other == 1, False, mask)
            # x ** 0 is 1.
            mask = np.where((self._data == 0) & ~self._mask, False, mask)

        return self._maybe_mask_result(result, mask)

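    # Illustrative example (not from the pandas source; behavior assumed from the
    # "pow"/"rpow" mask fix-ups above, which unmask results that are known even
    # when one operand is missing):
    #
    #   arr = pd.array([1, 2], dtype="Int64")
    #   arr ** pd.NA    # IntegerArray [1, <NA>]   (1 ** anything is 1)
    #   arr ** 0        # IntegerArray [1, 1]      (anything ** 0 is 1)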
    _logical_method = _arith_method

    def _cmp_method(self, other, op) -> BooleanArray:
        from pandas.core.arrays import BooleanArray

        mask = None

        if isinstance(other, BaseMaskedArray):
            other, mask = other._data, other._mask

        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if other is libmissing.NA:
            # numpy does not handle pd.NA well as "other" scalar (it returns
            # a scalar False instead of an array)
            # This may be fixed by NA.__array_ufunc__. Revisit this check
            # once that's implemented.
            result = np.zeros(self._data.shape, dtype="bool")
            mask = np.ones(self._data.shape, dtype="bool")
        else:
            with warnings.catch_warnings():
                # numpy may show a FutureWarning or DeprecationWarning:
                # elementwise comparison failed; returning scalar instead,
                # but in the future will perform elementwise comparison
                # before returning NotImplemented. We fall back to the correct
                # behavior today, so that should be fine to ignore.
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                warnings.filterwarnings("ignore", "elementwise", DeprecationWarning)
                method = getattr(self._data, f"__{op.__name__}__")
                result = method(other)

                if result is NotImplemented:
                    result = invalid_comparison(self._data, other, op)

        mask = self._propagate_mask(mask, other)
        return BooleanArray(result, mask, copy=False)

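    # Illustrative example (not from the pandas source; behavior assumed from the
    # comparison logic above). Comparisons always return a BooleanArray with the
    # mask propagated:
    #
    #   pd.array([1, None], dtype="Int64") == 1   # BooleanArray [True, <NA>]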
    def _maybe_mask_result(
        self, result: np.ndarray | tuple[np.ndarray, np.ndarray], mask: np.ndarray
    ):
        """
        Parameters
        ----------
        result : array-like or tuple[array-like]
        mask : array-like bool
        """
        if isinstance(result, tuple):
            # i.e. divmod
            div, mod = result
            return (
                self._maybe_mask_result(div, mask),
                self._maybe_mask_result(mod, mask),
            )

        if result.dtype.kind == "f":
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif result.dtype.kind == "b":
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif lib.is_np_dtype(result.dtype, "m") and is_supported_dtype(result.dtype):
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            result[mask] = result.dtype.type("NaT")

            if not isinstance(result, TimedeltaArray):
                return TimedeltaArray._simple_new(result, dtype=result.dtype)

            return result

        elif result.dtype.kind in "iu":
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

        else:
            result[mask] = np.nan
            return result

    def isna(self) -> np.ndarray:
        return self._mask.copy()

    @property
    def _na_value(self):
        return self.dtype.na_value

    @property
    def nbytes(self) -> int:
        return self._data.nbytes + self._mask.nbytes

    @classmethod
    def _concat_same_type(
        cls,
        to_concat: Sequence[Self],
        axis: AxisInt = 0,
    ) -> Self:
        data = np.concatenate([x._data for x in to_concat], axis=axis)
        mask = np.concatenate([x._mask for x in to_concat], axis=axis)
        return cls(data, mask)

    def _hash_pandas_object(
        self, *, encoding: str, hash_key: str, categorize: bool
    ) -> npt.NDArray[np.uint64]:
        hashed_array = hash_array(
            self._data, encoding=encoding, hash_key=hash_key, categorize=categorize
        )
        hashed_array[self.isna()] = hash(self.dtype.na_value)
        return hashed_array

    def take(
        self,
        indexer,
        *,
        allow_fill: bool = False,
        fill_value: Scalar | None = None,
        axis: AxisInt = 0,
    ) -> Self:
        # we always fill with 1 internally
        # to avoid upcasting
        data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
        result = take(
            self._data,
            indexer,
            fill_value=data_fill_value,
            allow_fill=allow_fill,
            axis=axis,
        )

        mask = take(
            self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
        )

        # if we are filling
        # we only fill where the indexer is null
        # not existing missing values
        # TODO(jreback) what if we have a non-na float as a fill value?
        if allow_fill and notna(fill_value):
            fill_mask = np.asarray(indexer) == -1
            result[fill_mask] = fill_value
            mask = mask ^ fill_mask

        return self._simple_new(result, mask)

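    # Illustrative example (not from the pandas source; behavior assumed from the
    # fill handling above). -1 positions become NA unless a fill_value is given:
    #
    #   arr = pd.array([1, 2, None], dtype="Int64")
    #   arr.take([0, -1], allow_fill=True)                # IntegerArray [1, <NA>]
    #   arr.take([0, -1], allow_fill=True, fill_value=9)  # IntegerArray [1, 9]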
    # error: Return type "BooleanArray" of "isin" incompatible with return type
    # "ndarray" in supertype "ExtensionArray"
    def isin(self, values: ArrayLike) -> BooleanArray:  # type: ignore[override]
        from pandas.core.arrays import BooleanArray

        # algorithms.isin will eventually convert values to an ndarray, so no extra
        # cost to doing it here first
        values_arr = np.asarray(values)
        result = isin(self._data, values_arr)

        if self._hasna:
            values_have_NA = values_arr.dtype == object and any(
                val is self.dtype.na_value for val in values_arr
            )

            # For now, NA does not propagate so set result according to presence of NA,
            # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
            result[self._mask] = values_have_NA

        mask = np.zeros(self._data.shape, dtype=bool)
        return BooleanArray(result, mask, copy=False)

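    # Illustrative example (not from the pandas source; behavior assumed from the
    # logic above). NA does not propagate through isin -- masked entries are True
    # only when the values being checked themselves contain NA:
    #
    #   arr = pd.array([1, None], dtype="Int64")
    #   arr.isin([1])         # BooleanArray [True, False]
    #   arr.isin([1, pd.NA])  # BooleanArray [True, True]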
    def copy(self) -> Self:
        data = self._data.copy()
        mask = self._mask.copy()
        return self._simple_new(data, mask)

    @doc(ExtensionArray.duplicated)
    def duplicated(
        self, keep: Literal["first", "last", False] = "first"
    ) -> npt.NDArray[np.bool_]:
        values = self._data
        mask = self._mask
        return algos.duplicated(values, keep=keep, mask=mask)

    def unique(self) -> Self:
        """
        Compute the BaseMaskedArray of unique values.

        Returns
        -------
        uniques : BaseMaskedArray
        """
        uniques, mask = algos.unique_with_mask(self._data, self._mask)
        return self._simple_new(uniques, mask)

    @doc(ExtensionArray.searchsorted)
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter | None = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if self._hasna:
            raise ValueError(
                "searchsorted requires array to be sorted, which is impossible "
                "with NAs present."
            )
        if isinstance(value, ExtensionArray):
            value = value.astype(object)
        # Base class searchsorted would cast to object, which is *much* slower.
        return self._data.searchsorted(value, side=side, sorter=sorter)

    @doc(ExtensionArray.factorize)
    def factorize(
        self,
        use_na_sentinel: bool = True,
    ) -> tuple[np.ndarray, ExtensionArray]:
        arr = self._data
        mask = self._mask

        # Use a sentinel for na; recode and add NA to uniques if necessary below
        codes, uniques = factorize_array(arr, use_na_sentinel=True, mask=mask)

        # check that factorize_array correctly preserves dtype.
        assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype)

        has_na = mask.any()
        if use_na_sentinel or not has_na:
            size = len(uniques)
        else:
            # Make room for an NA value
            size = len(uniques) + 1
        uniques_mask = np.zeros(size, dtype=bool)
        if not use_na_sentinel and has_na:
            na_index = mask.argmax()
            # Insert na with the proper code
            if na_index == 0:
                na_code = np.intp(0)
            else:
                na_code = codes[:na_index].max() + 1
            codes[codes >= na_code] += 1
            codes[codes == -1] = na_code
            # dummy value for uniques; not used since uniques_mask will be True
            uniques = np.insert(uniques, na_code, 0)
            uniques_mask[na_code] = True
        uniques_ea = self._simple_new(uniques, uniques_mask)

        return codes, uniques_ea

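    # Illustrative example (not from the pandas source; behavior assumed from the
    # NA handling above):
    #
    #   arr = pd.array([1, None, 1], dtype="Int64")
    #   arr.factorize()                       # codes [0, -1, 0], uniques [1]
    #   arr.factorize(use_na_sentinel=False)  # codes [0, 1, 0],  uniques [1, <NA>]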
    @doc(ExtensionArray._values_for_argsort)
    def _values_for_argsort(self) -> np.ndarray:
        return self._data

    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        keys, value_counts, na_counter = algos.value_counts_arraylike(
            self._data, dropna=dropna, mask=self._mask
        )
        mask_index = np.zeros((len(value_counts),), dtype=np.bool_)
        mask = mask_index.copy()

        if na_counter > 0:
            mask_index[-1] = True

        arr = IntegerArray(value_counts, mask)
        index = Index(
            self.dtype.construct_array_type()(
                keys, mask_index  # type: ignore[arg-type]
            )
        )
        return Series(arr, index=index, name="count", copy=False)

    def _mode(self, dropna: bool = True) -> Self:
        if dropna:
            result = mode(self._data, dropna=dropna, mask=self._mask)
            res_mask = np.zeros(result.shape, dtype=np.bool_)
        else:
            result, res_mask = mode(self._data, dropna=dropna, mask=self._mask)
        result = type(self)(result, res_mask)  # type: ignore[arg-type]
        return result[result.argsort()]

    @doc(ExtensionArray.equals)
    def equals(self, other) -> bool:
        if type(self) != type(other):
            return False
        if other.dtype != self.dtype:
            return False

        # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
        # equal.
        if not np.array_equal(self._mask, other._mask):
            return False

        left = self._data[~self._mask]
        right = other._data[~other._mask]
        return array_equivalent(left, right, strict_nan=True, dtype_equal=True)

    def _quantile(
        self, qs: npt.NDArray[np.float64], interpolation: str
    ) -> BaseMaskedArray:
        """
        Dispatch to quantile_with_mask, needed because we do not have
        _from_factorized.

        Notes
        -----
        We assume that all impacted cases are 1D-only.
        """
        res = quantile_with_mask(
            self._data,
            mask=self._mask,
            # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype)
            # instead of np.nan
            fill_value=np.nan,
            qs=qs,
            interpolation=interpolation,
        )

        if self._hasna:
            # Our result mask is all-False unless we are all-NA, in which
            # case it is all-True.
            if self.ndim == 2:
                # I think this should be out_mask=self.isna().all(axis=1)
                # but am holding off until we have tests
                raise NotImplementedError
            if self.isna().all():
                out_mask = np.ones(res.shape, dtype=bool)

                if is_integer_dtype(self.dtype):
                    # We try to maintain int dtype if possible for not all-na case
                    # as well
                    res = np.zeros(res.shape, dtype=self.dtype.numpy_dtype)
            else:
                out_mask = np.zeros(res.shape, dtype=bool)
        else:
            out_mask = np.zeros(res.shape, dtype=bool)
        return self._maybe_mask_result(res, mask=out_mask)

    # ------------------------------------------------------------------
    # Reductions

    def _reduce(
        self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
    ):
        if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var", "std"}:
            result = getattr(self, name)(skipna=skipna, **kwargs)
        else:
            # median, skew, kurt, sem
            data = self._data
            mask = self._mask
            op = getattr(nanops, f"nan{name}")
            axis = kwargs.pop("axis", None)
            result = op(data, axis=axis, skipna=skipna, mask=mask, **kwargs)

        if keepdims:
            if isna(result):
                return self._wrap_na_result(name=name, axis=0, mask_size=(1,))
            else:
                result = result.reshape(1)
                mask = np.zeros(1, dtype=bool)
                return self._maybe_mask_result(result, mask)

        if isna(result):
            return libmissing.NA
        else:
            return result

    def _wrap_reduction_result(self, name: str, result, *, skipna, axis):
        if isinstance(result, np.ndarray):
            if skipna:
                # we only retain mask for all-NA rows/columns
                mask = self._mask.all(axis=axis)
            else:
                mask = self._mask.any(axis=axis)

            return self._maybe_mask_result(result, mask)
        return result

    def _wrap_na_result(self, *, name, axis, mask_size):
        mask = np.ones(mask_size, dtype=bool)

        float_dtyp = "float32" if self.dtype == "Float32" else "float64"
        if name in ["mean", "median", "var", "std", "skew", "kurt"]:
            np_dtype = float_dtyp
        elif name in ["min", "max"] or self.dtype.itemsize == 8:
            np_dtype = self.dtype.numpy_dtype.name
        else:
            is_windows_or_32bit = is_platform_windows() or not IS64
            int_dtyp = "int32" if is_windows_or_32bit else "int64"
            uint_dtyp = "uint32" if is_windows_or_32bit else "uint64"
            np_dtype = {"b": int_dtyp, "i": int_dtyp, "u": uint_dtyp, "f": float_dtyp}[
                self.dtype.kind
            ]

        value = np.array([1], dtype=np_dtype)
        return self._maybe_mask_result(value, mask=mask)

    def _wrap_min_count_reduction_result(
        self, name: str, result, *, skipna, min_count, axis
    ):
        if min_count == 0 and isinstance(result, np.ndarray):
            return self._maybe_mask_result(result, np.zeros(result.shape, dtype=bool))
        return self._wrap_reduction_result(name, result, skipna=skipna, axis=axis)

    def sum(
        self,
        *,
        skipna: bool = True,
        min_count: int = 0,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        nv.validate_sum((), kwargs)

        result = masked_reductions.sum(
            self._data,
            self._mask,
            skipna=skipna,
            min_count=min_count,
            axis=axis,
        )
        return self._wrap_min_count_reduction_result(
            "sum", result, skipna=skipna, min_count=min_count, axis=axis
        )

    def prod(
        self,
        *,
        skipna: bool = True,
        min_count: int = 0,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        nv.validate_prod((), kwargs)

        result = masked_reductions.prod(
            self._data,
            self._mask,
            skipna=skipna,
            min_count=min_count,
            axis=axis,
        )
        return self._wrap_min_count_reduction_result(
            "prod", result, skipna=skipna, min_count=min_count, axis=axis
        )

    def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_mean((), kwargs)
        result = masked_reductions.mean(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )
        return self._wrap_reduction_result("mean", result, skipna=skipna, axis=axis)

    def var(
        self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
    ):
        nv.validate_stat_ddof_func((), kwargs, fname="var")
        result = masked_reductions.var(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
            ddof=ddof,
        )
        return self._wrap_reduction_result("var", result, skipna=skipna, axis=axis)

    def std(
        self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
    ):
        nv.validate_stat_ddof_func((), kwargs, fname="std")
        result = masked_reductions.std(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
            ddof=ddof,
        )
        return self._wrap_reduction_result("std", result, skipna=skipna, axis=axis)

    def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_min((), kwargs)
        result = masked_reductions.min(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )
        return self._wrap_reduction_result("min", result, skipna=skipna, axis=axis)

    def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_max((), kwargs)
        result = masked_reductions.max(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )
        return self._wrap_reduction_result("max", result, skipna=skipna, axis=axis)

    def map(self, mapper, na_action=None):
        return map_array(self.to_numpy(), mapper, na_action=na_action)

    def any(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        """
        Return whether any element is truthy.

        Returns False unless there is at least one element that is truthy.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        .. versionchanged:: 1.4.0

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be False, as for an empty array.
            If `skipna` is False, the result will still be True if there is
            at least one element that is truthy, otherwise NA will be returned
            if there are NA's present.
        axis : int, optional, default 0
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.any : Numpy version of this method.
        BaseMaskedArray.all : Return whether all elements are truthy.

        Examples
        --------
        The result indicates whether any element is truthy (and by default
        skips NAs):

        >>> pd.array([True, False, True]).any()
        True
        >>> pd.array([True, False, pd.NA]).any()
        True
        >>> pd.array([False, False, pd.NA]).any()
        False
        >>> pd.array([], dtype="boolean").any()
        False
        >>> pd.array([pd.NA], dtype="boolean").any()
        False
        >>> pd.array([pd.NA], dtype="Float64").any()
        False

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, False, pd.NA]).any(skipna=False)
        True
        >>> pd.array([1, 0, pd.NA]).any(skipna=False)
        True
        >>> pd.array([False, False, pd.NA]).any(skipna=False)
        <NA>
        >>> pd.array([0, 0, pd.NA]).any(skipna=False)
        <NA>
        """
        nv.validate_any((), kwargs)

        values = self._data.copy()
        # error: Argument 3 to "putmask" has incompatible type "object";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]],
        # bool, int, float, complex, str, bytes,
        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
        np.putmask(values, self._mask, self._falsey_value)  # type: ignore[arg-type]
        result = values.any()
        if skipna:
            return result
        else:
            if result or len(self) == 0 or not self._mask.any():
                return result
            else:
                return self.dtype.na_value

    def all(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        """
        Return whether all elements are truthy.

        Returns True unless there is at least one element that is falsey.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        .. versionchanged:: 1.4.0

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be True, as for an empty array.
            If `skipna` is False, the result will still be False if there is
            at least one element that is falsey, otherwise NA will be returned
            if there are NA's present.
        axis : int, optional, default 0
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.all : Numpy version of this method.
        BooleanArray.any : Return whether any element is truthy.

        Examples
        --------
        The result indicates whether all elements are truthy (and by default
        skips NAs):

        >>> pd.array([True, True, pd.NA]).all()
        True
        >>> pd.array([1, 1, pd.NA]).all()
        True
        >>> pd.array([True, False, pd.NA]).all()
        False
        >>> pd.array([], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="Float64").all()
        True

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, True, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([1, 1, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([True, False, pd.NA]).all(skipna=False)
        False
        >>> pd.array([1, 0, pd.NA]).all(skipna=False)
        False
        """
        nv.validate_all((), kwargs)

        values = self._data.copy()
        # error: Argument 3 to "putmask" has incompatible type "object";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]],
        # bool, int, float, complex, str, bytes,
        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
        np.putmask(values, self._mask, self._truthy_value)  # type: ignore[arg-type]
        result = values.all(axis=axis)

        if skipna:
            return result
        else:
            if not result or len(self) == 0 or not self._mask.any():
                return result
            else:
                return self.dtype.na_value

    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        axis: int,
        index,
        limit,
        limit_direction,
        limit_area,
        copy: bool,
        **kwargs,
    ) -> FloatingArray:
        """
        See NDFrame.interpolate.__doc__.
        """
        # NB: we return type(self) even if copy=False
        if self.dtype.kind == "f":
            if copy:
                data = self._data.copy()
                mask = self._mask.copy()
            else:
                data = self._data
                mask = self._mask
        elif self.dtype.kind in "iu":
            copy = True
            data = self._data.astype("f8")
            mask = self._mask.copy()
        else:
            raise NotImplementedError(
                f"interpolate is not implemented for dtype={self.dtype}"
            )

        missing.interpolate_2d_inplace(
            data,
            method=method,
            axis=0,
            index=index,
            limit=limit,
            limit_direction=limit_direction,
            limit_area=limit_area,
            mask=mask,
            **kwargs,
        )
        if not copy:
            return self  # type: ignore[return-value]
        if self.dtype.kind == "f":
            return type(self)._simple_new(data, mask)  # type: ignore[return-value]
        else:
            from pandas.core.arrays import FloatingArray

            return FloatingArray._simple_new(data, mask)

    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> BaseMaskedArray:
        data = self._data
        mask = self._mask

        op = getattr(masked_accumulations, name)
        data, mask = op(data, mask, skipna=skipna, **kwargs)

        return self._simple_new(data, mask)

    # ------------------------------------------------------------------
    # GroupBy Methods

    def _groupby_op(
        self,
        *,
        how: str,
        has_dropped_na: bool,
        min_count: int,
        ngroups: int,
        ids: npt.NDArray[np.intp],
        **kwargs,
    ):
        from pandas.core.groupby.ops import WrappedCythonOp

        kind = WrappedCythonOp.get_kind_from_how(how)
        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)

        # libgroupby functions are responsible for NOT altering mask
        mask = self._mask
        if op.kind != "aggregate":
            result_mask = mask.copy()
        else:
            result_mask = np.zeros(ngroups, dtype=bool)

        if how == "rank" and kwargs.get("na_option") in ["top", "bottom"]:
            result_mask[:] = False

        res_values = op._cython_op_ndim_compat(
            self._data,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=ids,
            mask=mask,
            result_mask=result_mask,
            **kwargs,
        )

        if op.how == "ohlc":
            arity = op._cython_arity.get(op.how, 1)
            result_mask = np.tile(result_mask, (arity, 1)).T

        if op.how in ["idxmin", "idxmax"]:
            # Result values are indexes to take, keep as ndarray
            return res_values
        else:
            # res_values should already have the correct dtype, we just need to
            # wrap in a MaskedArray
            return self._maybe_mask_result(res_values, result_mask)


def transpose_homogeneous_masked_arrays(
    masked_arrays: Sequence[BaseMaskedArray],
) -> list[BaseMaskedArray]:
    """Transpose masked arrays in a list, but faster.

    Input should be a list of 1-dim masked arrays of equal length that all have the
    same dtype. The caller is responsible for ensuring validity of the input data.
    """
    masked_arrays = list(masked_arrays)
    dtype = masked_arrays[0].dtype

    values = [arr._data.reshape(1, -1) for arr in masked_arrays]
    transposed_values = np.concatenate(
        values,
        axis=0,
        out=np.empty(
            (len(masked_arrays), len(masked_arrays[0])),
            order="F",
            dtype=dtype.numpy_dtype,
        ),
    )

    masks = [arr._mask.reshape(1, -1) for arr in masked_arrays]
    transposed_masks = np.concatenate(
        masks, axis=0, out=np.empty_like(transposed_values, dtype=bool)
    )

    arr_type = dtype.construct_array_type()
    transposed_arrays: list[BaseMaskedArray] = []
    for i in range(transposed_values.shape[1]):
        transposed_arr = arr_type(transposed_values[:, i], mask=transposed_masks[:, i])
        transposed_arrays.append(transposed_arr)

    return transposed_arrays
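

# Illustrative example (not from the pandas source; behavior assumed from the
# helper above). Three length-2 Int64 "columns" become two length-3 "rows":
#
#   cols = [pd.array([1, 2], dtype="Int64"),
#           pd.array([3, None], dtype="Int64"),
#           pd.array([5, 6], dtype="Int64")]
#   rows = transpose_homogeneous_masked_arrays(cols)
#   rows[0]   # IntegerArray [1, 3, 5]
#   rows[1]   # IntegerArray [2, <NA>, 6]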