from __future__ import annotations

from typing import (
    TYPE_CHECKING,
    Any,
    Iterator,
    Literal,
    Sequence,
    TypeVar,
    overload,
)
import warnings

import numpy as np

from pandas._libs import (
    lib,
    missing as libmissing,
)
from pandas._libs.tslibs import (
    get_unit_from_dtype,
    is_supported_unit,
)
from pandas._typing import (
    ArrayLike,
    AstypeArg,
    AxisInt,
    DtypeObj,
    NpDtype,
    PositionalIndexer,
    Scalar,
    ScalarIndexer,
    SequenceIndexer,
    Shape,
    npt,
)
from pandas.errors import AbstractMethodError
from pandas.util._decorators import doc
from pandas.util._validators import validate_fillna_kwargs

from pandas.core.dtypes.base import ExtensionDtype
from pandas.core.dtypes.common import (
    is_bool,
    is_bool_dtype,
    is_datetime64_dtype,
    is_dtype_equal,
    is_float_dtype,
    is_integer_dtype,
    is_list_like,
    is_object_dtype,
    is_scalar,
    is_string_dtype,
    pandas_dtype,
)
from pandas.core.dtypes.dtypes import BaseMaskedDtype
from pandas.core.dtypes.inference import is_array_like
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
    notna,
)

from pandas.core import (
    algorithms as algos,
    arraylike,
    missing,
    nanops,
    ops,
)
from pandas.core.algorithms import (
    factorize_array,
    isin,
    take,
)
from pandas.core.array_algos import (
    masked_accumulations,
    masked_reductions,
)
from pandas.core.array_algos.quantile import quantile_with_mask
from pandas.core.arraylike import OpsMixin
from pandas.core.arrays import ExtensionArray
from pandas.core.construction import ensure_wrapped_if_datetimelike
from pandas.core.indexers import check_array_indexer
from pandas.core.ops import invalid_comparison

if TYPE_CHECKING:
    from pandas import Series
    from pandas.core.arrays import BooleanArray
    from pandas._typing import (
        NumpySorter,
        NumpyValueArrayLike,
    )

from pandas.compat.numpy import function as nv

BaseMaskedArrayT = TypeVar("BaseMaskedArrayT", bound="BaseMaskedArray")


class BaseMaskedArray(OpsMixin, ExtensionArray):
    """
    Base class for masked arrays (which use _data and _mask to store the data).

    numpy based
    """
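
    # Illustrative sketch (not executed): a masked array pairs a plain NumPy
    # ``_data`` ndarray with a same-shaped boolean ``_mask`` in which ``True``
    # marks a missing entry. For example, ``pd.array([1, None], dtype="Int64")``
    # holds an int64 ``_data`` array (with an arbitrary fill value under the
    # masked slot) together with ``_mask == array([False, True])``.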

    # The value used to fill '_data' to avoid upcasting
    _internal_fill_value: Scalar
    # our underlying data and mask are each ndarrays
    _data: np.ndarray
    _mask: npt.NDArray[np.bool_]

    # Fill values used for any/all
    _truthy_value = Scalar  # bool(_truthy_value) = True
    _falsey_value = Scalar  # bool(_falsey_value) = False

    def __init__(
        self, values: np.ndarray, mask: npt.NDArray[np.bool_], copy: bool = False
    ) -> None:
        # values is supposed to already be validated in the subclass
        if not (isinstance(mask, np.ndarray) and mask.dtype == np.bool_):
            raise TypeError(
                "mask should be boolean numpy array. Use "
                "the 'pd.array' function instead"
            )
        if values.shape != mask.shape:
            raise ValueError("values.shape must match mask.shape")

        if copy:
            values = values.copy()
            mask = mask.copy()

        self._data = values
        self._mask = mask

    @classmethod
    def _from_sequence(
        cls: type[BaseMaskedArrayT], scalars, *, dtype=None, copy: bool = False
    ) -> BaseMaskedArrayT:
        values, mask = cls._coerce_to_array(scalars, dtype=dtype, copy=copy)
        return cls(values, mask)

    @property
    def dtype(self) -> BaseMaskedDtype:
        raise AbstractMethodError(self)

    @overload
    def __getitem__(self, item: ScalarIndexer) -> Any:
        ...

    @overload
    def __getitem__(self: BaseMaskedArrayT, item: SequenceIndexer) -> BaseMaskedArrayT:
        ...

    def __getitem__(
        self: BaseMaskedArrayT, item: PositionalIndexer
    ) -> BaseMaskedArrayT | Any:
        item = check_array_indexer(self, item)

        newmask = self._mask[item]
        if is_bool(newmask):
            # This is a scalar indexing
            if newmask:
                return self.dtype.na_value
            return self._data[item]

        return type(self)(self._data[item], newmask)

    @doc(ExtensionArray.fillna)
    def fillna(
        self: BaseMaskedArrayT, value=None, method=None, limit=None
    ) -> BaseMaskedArrayT:
        value, method = validate_fillna_kwargs(value, method)

        mask = self._mask

        if is_array_like(value):
            if len(value) != len(self):
                raise ValueError(
                    f"Length of 'value' does not match. Got ({len(value)}) "
                    f" expected {len(self)}"
                )
            value = value[mask]

        if mask.any():
            if method is not None:
                func = missing.get_fill_func(method, ndim=self.ndim)
                npvalues = self._data.copy().T
                new_mask = mask.copy().T
                func(npvalues, limit=limit, mask=new_mask)
                return type(self)(npvalues.T, new_mask.T)
            else:
                # fill with value
                new_values = self.copy()
                new_values[mask] = value
        else:
            new_values = self.copy()
        return new_values

    @classmethod
    def _coerce_to_array(
        cls, values, *, dtype: DtypeObj, copy: bool = False
    ) -> tuple[np.ndarray, np.ndarray]:
        raise AbstractMethodError(cls)

    def _validate_setitem_value(self, value):
        """
        Check if we have a scalar that we can cast losslessly.

        Raises
        ------
        TypeError
        """
        kind = self.dtype.kind
        # TODO: get this all from np_can_hold_element?
        if kind == "b":
            if lib.is_bool(value):
                return value

        elif kind == "f":
            if lib.is_integer(value) or lib.is_float(value):
                return value

        else:
            if lib.is_integer(value) or (lib.is_float(value) and value.is_integer()):
                return value
            # TODO: unsigned checks

        # Note: without the "str" here, the f-string rendering raises in
        # py38 builds.
        raise TypeError(f"Invalid value '{str(value)}' for dtype {self.dtype}")

    def __setitem__(self, key, value) -> None:
        key = check_array_indexer(self, key)

        if is_scalar(value):
            if is_valid_na_for_dtype(value, self.dtype):
                self._mask[key] = True
            else:
                value = self._validate_setitem_value(value)
                self._data[key] = value
                self._mask[key] = False
            return

        value, mask = self._coerce_to_array(value, dtype=self.dtype)

        self._data[key] = value
        self._mask[key] = mask

    def __iter__(self) -> Iterator:
        if self.ndim == 1:
            if not self._hasna:
                for val in self._data:
                    yield val
            else:
                na_value = self.dtype.na_value
                for isna_, val in zip(self._mask, self._data):
                    if isna_:
                        yield na_value
                    else:
                        yield val
        else:
            for i in range(len(self)):
                yield self[i]

    def __len__(self) -> int:
        return len(self._data)

    @property
    def shape(self) -> Shape:
        return self._data.shape

    @property
    def ndim(self) -> int:
        return self._data.ndim

    def swapaxes(self: BaseMaskedArrayT, axis1, axis2) -> BaseMaskedArrayT:
        data = self._data.swapaxes(axis1, axis2)
        mask = self._mask.swapaxes(axis1, axis2)
        return type(self)(data, mask)

    def delete(self: BaseMaskedArrayT, loc, axis: AxisInt = 0) -> BaseMaskedArrayT:
        data = np.delete(self._data, loc, axis=axis)
        mask = np.delete(self._mask, loc, axis=axis)
        return type(self)(data, mask)

    def reshape(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
        data = self._data.reshape(*args, **kwargs)
        mask = self._mask.reshape(*args, **kwargs)
        return type(self)(data, mask)

    def ravel(self: BaseMaskedArrayT, *args, **kwargs) -> BaseMaskedArrayT:
        # TODO: need to make sure we have the same order for data/mask
        data = self._data.ravel(*args, **kwargs)
        mask = self._mask.ravel(*args, **kwargs)
        return type(self)(data, mask)

    @property
    def T(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        return type(self)(self._data.T, self._mask.T)

    def round(self, decimals: int = 0, *args, **kwargs):
        """
        Round each value in the array to the given number of decimals.

        Parameters
        ----------
        decimals : int, default 0
            Number of decimal places to round to. If decimals is negative,
            it specifies the number of positions to the left of the decimal point.
        *args, **kwargs
            Additional arguments and keywords have no effect but might be
            accepted for compatibility with NumPy.

        Returns
        -------
        NumericArray
            Rounded values of the NumericArray.

        See Also
        --------
        numpy.around : Round values of an np.array.
        DataFrame.round : Round values of a DataFrame.
        Series.round : Round values of a Series.
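
        Examples
        --------
        An illustrative example; masked (NA) entries stay masked in the result.

        >>> arr = pd.array([1.1, 3.9, None], dtype="Float64")
        >>> arr.round()
        <FloatingArray>
        [1.0, 4.0, <NA>]
        Length: 3, dtype: Float64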
        """
        nv.validate_round(args, kwargs)
        values = np.round(self._data, decimals=decimals, **kwargs)

        # Usually we'll get same type as self, but ndarray[bool] casts to float
        return self._maybe_mask_result(values, self._mask.copy())

    # ------------------------------------------------------------------
    # Unary Methods

    def __invert__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        return type(self)(~self._data, self._mask.copy())

    def __neg__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        return type(self)(-self._data, self._mask.copy())

    def __pos__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        return self.copy()

    def __abs__(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        return type(self)(abs(self._data), self._mask.copy())

    # ------------------------------------------------------------------

    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
    ) -> np.ndarray:
        """
        Convert to a NumPy Array.

        By default converts to an object-dtype NumPy array. Specify the `dtype` and
        `na_value` keywords to customize the conversion.

        Parameters
        ----------
        dtype : dtype, default object
            The numpy dtype to convert to.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            the array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary. This is typically
            only possible when no missing values are present and `dtype`
            is the equivalent numpy dtype.
        na_value : scalar, optional
            Scalar missing value indicator to use in numpy array. Defaults
            to the native missing value indicator of this array (pd.NA).

        Returns
        -------
        numpy.ndarray

        Examples
        --------
        An object-dtype is the default result

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a.to_numpy()
        array([True, False, <NA>], dtype=object)

        When no missing values are present, an equivalent dtype can be used.

        >>> pd.array([True, False], dtype="boolean").to_numpy(dtype="bool")
        array([ True, False])
        >>> pd.array([1, 2], dtype="Int64").to_numpy("int64")
        array([1, 2])

        However, requesting such dtype will raise a ValueError if
        missing values are present and the default missing value :attr:`NA`
        is used.

        >>> a = pd.array([True, False, pd.NA], dtype="boolean")
        >>> a
        <BooleanArray>
        [True, False, <NA>]
        Length: 3, dtype: boolean

        >>> a.to_numpy(dtype="bool")
        Traceback (most recent call last):
        ...
        ValueError: cannot convert to bool numpy array in presence of missing values

        Specify a valid `na_value` instead

        >>> a.to_numpy(dtype="bool", na_value=False)
        array([ True, False, False])
        """
        if na_value is lib.no_default:
            na_value = libmissing.NA
        if dtype is None:
            dtype = object
        if self._hasna:
            if (
                not is_object_dtype(dtype)
                and not is_string_dtype(dtype)
                and na_value is libmissing.NA
            ):
                raise ValueError(
                    f"cannot convert to '{dtype}'-dtype NumPy array "
                    "with missing values. Specify an appropriate 'na_value' "
                    "for this dtype."
                )
            # don't pass copy to astype -> always need a copy since we are mutating
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                data = self._data.astype(dtype)
            data[self._mask] = na_value
        else:
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                data = self._data.astype(dtype, copy=copy)
        return data

    @doc(ExtensionArray.tolist)
    def tolist(self):
        if self.ndim > 1:
            return [x.tolist() for x in self]
        dtype = None if self._hasna else self._data.dtype
        return self.to_numpy(dtype=dtype).tolist()

    @overload
    def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
        ...

    @overload
    def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
        ...

    @overload
    def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
        ...

    def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
        dtype = pandas_dtype(dtype)

        if is_dtype_equal(dtype, self.dtype):
            if copy:
                return self.copy()
            return self

        # if we are astyping to another nullable masked dtype, we can fastpath
        if isinstance(dtype, BaseMaskedDtype):
            # TODO deal with NaNs for FloatingArray case
            with warnings.catch_warnings():
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                # TODO: Is rounding what we want long term?
                data = self._data.astype(dtype.numpy_dtype, copy=copy)
            # mask is copied depending on whether the data was copied, and
            # not directly depending on the `copy` keyword
            mask = self._mask if data is self._data else self._mask.copy()
            cls = dtype.construct_array_type()
            return cls(data, mask, copy=False)

        if isinstance(dtype, ExtensionDtype):
            eacls = dtype.construct_array_type()
            return eacls._from_sequence(self, dtype=dtype, copy=copy)

        na_value: float | np.datetime64 | lib.NoDefault

        # coerce
        if is_float_dtype(dtype):
            # In astype, we consider dtype=float to also mean na_value=np.nan
            na_value = np.nan
        elif is_datetime64_dtype(dtype):
            na_value = np.datetime64("NaT")
        else:
            na_value = lib.no_default

        # to_numpy will also raise, but we get somewhat nicer exception messages here
        if is_integer_dtype(dtype) and self._hasna:
            raise ValueError("cannot convert NA to integer")
        if is_bool_dtype(dtype) and self._hasna:
            # careful: astype_nansafe converts np.nan to True
            raise ValueError("cannot convert float NaN to bool")

        data = self.to_numpy(dtype=dtype, na_value=na_value, copy=copy)
        return data

    __array_priority__ = 1000  # higher than ndarray so ops dispatch to us

    def __array__(self, dtype: NpDtype | None = None) -> np.ndarray:
        """
        the array interface, return my values
        We return an object array here to preserve our scalar values
        """
        return self.to_numpy(dtype=dtype)

    _HANDLED_TYPES: tuple[type, ...]

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # For MaskedArray inputs, we apply the ufunc to ._data
        # and mask the result.

        out = kwargs.get("out", ())

        for x in inputs + out:
            if not isinstance(x, self._HANDLED_TYPES + (BaseMaskedArray,)):
                return NotImplemented

        # for binary ops, use our custom dunder methods
        result = ops.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_ufunc_with_out
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        mask = np.zeros(len(self), dtype=bool)
        inputs2 = []
        for x in inputs:
            if isinstance(x, BaseMaskedArray):
                mask |= x._mask
                inputs2.append(x._data)
            else:
                inputs2.append(x)

        def reconstruct(x):
            # we don't worry about scalar `x` here, since we
            # raise for reduce up above.
            from pandas.core.arrays import (
                BooleanArray,
                FloatingArray,
                IntegerArray,
            )

            if is_bool_dtype(x.dtype):
                m = mask.copy()
                return BooleanArray(x, m)
            elif is_integer_dtype(x.dtype):
                m = mask.copy()
                return IntegerArray(x, m)
            elif is_float_dtype(x.dtype):
                m = mask.copy()
                if x.dtype == np.float16:
                    # reached in e.g. np.sqrt on BooleanArray
                    # we don't support float16
                    x = x.astype(np.float32)
                return FloatingArray(x, m)
            else:
                x[mask] = np.nan
            return x

        result = getattr(ufunc, method)(*inputs2, **kwargs)
        if ufunc.nout > 1:
            # e.g. np.divmod
            return tuple(reconstruct(x) for x in result)
        elif method == "reduce":
            # e.g. np.add.reduce; test_ufunc_reduce_raises
            if self._mask.any():
                return self._na_value
            return result
        else:
            return reconstruct(result)

    def __arrow_array__(self, type=None):
        """
        Convert myself into a pyarrow Array.
        """
        import pyarrow as pa

        return pa.array(self._data, mask=self._mask, type=type)

    @property
    def _hasna(self) -> bool:
        # Note: this is expensive right now! The hope is that we can
        # make this faster by having an optional mask, but not have to change
        # source code using it..

        # error: Incompatible return value type (got "bool_", expected "bool")
        return self._mask.any()  # type: ignore[return-value]

    def _propagate_mask(
        self, mask: npt.NDArray[np.bool_] | None, other
    ) -> npt.NDArray[np.bool_]:
        if mask is None:
            mask = self._mask.copy()  # TODO: need test for BooleanArray needing a copy
            if other is libmissing.NA:
                # GH#45421 don't alter inplace
                mask = mask | True
            elif is_list_like(other) and len(other) == len(mask):
                mask = mask | isna(other)
        else:
            mask = self._mask | mask
        # Incompatible return value type (got "Optional[ndarray[Any, dtype[bool_]]]",
        # expected "ndarray[Any, dtype[bool_]]")
        return mask  # type: ignore[return-value]

    def _arith_method(self, other, op):
        op_name = op.__name__
        omask = None

        if (
            not hasattr(other, "dtype")
            and is_list_like(other)
            and len(other) == len(self)
        ):
            # Try inferring masked dtype instead of casting to object
            inferred_dtype = lib.infer_dtype(other, skipna=True)
            if inferred_dtype == "integer":
                from pandas.core.arrays import IntegerArray

                other = IntegerArray._from_sequence(other)
            elif inferred_dtype in ["floating", "mixed-integer-float"]:
                from pandas.core.arrays import FloatingArray

                other = FloatingArray._from_sequence(other)

            elif inferred_dtype in ["boolean"]:
                from pandas.core.arrays import BooleanArray

                other = BooleanArray._from_sequence(other)

        if isinstance(other, BaseMaskedArray):
            other, omask = other._data, other._mask

        elif is_list_like(other):
            if not isinstance(other, ExtensionArray):
                other = np.asarray(other)
                if other.ndim > 1:
                    raise NotImplementedError("can only perform ops with 1-d structures")

        # We wrap the non-masked arithmetic logic used for numpy dtypes
        # in Series/Index arithmetic ops.
        other = ops.maybe_prepare_scalar_for_op(other, (len(self),))
        pd_op = ops.get_array_op(op)
        other = ensure_wrapped_if_datetimelike(other)

        if op_name in {"pow", "rpow"} and isinstance(other, np.bool_):
            # Avoid DeprecationWarning: In future, it will be an error
            # for 'np.bool_' scalars to be interpreted as an index
            # e.g. test_array_scalar_like_equivalence
            other = bool(other)

        mask = self._propagate_mask(omask, other)

        if other is libmissing.NA:
            result = np.ones_like(self._data)
            if self.dtype.kind == "b":
                if op_name in {
                    "floordiv",
                    "rfloordiv",
                    "pow",
                    "rpow",
                    "truediv",
                    "rtruediv",
                }:
                    # GH#41165 Try to match non-masked Series behavior
                    # This is still imperfect GH#46043
                    raise NotImplementedError(
                        f"operator '{op_name}' not implemented for bool dtypes"
                    )
                if op_name in {"mod", "rmod"}:
                    dtype = "int8"
                else:
                    dtype = "bool"
                result = result.astype(dtype)
            elif "truediv" in op_name and self.dtype.kind != "f":
                # The actual data here doesn't matter since the mask
                # will be all-True, but since this is division, we want
                # to end up with floating dtype.
                result = result.astype(np.float64)
        else:
            # Make sure we do this before the "pow" mask checks
            # to get an expected exception message on shape mismatch.
            if self.dtype.kind in ["i", "u"] and op_name in ["floordiv", "mod"]:
                # TODO(GH#30188) ATM we don't match the behavior of non-masked
                # types with respect to floordiv-by-zero
                pd_op = op

            with np.errstate(all="ignore"):
                result = pd_op(self._data, other)

        if op_name == "pow":
            # 1 ** x is 1.
            mask = np.where((self._data == 1) & ~self._mask, False, mask)
            # x ** 0 is 1.
            if omask is not None:
                mask = np.where((other == 0) & ~omask, False, mask)
            elif other is not libmissing.NA:
                mask = np.where(other == 0, False, mask)

        elif op_name == "rpow":
            # 1 ** x is 1.
            if omask is not None:
                mask = np.where((other == 1) & ~omask, False, mask)
            elif other is not libmissing.NA:
                mask = np.where(other == 1, False, mask)
            # x ** 0 is 1.
            mask = np.where((self._data == 0) & ~self._mask, False, mask)

        return self._maybe_mask_result(result, mask)

    _logical_method = _arith_method

    def _cmp_method(self, other, op) -> BooleanArray:
        from pandas.core.arrays import BooleanArray

        mask = None

        if isinstance(other, BaseMaskedArray):
            other, mask = other._data, other._mask

        elif is_list_like(other):
            other = np.asarray(other)
            if other.ndim > 1:
                raise NotImplementedError("can only perform ops with 1-d structures")
            if len(self) != len(other):
                raise ValueError("Lengths must match to compare")

        if other is libmissing.NA:
            # numpy does not handle pd.NA well as "other" scalar (it returns
            # a scalar False instead of an array)
            # This may be fixed by NA.__array_ufunc__. Revisit this check
            # once that's implemented.
            result = np.zeros(self._data.shape, dtype="bool")
            mask = np.ones(self._data.shape, dtype="bool")
        else:
            with warnings.catch_warnings():
                # numpy may show a FutureWarning or DeprecationWarning:
                # elementwise comparison failed; returning scalar instead,
                # but in the future will perform elementwise comparison
                # before returning NotImplemented. We fall back to the correct
                # behavior today, so that should be fine to ignore.
                warnings.filterwarnings("ignore", "elementwise", FutureWarning)
                warnings.filterwarnings("ignore", "elementwise", DeprecationWarning)
                with np.errstate(all="ignore"):
                    method = getattr(self._data, f"__{op.__name__}__")
                    result = method(other)

                if result is NotImplemented:
                    result = invalid_comparison(self._data, other, op)

        mask = self._propagate_mask(mask, other)
        return BooleanArray(result, mask, copy=False)

    def _maybe_mask_result(self, result, mask):
        """
        Parameters
        ----------
        result : array-like or tuple[array-like]
        mask : array-like bool
        """
        if isinstance(result, tuple):
            # i.e. divmod
            div, mod = result
            return (
                self._maybe_mask_result(div, mask),
                self._maybe_mask_result(mod, mask),
            )

        if is_float_dtype(result.dtype):
            from pandas.core.arrays import FloatingArray

            return FloatingArray(result, mask, copy=False)

        elif is_bool_dtype(result.dtype):
            from pandas.core.arrays import BooleanArray

            return BooleanArray(result, mask, copy=False)

        elif (
            isinstance(result.dtype, np.dtype)
            and result.dtype.kind == "m"
            and is_supported_unit(get_unit_from_dtype(result.dtype))
        ):
            # e.g. test_numeric_arr_mul_tdscalar_numexpr_path
            from pandas.core.arrays import TimedeltaArray

            if not isinstance(result, TimedeltaArray):
                result = TimedeltaArray._simple_new(result, dtype=result.dtype)

            result[mask] = result.dtype.type("NaT")
            return result

        elif is_integer_dtype(result.dtype):
            from pandas.core.arrays import IntegerArray

            return IntegerArray(result, mask, copy=False)

        else:
            result[mask] = np.nan
            return result

    def isna(self) -> np.ndarray:
        return self._mask.copy()

    @property
    def _na_value(self):
        return self.dtype.na_value

    @property
    def nbytes(self) -> int:
        return self._data.nbytes + self._mask.nbytes

    @classmethod
    def _concat_same_type(
        cls: type[BaseMaskedArrayT],
        to_concat: Sequence[BaseMaskedArrayT],
        axis: AxisInt = 0,
    ) -> BaseMaskedArrayT:
        data = np.concatenate([x._data for x in to_concat], axis=axis)
        mask = np.concatenate([x._mask for x in to_concat], axis=axis)
        return cls(data, mask)

    def take(
        self: BaseMaskedArrayT,
        indexer,
        *,
        allow_fill: bool = False,
        fill_value: Scalar | None = None,
        axis: AxisInt = 0,
    ) -> BaseMaskedArrayT:
        # we always fill with 1 internally
        # to avoid upcasting
        data_fill_value = self._internal_fill_value if isna(fill_value) else fill_value
        result = take(
            self._data,
            indexer,
            fill_value=data_fill_value,
            allow_fill=allow_fill,
            axis=axis,
        )

        mask = take(
            self._mask, indexer, fill_value=True, allow_fill=allow_fill, axis=axis
        )

        # if we are filling
        # we only fill where the indexer is null
        # not existing missing values
        # TODO(jreback) what if we have a non-na float as a fill value?
        if allow_fill and notna(fill_value):
            fill_mask = np.asarray(indexer) == -1
            result[fill_mask] = fill_value
            mask = mask ^ fill_mask

        return type(self)(result, mask, copy=False)

    # error: Return type "BooleanArray" of "isin" incompatible with return type
    # "ndarray" in supertype "ExtensionArray"
    def isin(self, values) -> BooleanArray:  # type: ignore[override]
        from pandas.core.arrays import BooleanArray

        # algorithms.isin will eventually convert values to an ndarray, so no extra
        # cost to doing it here first
        values_arr = np.asarray(values)
        result = isin(self._data, values_arr)

        if self._hasna:
            values_have_NA = is_object_dtype(values_arr.dtype) and any(
                val is self.dtype.na_value for val in values_arr
            )

            # For now, NA does not propagate so set result according to presence of NA,
            # see https://github.com/pandas-dev/pandas/pull/38379 for some discussion
            result[self._mask] = values_have_NA

        mask = np.zeros(self._data.shape, dtype=bool)
        return BooleanArray(result, mask, copy=False)

    def copy(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        data, mask = self._data, self._mask
        data = data.copy()
        mask = mask.copy()
        return type(self)(data, mask, copy=False)

    def unique(self: BaseMaskedArrayT) -> BaseMaskedArrayT:
        """
        Compute the BaseMaskedArray of unique values.

        Returns
        -------
        uniques : BaseMaskedArray
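
        Examples
        --------
        An illustrative example; a missing value appears once among the uniques.

        >>> pd.array([1, 2, 3, 2, None], dtype="Int64").unique()
        <IntegerArray>
        [1, 2, 3, <NA>]
        Length: 4, dtype: Int64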
        """
        uniques, mask = algos.unique_with_mask(self._data, self._mask)
        return type(self)(uniques, mask, copy=False)

    @doc(ExtensionArray.searchsorted)
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if self._hasna:
            raise ValueError(
                "searchsorted requires array to be sorted, which is impossible "
                "with NAs present."
            )
        if isinstance(value, ExtensionArray):
            value = value.astype(object)
        # Base class searchsorted would cast to object, which is *much* slower.
        return self._data.searchsorted(value, side=side, sorter=sorter)

    @doc(ExtensionArray.factorize)
    def factorize(
        self,
        use_na_sentinel: bool = True,
    ) -> tuple[np.ndarray, ExtensionArray]:
        arr = self._data
        mask = self._mask

        # Use a sentinel for na; recode and add NA to uniques if necessary below
        codes, uniques = factorize_array(arr, use_na_sentinel=True, mask=mask)

        # check that factorize_array correctly preserves dtype.
        assert uniques.dtype == self.dtype.numpy_dtype, (uniques.dtype, self.dtype)

        has_na = mask.any()
        if use_na_sentinel or not has_na:
            size = len(uniques)
        else:
            # Make room for an NA value
            size = len(uniques) + 1
        uniques_mask = np.zeros(size, dtype=bool)
        if not use_na_sentinel and has_na:
            na_index = mask.argmax()
            # Insert na with the proper code
            if na_index == 0:
                na_code = np.intp(0)
            else:
                # mypy error: Slice index must be an integer or None
                # https://github.com/python/mypy/issues/2410
                na_code = codes[:na_index].max() + 1  # type: ignore[misc]
            codes[codes >= na_code] += 1
            codes[codes == -1] = na_code
            # dummy value for uniques; not used since uniques_mask will be True
            uniques = np.insert(uniques, na_code, 0)
            uniques_mask[na_code] = True
        uniques_ea = type(self)(uniques, uniques_mask)

        return codes, uniques_ea

    @doc(ExtensionArray._values_for_argsort)
    def _values_for_argsort(self) -> np.ndarray:
        return self._data

    def value_counts(self, dropna: bool = True) -> Series:
        """
        Returns a Series containing counts of each unique value.

        Parameters
        ----------
        dropna : bool, default True
            Don't include counts of missing values.

        Returns
        -------
        counts : Series

        See Also
        --------
        Series.value_counts
        """
        from pandas import (
            Index,
            Series,
        )
        from pandas.arrays import IntegerArray

        keys, value_counts = algos.value_counts_arraylike(
            self._data, dropna=True, mask=self._mask
        )

        if dropna:
            res = Series(value_counts, index=keys, name="count", copy=False)
            res.index = res.index.astype(self.dtype)
            res = res.astype("Int64")
            return res

        # if we want nans, count the mask
        counts = np.empty(len(value_counts) + 1, dtype="int64")
        counts[:-1] = value_counts
        counts[-1] = self._mask.sum()

        index = Index(keys, dtype=self.dtype).insert(len(keys), self.dtype.na_value)
        index = index.astype(self.dtype)

        mask = np.zeros(len(counts), dtype="bool")
        counts_array = IntegerArray(counts, mask)

        return Series(counts_array, index=index, name="count", copy=False)

    @doc(ExtensionArray.equals)
    def equals(self, other) -> bool:
        if type(self) != type(other):
            return False
        if other.dtype != self.dtype:
            return False

        # GH#44382 if e.g. self[1] is np.nan and other[1] is pd.NA, we are NOT
        # equal.
        if not np.array_equal(self._mask, other._mask):
            return False

        left = self._data[~self._mask]
        right = other._data[~other._mask]
        return array_equivalent(left, right, dtype_equal=True)

    def _quantile(
        self, qs: npt.NDArray[np.float64], interpolation: str
    ) -> BaseMaskedArray:
        """
        Dispatch to quantile_with_mask, needed because we do not have
        _from_factorized.

        Notes
        -----
        We assume that all impacted cases are 1D-only.
        """
        res = quantile_with_mask(
            self._data,
            mask=self._mask,
            # TODO(GH#40932): na_value_for_dtype(self.dtype.numpy_dtype)
            # instead of np.nan
            fill_value=np.nan,
            qs=qs,
            interpolation=interpolation,
        )

        if self._hasna:
            # Our result mask is all-False unless we are all-NA, in which
            # case it is all-True.
            if self.ndim == 2:
                # I think this should be out_mask=self.isna().all(axis=1)
                # but am holding off until we have tests
                raise NotImplementedError
            if self.isna().all():
                out_mask = np.ones(res.shape, dtype=bool)

                if is_integer_dtype(self.dtype):
                    # We try to maintain int dtype if possible for not all-na case
                    # as well
                    res = np.zeros(res.shape, dtype=self.dtype.numpy_dtype)
            else:
                out_mask = np.zeros(res.shape, dtype=bool)
        else:
            out_mask = np.zeros(res.shape, dtype=bool)
        return self._maybe_mask_result(res, mask=out_mask)

    # ------------------------------------------------------------------
    # Reductions

    def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
        if name in {"any", "all", "min", "max", "sum", "prod", "mean", "var", "std"}:
            return getattr(self, name)(skipna=skipna, **kwargs)

        data = self._data
        mask = self._mask

        # median, skew, kurt, sem
        op = getattr(nanops, f"nan{name}")
        result = op(data, axis=0, skipna=skipna, mask=mask, **kwargs)

        if np.isnan(result):
            return libmissing.NA

        return result

    def _wrap_reduction_result(self, name: str, result, skipna, **kwargs):
        if isinstance(result, np.ndarray):
            axis = kwargs["axis"]
            if skipna:
                # we only retain mask for all-NA rows/columns
                mask = self._mask.all(axis=axis)
            else:
                mask = self._mask.any(axis=axis)

            return self._maybe_mask_result(result, mask)
        return result

    def sum(
        self,
        *,
        skipna: bool = True,
        min_count: int = 0,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        nv.validate_sum((), kwargs)

        # TODO: do this in validate_sum?
        if "out" in kwargs:
            # np.sum; test_floating_array_numpy_sum
            if kwargs["out"] is not None:
                raise NotImplementedError
            kwargs.pop("out")

        result = masked_reductions.sum(
            self._data,
            self._mask,
            skipna=skipna,
            min_count=min_count,
            axis=axis,
        )
        return self._wrap_reduction_result(
            "sum", result, skipna=skipna, axis=axis, **kwargs
        )

    def prod(
        self,
        *,
        skipna: bool = True,
        min_count: int = 0,
        axis: AxisInt | None = 0,
        **kwargs,
    ):
        nv.validate_prod((), kwargs)
        result = masked_reductions.prod(
            self._data,
            self._mask,
            skipna=skipna,
            min_count=min_count,
            axis=axis,
        )
        return self._wrap_reduction_result(
            "prod", result, skipna=skipna, axis=axis, **kwargs
        )

    def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_mean((), kwargs)
        result = masked_reductions.mean(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )
        return self._wrap_reduction_result(
            "mean", result, skipna=skipna, axis=axis, **kwargs
        )

    def var(
        self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
    ):
        nv.validate_stat_ddof_func((), kwargs, fname="var")
        result = masked_reductions.var(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
            ddof=ddof,
        )
        return self._wrap_reduction_result(
            "var", result, skipna=skipna, axis=axis, **kwargs
        )

    def std(
        self, *, skipna: bool = True, axis: AxisInt | None = 0, ddof: int = 1, **kwargs
    ):
        nv.validate_stat_ddof_func((), kwargs, fname="std")
        result = masked_reductions.std(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
            ddof=ddof,
        )
        return self._wrap_reduction_result(
            "std", result, skipna=skipna, axis=axis, **kwargs
        )

    def min(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_min((), kwargs)
        return masked_reductions.min(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )

    def max(self, *, skipna: bool = True, axis: AxisInt | None = 0, **kwargs):
        nv.validate_max((), kwargs)
        return masked_reductions.max(
            self._data,
            self._mask,
            skipna=skipna,
            axis=axis,
        )

    def any(self, *, skipna: bool = True, **kwargs):
        """
        Return whether any element is truthy.

        Returns False unless there is at least one element that is truthy.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        .. versionchanged:: 1.4.0

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be False, as for an empty array.
            If `skipna` is False, the result will still be True if there is
            at least one element that is truthy, otherwise NA will be returned
            if there are NA's present.
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.any : Numpy version of this method.
        BaseMaskedArray.all : Return whether all elements are truthy.

        Examples
        --------
        The result indicates whether any element is truthy (and by default
        skips NAs):

        >>> pd.array([True, False, True]).any()
        True
        >>> pd.array([True, False, pd.NA]).any()
        True
        >>> pd.array([False, False, pd.NA]).any()
        False
        >>> pd.array([], dtype="boolean").any()
        False
        >>> pd.array([pd.NA], dtype="boolean").any()
        False
        >>> pd.array([pd.NA], dtype="Float64").any()
        False

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, False, pd.NA]).any(skipna=False)
        True
        >>> pd.array([1, 0, pd.NA]).any(skipna=False)
        True
        >>> pd.array([False, False, pd.NA]).any(skipna=False)
        <NA>
        >>> pd.array([0, 0, pd.NA]).any(skipna=False)
        <NA>
        """
        kwargs.pop("axis", None)
        nv.validate_any((), kwargs)

        values = self._data.copy()
        # error: Argument 3 to "putmask" has incompatible type "object";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]],
        # bool, int, float, complex, str, bytes,
        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
        np.putmask(values, self._mask, self._falsey_value)  # type: ignore[arg-type]
        result = values.any()
        if skipna:
            return result
        else:
            if result or len(self) == 0 or not self._mask.any():
                return result
            else:
                return self.dtype.na_value

    def all(self, *, skipna: bool = True, **kwargs):
        """
        Return whether all elements are truthy.

        Returns True unless there is at least one element that is falsey.
        By default, NAs are skipped. If ``skipna=False`` is specified and
        missing values are present, similar :ref:`Kleene logic <boolean.kleene>`
        is used as for logical operations.

        .. versionchanged:: 1.4.0

        Parameters
        ----------
        skipna : bool, default True
            Exclude NA values. If the entire array is NA and `skipna` is
            True, then the result will be True, as for an empty array.
            If `skipna` is False, the result will still be False if there is
            at least one element that is falsey, otherwise NA will be returned
            if there are NA's present.
        **kwargs : any, default None
            Additional keywords have no effect but might be accepted for
            compatibility with NumPy.

        Returns
        -------
        bool or :attr:`pandas.NA`

        See Also
        --------
        numpy.all : Numpy version of this method.
        BooleanArray.any : Return whether any element is truthy.

        Examples
        --------
        The result indicates whether all elements are truthy (and by default
        skips NAs):

        >>> pd.array([True, True, pd.NA]).all()
        True
        >>> pd.array([1, 1, pd.NA]).all()
        True
        >>> pd.array([True, False, pd.NA]).all()
        False
        >>> pd.array([], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="boolean").all()
        True
        >>> pd.array([pd.NA], dtype="Float64").all()
        True

        With ``skipna=False``, the result can be NA if this is logically
        required (whether ``pd.NA`` is True or False influences the result):

        >>> pd.array([True, True, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([1, 1, pd.NA]).all(skipna=False)
        <NA>
        >>> pd.array([True, False, pd.NA]).all(skipna=False)
        False
        >>> pd.array([1, 0, pd.NA]).all(skipna=False)
        False
        """
        kwargs.pop("axis", None)
        nv.validate_all((), kwargs)

        values = self._data.copy()
        # error: Argument 3 to "putmask" has incompatible type "object";
        # expected "Union[_SupportsArray[dtype[Any]],
        # _NestedSequence[_SupportsArray[dtype[Any]]],
        # bool, int, float, complex, str, bytes,
        # _NestedSequence[Union[bool, int, float, complex, str, bytes]]]"
        np.putmask(values, self._mask, self._truthy_value)  # type: ignore[arg-type]
        result = values.all()

        if skipna:
            return result
        else:
            if not result or len(self) == 0 or not self._mask.any():
                return result
            else:
                return self.dtype.na_value

    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> BaseMaskedArray:
        data = self._data
        mask = self._mask

        op = getattr(masked_accumulations, name)
        data, mask = op(data, mask, skipna=skipna, **kwargs)

        return type(self)(data, mask, copy=False)