1"""
2An interface for extending pandas with custom arrays.
3
4.. warning::
5
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
10
11import operator
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 ClassVar,
17 Literal,
18 cast,
19 overload,
20)
21import warnings
22
23import numpy as np
24
25from pandas._libs import (
26 algos as libalgos,
27 lib,
28)
29from pandas.compat import set_function_name
30from pandas.compat.numpy import function as nv
31from pandas.errors import AbstractMethodError
32from pandas.util._decorators import (
33 Appender,
34 Substitution,
35 cache_readonly,
36)
37from pandas.util._exceptions import find_stack_level
38from pandas.util._validators import (
39 validate_bool_kwarg,
40 validate_fillna_kwargs,
41 validate_insert_loc,
42)
43
44from pandas.core.dtypes.cast import maybe_cast_pointwise_result
45from pandas.core.dtypes.common import (
46 is_list_like,
47 is_scalar,
48 pandas_dtype,
49)
50from pandas.core.dtypes.dtypes import ExtensionDtype
51from pandas.core.dtypes.generic import (
52 ABCDataFrame,
53 ABCIndex,
54 ABCSeries,
55)
56from pandas.core.dtypes.missing import isna
57
58from pandas.core import (
59 arraylike,
60 missing,
61 roperator,
62)
63from pandas.core.algorithms import (
64 duplicated,
65 factorize_array,
66 isin,
67 map_array,
68 mode,
69 rank,
70 unique,
71)
72from pandas.core.array_algos.quantile import quantile_with_mask
73from pandas.core.missing import _fill_limit_area_1d
74from pandas.core.sorting import (
75 nargminmax,
76 nargsort,
77)
78
79if TYPE_CHECKING:
80 from collections.abc import (
81 Iterator,
82 Sequence,
83 )
84
85 from pandas._typing import (
86 ArrayLike,
87 AstypeArg,
88 AxisInt,
89 Dtype,
90 DtypeObj,
91 FillnaOptions,
92 InterpolateOptions,
93 NumpySorter,
94 NumpyValueArrayLike,
95 PositionalIndexer,
96 ScalarIndexer,
97 Self,
98 SequenceIndexer,
99 Shape,
100 SortKind,
101 TakeIndexer,
102 npt,
103 )
104
105 from pandas import Index
106
107_extension_array_shared_docs: dict[str, str] = {}
108
109
110class ExtensionArray:
111 """
112 Abstract base class for custom 1-D array types.
113
114 pandas will recognize instances of this class as proper arrays
115 with a custom type and will not attempt to coerce them to objects. They
116 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
117
118 Attributes
119 ----------
120 dtype
121 nbytes
122 ndim
123 shape
124
125 Methods
126 -------
127 argsort
128 astype
129 copy
130 dropna
131 duplicated
132 factorize
133 fillna
134 equals
135 insert
136 interpolate
137 isin
138 isna
139 ravel
140 repeat
141 searchsorted
142 shift
143 take
144 tolist
145 unique
146 view
147 _accumulate
148 _concat_same_type
149 _explode
150 _formatter
151 _from_factorized
152 _from_sequence
153 _from_sequence_of_strings
154 _hash_pandas_object
155 _pad_or_backfill
156 _reduce
157 _values_for_argsort
158 _values_for_factorize
159
160 Notes
161 -----
162 The interface includes the following abstract methods that must be
163 implemented by subclasses:
164
165 * _from_sequence
166 * _from_factorized
167 * __getitem__
168 * __len__
169 * __eq__
170 * dtype
171 * nbytes
172 * isna
173 * take
174 * copy
175 * _concat_same_type
176 * interpolate
177
178 A default repr displaying the type, (truncated) data, length,
    and dtype is provided. It can be customized or replaced by
    overriding:
181
182 * __repr__ : A default repr for the ExtensionArray.
183 * _formatter : Print scalars inside a Series or DataFrame.
184
185 Some methods require casting the ExtensionArray to an ndarray of Python
186 objects with ``self.astype(object)``, which may be expensive. When
187 performance is a concern, we highly recommend overriding the following
188 methods:
189
190 * fillna
191 * _pad_or_backfill
192 * dropna
193 * unique
194 * factorize / _values_for_factorize
195 * argsort, argmax, argmin / _values_for_argsort
196 * searchsorted
197 * map
198
199 The remaining methods implemented on this class should be performant,
200 as they only compose abstract methods. Still, a more efficient
201 implementation may be available, and these methods can be overridden.
202
203 One can implement methods to handle array accumulations or reductions.
204
205 * _accumulate
206 * _reduce
207
208 One can implement methods to handle parsing from strings that will be used
209 in methods such as ``pandas.io.parsers.read_csv``.
210
211 * _from_sequence_of_strings
212
213 This class does not inherit from 'abc.ABCMeta' for performance reasons.
214 Methods and properties required by the interface raise
215 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
216 provided for registering virtual subclasses.
217
218 ExtensionArrays are limited to 1 dimension.
219
220 They may be backed by none, one, or many NumPy arrays. For example,
221 ``pandas.Categorical`` is an extension array backed by two arrays,
222 one for codes and one for categories. An array of IPv6 address may
223 be backed by a NumPy structured array with two fields, one for the
224 lower 64 bits and one for the upper 64 bits. Or they may be backed
225 by some other storage type, like Python lists. Pandas makes no
226 assumptions on how the data are stored, just that it can be converted
227 to a NumPy array.
228 The ExtensionArray interface does not impose any rules on how this data
229 is stored. However, currently, the backing data cannot be stored in
230 attributes called ``.values`` or ``._values`` to ensure full compatibility
231 with pandas internals. But other names as ``.data``, ``._data``,
232 ``._items``, ... can be freely used.
233
234 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
235 that
236
237 1. You defer by returning ``NotImplemented`` when any Series are present
238 in `inputs`. Pandas will extract the arrays and call the ufunc again.
239 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
       Pandas inspects this to determine whether the ufunc is valid for the
241 types present.
242
243 See :ref:`extending.extension.ufunc` for more.
244
245 By default, ExtensionArrays are not hashable. Immutable subclasses may
246 override this behavior.
247
248 Examples
249 --------
250 Please see the following:
251
252 https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py
253 """
254
255 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
256 # Don't override this.
257 _typ = "extension"
258
259 # similar to __array_priority__, positions ExtensionArray after Index,
260 # Series, and DataFrame. EA subclasses may override to choose which EA
261 # subclass takes priority. If overriding, the value should always be
262 # strictly less than 2000 to be below Index.__pandas_priority__.
263 __pandas_priority__ = 1000
264
265 # ------------------------------------------------------------------------
266 # Constructors
267 # ------------------------------------------------------------------------
268
    @classmethod
    def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
        """
        Construct a new ExtensionArray from a sequence of scalars.

        Parameters
        ----------
        scalars : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type`` or be converted into this type in this method.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray.
        copy : bool, default False
            If True, copy the underlying data.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.arrays.IntegerArray._from_sequence([4, 5])
        <IntegerArray>
        [4, 5]
        Length: 2, dtype: Int64
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(cls)
297
298 @classmethod
299 def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
300 """
301 Strict analogue to _from_sequence, allowing only sequences of scalars
302 that should be specifically inferred to the given dtype.
303
304 Parameters
305 ----------
306 scalars : sequence
307 dtype : ExtensionDtype
308
309 Raises
310 ------
311 TypeError or ValueError
312
313 Notes
314 -----
315 This is called in a try/except block when casting the result of a
316 pointwise operation.
317 """
318 try:
319 return cls._from_sequence(scalars, dtype=dtype, copy=False)
320 except (ValueError, TypeError):
321 raise
322 except Exception:
323 warnings.warn(
324 "_from_scalars should only raise ValueError or TypeError. "
325 "Consider overriding _from_scalars where appropriate.",
326 stacklevel=find_stack_level(),
327 )
328 raise
329
    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ):
        """
        Construct a new ExtensionArray from a sequence of strings.

        Parameters
        ----------
        strings : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type``.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray.
        copy : bool, default False
            If True, copy the underlying data.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses that want string parsing
            (e.g. via ``pandas.io.parsers.read_csv``) must override.

        Examples
        --------
        >>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"])
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # Abstract: optional part of the ExtensionArray interface.
        raise AbstractMethodError(cls)
360
    @classmethod
    def _from_factorized(cls, values, original):
        """
        Reconstruct an ExtensionArray after factorization.

        Parameters
        ----------
        values : ndarray
            An integer ndarray with the factorized values.
        original : ExtensionArray
            The original ExtensionArray that factorize was called on.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        See Also
        --------
        factorize : Top-level factorize method that dispatches here.
        ExtensionArray.factorize : Encode the extension array as an enumerated type.

        Examples
        --------
        >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
        ...                                       pd.Interval(1, 5), pd.Interval(1, 5)])
        >>> codes, uniques = pd.factorize(interv_arr)
        >>> pd.arrays.IntervalArray._from_factorized(uniques, interv_arr)
        <IntervalArray>
        [(0, 1], (1, 5]]
        Length: 2, dtype: interval[int64, right]
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(cls)
389
390 # ------------------------------------------------------------------------
391 # Must be a Sequence
392 # ------------------------------------------------------------------------
    @overload
    def __getitem__(self, item: ScalarIndexer) -> Any:
        ...

    @overload
    def __getitem__(self, item: SequenceIndexer) -> Self:
        ...

    def __getitem__(self, item: PositionalIndexer) -> Self | Any:
        """
        Select a subset of self.

        Parameters
        ----------
        item : int, slice, or ndarray
            * int: The position in 'self' to get.

            * slice: A slice object, where 'start', 'stop', and 'step' are
              integers or None

            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

            * list[int]: A list of int

        Returns
        -------
        item : scalar or ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Notes
        -----
        For scalar ``item``, return a scalar value suitable for the array's
        type. This should be an instance of ``self.dtype.type``.

        For slice ``key``, return an instance of ``ExtensionArray``, even
        if the slice is length 0 or 1.

        For a boolean mask, return an instance of ``ExtensionArray``, filtered
        to the values where ``item`` is True.
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
433
    def __setitem__(self, key, value) -> None:
        """
        Set one or more values inplace.

        This method is not required to satisfy the pandas extension array
        interface.

        Parameters
        ----------
        key : int, ndarray, or slice
            When called from, e.g. ``Series.__setitem__``, ``key`` will be
            one of

            * scalar int
            * ndarray of integers.
            * boolean ndarray
            * slice object

        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set of ``key``.

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            Always, on this base class; mutable subclasses may override.
        """
        # Some notes to the ExtensionArray implementer who may have ended up
        # here. While this method is not required for the interface, if you
        # *do* choose to implement __setitem__, then some semantics should be
        # observed:
        #
        # * Setting multiple values : ExtensionArrays should support setting
        #   multiple values at once, 'key' will be a sequence of integers and
        #   'value' will be a same-length sequence.
        #
        # * Broadcasting : For a sequence 'key' and a scalar 'value',
        #   each position in 'key' should be set to 'value'.
        #
        # * Coercion : Most users will expect basic coercion to work. For
        #   example, a string like '2018-01-01' is coerced to a datetime
        #   when setting on a datetime64ns array. In general, if the
        #   __init__ method coerces that value, then so should __setitem__
        # Note, also, that Series/DataFrame.where internally use __setitem__
        # on a copy of the data.
        raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
478
    def __len__(self) -> int:
        """
        Length of this array.

        Returns
        -------
        length : int

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
488
489 def __iter__(self) -> Iterator[Any]:
490 """
491 Iterate over elements of the array.
492 """
493 # This needs to be implemented so that pandas recognizes extension
494 # arrays as list-like. The default implementation makes successive
495 # calls to ``__getitem__``, which may be slower than necessary.
496 for i in range(len(self)):
497 yield self[i]
498
    def __contains__(self, item: object) -> bool | np.bool_:
        """
        Return for `item in self`.

        Parameters
        ----------
        item : object
            Scalar (possibly NA-like) to test for membership.

        Returns
        -------
        bool or numpy.bool_
        """
        # GH37867
        # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
        # would raise a TypeError. The implementation below works around that.
        if is_scalar(item) and isna(item):
            if not self._can_hold_na:
                # An array that cannot hold NA never contains an NA-like item.
                return False
            elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
                # Only this dtype's own NA sentinel (or an instance of its
                # scalar type) matches; a different NA-like does not.
                return self._hasna
            else:
                return False
        else:
            # Non-NA scalar: fall back to element-wise equality.
            # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
            # attribute "any"
            return (item == self).any()  # type: ignore[union-attr]
517
    # error: Signature of "__eq__" incompatible with supertype "object"
    def __eq__(self, other: object) -> ArrayLike:  # type: ignore[override]
        """
        Return for `self == other` (element-wise equality).

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.
        """
        # Implementer note: this should return a boolean numpy ndarray or
        # a boolean ExtensionArray.
        # When `other` is one of Series, Index, or DataFrame, this method should
        # return NotImplemented (to ensure that those objects are responsible for
        # first unpacking the arrays, and then dispatch the operation to the
        # underlying arrays)
        raise AbstractMethodError(self)
530
531 # error: Signature of "__ne__" incompatible with supertype "object"
532 def __ne__(self, other: object) -> ArrayLike: # type: ignore[override]
533 """
534 Return for `self != other` (element-wise in-equality).
535 """
536 # error: Unsupported operand type for ~ ("ExtensionArray")
537 return ~(self == other) # type: ignore[operator]
538
539 def to_numpy(
540 self,
541 dtype: npt.DTypeLike | None = None,
542 copy: bool = False,
543 na_value: object = lib.no_default,
544 ) -> np.ndarray:
545 """
546 Convert to a NumPy ndarray.
547
548 This is similar to :meth:`numpy.asarray`, but may provide additional control
549 over how the conversion is done.
550
551 Parameters
552 ----------
553 dtype : str or numpy.dtype, optional
554 The dtype to pass to :meth:`numpy.asarray`.
555 copy : bool, default False
556 Whether to ensure that the returned value is a not a view on
557 another array. Note that ``copy=False`` does not *ensure* that
558 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
559 a copy is made, even if not strictly necessary.
560 na_value : Any, optional
561 The value to use for missing values. The default value depends
562 on `dtype` and the type of the array.
563
564 Returns
565 -------
566 numpy.ndarray
567 """
568 result = np.asarray(self, dtype=dtype)
569 if copy or na_value is not lib.no_default:
570 result = result.copy()
571 if na_value is not lib.no_default:
572 result[self.isna()] = na_value
573 return result
574
575 # ------------------------------------------------------------------------
576 # Required attributes
577 # ------------------------------------------------------------------------
578
    @property
    def dtype(self) -> ExtensionDtype:
        """
        An instance of ExtensionDtype.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.array([1, 2, 3]).dtype
        Int64Dtype()
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
590
591 @property
592 def shape(self) -> Shape:
593 """
594 Return a tuple of the array dimensions.
595
596 Examples
597 --------
598 >>> arr = pd.array([1, 2, 3])
599 >>> arr.shape
600 (3,)
601 """
602 return (len(self),)
603
604 @property
605 def size(self) -> int:
606 """
607 The number of elements in the array.
608 """
609 # error: Incompatible return value type (got "signedinteger[_64Bit]",
610 # expected "int") [return-value]
611 return np.prod(self.shape) # type: ignore[return-value]
612
    @property
    def ndim(self) -> int:
        """
        Extension Arrays are only allowed to be 1-dimensional.

        Returns
        -------
        int
            Always ``1``.

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr.ndim
        1
        """
        # The ExtensionArray interface is strictly 1-dimensional by design.
        return 1
625
    @property
    def nbytes(self) -> int:
        """
        The number of bytes needed to store this object in memory.

        Returns
        -------
        int

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.array([1, 2, 3]).nbytes
        27
        """
        # If this is expensive to compute, return an approximate lower bound
        # on the number of bytes needed.
        raise AbstractMethodError(self)
639
640 # ------------------------------------------------------------------------
641 # Additional Methods
642 # ------------------------------------------------------------------------
643
644 @overload
645 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
646 ...
647
648 @overload
649 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
650 ...
651
652 @overload
653 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
654 ...
655
656 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
657 """
658 Cast to a NumPy array or ExtensionArray with 'dtype'.
659
660 Parameters
661 ----------
662 dtype : str or dtype
663 Typecode or data-type to which the array is cast.
664 copy : bool, default True
665 Whether to copy the data, even if not necessary. If False,
666 a copy is made only if the old dtype does not match the
667 new dtype.
668
669 Returns
670 -------
671 np.ndarray or pandas.api.extensions.ExtensionArray
672 An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``,
673 otherwise a Numpy ndarray with ``dtype`` for its dtype.
674
675 Examples
676 --------
677 >>> arr = pd.array([1, 2, 3])
678 >>> arr
679 <IntegerArray>
680 [1, 2, 3]
681 Length: 3, dtype: Int64
682
683 Casting to another ``ExtensionDtype`` returns an ``ExtensionArray``:
684
685 >>> arr1 = arr.astype('Float64')
686 >>> arr1
687 <FloatingArray>
688 [1.0, 2.0, 3.0]
689 Length: 3, dtype: Float64
690 >>> arr1.dtype
691 Float64Dtype()
692
693 Otherwise, we will get a Numpy ndarray:
694
695 >>> arr2 = arr.astype('float64')
696 >>> arr2
697 array([1., 2., 3.])
698 >>> arr2.dtype
699 dtype('float64')
700 """
701 dtype = pandas_dtype(dtype)
702 if dtype == self.dtype:
703 if not copy:
704 return self
705 else:
706 return self.copy()
707
708 if isinstance(dtype, ExtensionDtype):
709 cls = dtype.construct_array_type()
710 return cls._from_sequence(self, dtype=dtype, copy=copy)
711
712 elif lib.is_np_dtype(dtype, "M"):
713 from pandas.core.arrays import DatetimeArray
714
715 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
716
717 elif lib.is_np_dtype(dtype, "m"):
718 from pandas.core.arrays import TimedeltaArray
719
720 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
721
722 if not copy:
723 return np.asarray(self, dtype=dtype)
724 else:
725 return np.array(self, dtype=dtype, copy=copy)
726
    def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
        """
        A 1-D array indicating if each value is missing.

        Returns
        -------
        numpy.ndarray or pandas.api.extensions.ExtensionArray
            In most cases, this should return a NumPy ndarray. For
            exceptional cases like ``SparseArray``, where returning
            an ndarray would be expensive, an ExtensionArray may be
            returned.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Notes
        -----
        If returning an ExtensionArray, then

        * ``na_values._is_boolean`` should be True
        * `na_values` should implement :func:`ExtensionArray._reduce`
        * ``na_values.any`` and ``na_values.all`` should be implemented

        Examples
        --------
        >>> arr = pd.array([1, 2, np.nan, np.nan])
        >>> arr.isna()
        array([False, False, True, True])
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
754
755 @property
756 def _hasna(self) -> bool:
757 # GH#22680
758 """
759 Equivalent to `self.isna().any()`.
760
761 Some ExtensionArray subclasses may be able to optimize this check.
762 """
763 return bool(self.isna().any())
764
    def _values_for_argsort(self) -> np.ndarray:
        """
        Return values for sorting.

        Returns
        -------
        ndarray
            The transformed values should maintain the ordering between values
            within the array.

        See Also
        --------
        ExtensionArray.argsort : Return the indices that would sort this array.

        Notes
        -----
        The caller is responsible for *not* modifying these values in-place, so
        it is safe for implementers to give views on ``self``.

        Functions that use this (e.g. ``ExtensionArray.argsort``) should ignore
        entries with missing values in the original array (according to
        ``self.isna()``). This means that the corresponding entries in the returned
        array don't need to be modified to sort correctly.

        Examples
        --------
        In most cases, this is the underlying Numpy array of the ``ExtensionArray``:

        >>> arr = pd.array([1, 2, 3])
        >>> arr._values_for_argsort()
        array([1, 2, 3])
        """
        # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
        # The default materializes the array via the sequence protocol, which
        # may be expensive; subclasses should override when possible.
        return np.array(self)
799
    def argsort(
        self,
        *,
        ascending: bool = True,
        kind: SortKind = "quicksort",
        na_position: str = "last",
        **kwargs,
    ) -> np.ndarray:
        """
        Return the indices that would sort this array.

        Parameters
        ----------
        ascending : bool, default True
            Whether the indices should result in an ascending
            or descending sort.
        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
            Sorting algorithm.
        na_position : {'first', 'last'}, default 'last'
            If ``'first'``, put ``NaN`` values at the beginning.
            If ``'last'``, put ``NaN`` values at the end.
        **kwargs
            Passed through to :func:`numpy.argsort`.

        Returns
        -------
        np.ndarray[np.intp]
            Array of indices that sort ``self``. If NaN values are contained,
            NaN values are placed at the end.

        See Also
        --------
        numpy.argsort : Sorting implementation used internally.

        Examples
        --------
        >>> arr = pd.array([3, 1, 2, 5, 4])
        >>> arr.argsort()
        array([1, 2, 0, 4, 3])
        """
        # Implementer note: You have two places to override the behavior of
        # argsort.
        # 1. _values_for_argsort : construct the values passed to np.argsort
        # 2. argsort : total control over sorting. In case of overriding this,
        #    it is recommended to also override argmax/argmin
        ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

        values = self._values_for_argsort()
        # nargsort handles the NA positioning using the mask from isna().
        return nargsort(
            values,
            kind=kind,
            ascending=ascending,
            na_position=na_position,
            mask=np.asarray(self.isna()),
        )
855
856 def argmin(self, skipna: bool = True) -> int:
857 """
858 Return the index of minimum value.
859
860 In case of multiple occurrences of the minimum value, the index
861 corresponding to the first occurrence is returned.
862
863 Parameters
864 ----------
865 skipna : bool, default True
866
867 Returns
868 -------
869 int
870
871 See Also
872 --------
873 ExtensionArray.argmax : Return the index of the maximum value.
874
875 Examples
876 --------
877 >>> arr = pd.array([3, 1, 2, 5, 4])
878 >>> arr.argmin()
879 1
880 """
881 # Implementer note: You have two places to override the behavior of
882 # argmin.
883 # 1. _values_for_argsort : construct the values used in nargminmax
884 # 2. argmin itself : total control over sorting.
885 validate_bool_kwarg(skipna, "skipna")
886 if not skipna and self._hasna:
887 raise NotImplementedError
888 return nargminmax(self, "argmin")
889
890 def argmax(self, skipna: bool = True) -> int:
891 """
892 Return the index of maximum value.
893
894 In case of multiple occurrences of the maximum value, the index
895 corresponding to the first occurrence is returned.
896
897 Parameters
898 ----------
899 skipna : bool, default True
900
901 Returns
902 -------
903 int
904
905 See Also
906 --------
907 ExtensionArray.argmin : Return the index of the minimum value.
908
909 Examples
910 --------
911 >>> arr = pd.array([3, 1, 2, 5, 4])
912 >>> arr.argmax()
913 3
914 """
915 # Implementer note: You have two places to override the behavior of
916 # argmax.
917 # 1. _values_for_argsort : construct the values used in nargminmax
918 # 2. argmax itself : total control over sorting.
919 validate_bool_kwarg(skipna, "skipna")
920 if not skipna and self._hasna:
921 raise NotImplementedError
922 return nargminmax(self, "argmax")
923
    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        axis: int,
        index: Index,
        limit,
        limit_direction,
        limit_area,
        copy: bool,
        **kwargs,
    ) -> Self:
        """
        See DataFrame.interpolate.__doc__.

        Raises
        ------
        NotImplementedError
            Always, on this base class; subclasses that support interpolation
            must override.

        Examples
        --------
        >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3]))
        >>> arr.interpolate(method="linear",
        ...                 limit=3,
        ...                 limit_direction="forward",
        ...                 index=pd.Index([1, 2, 3, 4]),
        ...                 fill_value=1,
        ...                 copy=False,
        ...                 axis=0,
        ...                 limit_area="inside"
        ...                 )
        <NumpyExtensionArray>
        [0.0, 1.0, 2.0, 3.0]
        Length: 4, dtype: float64
        """
        # NB: we return type(self) even if copy=False
        raise NotImplementedError(
            f"{type(self).__name__} does not implement interpolate"
        )
959
960 def _pad_or_backfill(
961 self,
962 *,
963 method: FillnaOptions,
964 limit: int | None = None,
965 limit_area: Literal["inside", "outside"] | None = None,
966 copy: bool = True,
967 ) -> Self:
968 """
969 Pad or backfill values, used by Series/DataFrame ffill and bfill.
970
971 Parameters
972 ----------
973 method : {'backfill', 'bfill', 'pad', 'ffill'}
974 Method to use for filling holes in reindexed Series:
975
976 * pad / ffill: propagate last valid observation forward to next valid.
977 * backfill / bfill: use NEXT valid observation to fill gap.
978
979 limit : int, default None
980 This is the maximum number of consecutive
981 NaN values to forward/backward fill. In other words, if there is
982 a gap with more than this number of consecutive NaNs, it will only
983 be partially filled. If method is not specified, this is the
984 maximum number of entries along the entire axis where NaNs will be
985 filled.
986
987 copy : bool, default True
988 Whether to make a copy of the data before filling. If False, then
989 the original should be modified and no new memory should be allocated.
990 For ExtensionArray subclasses that cannot do this, it is at the
991 author's discretion whether to ignore "copy=False" or to raise.
992 The base class implementation ignores the keyword if any NAs are
993 present.
994
995 Returns
996 -------
997 Same type as self
998
999 Examples
1000 --------
1001 >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
1002 >>> arr._pad_or_backfill(method="backfill", limit=1)
1003 <IntegerArray>
1004 [<NA>, 2, 2, 3, <NA>, <NA>]
1005 Length: 6, dtype: Int64
1006 """
1007
1008 # If a 3rd-party EA has implemented this functionality in fillna,
1009 # we warn that they need to implement _pad_or_backfill instead.
1010 if (
1011 type(self).fillna is not ExtensionArray.fillna
1012 and type(self)._pad_or_backfill is ExtensionArray._pad_or_backfill
1013 ):
1014 # Check for _pad_or_backfill here allows us to call
1015 # super()._pad_or_backfill without getting this warning
1016 warnings.warn(
1017 "ExtensionArray.fillna 'method' keyword is deprecated. "
1018 "In a future version. arr._pad_or_backfill will be called "
1019 "instead. 3rd-party ExtensionArray authors need to implement "
1020 "_pad_or_backfill.",
1021 DeprecationWarning,
1022 stacklevel=find_stack_level(),
1023 )
1024 if limit_area is not None:
1025 raise NotImplementedError(
1026 f"{type(self).__name__} does not implement limit_area "
1027 "(added in pandas 2.2). 3rd-party ExtnsionArray authors "
1028 "need to add this argument to _pad_or_backfill."
1029 )
1030 return self.fillna(method=method, limit=limit)
1031
1032 mask = self.isna()
1033
1034 if mask.any():
1035 # NB: the base class does not respect the "copy" keyword
1036 meth = missing.clean_fill_method(method)
1037
1038 npmask = np.asarray(mask)
1039 if limit_area is not None and not npmask.all():
1040 _fill_limit_area_1d(npmask, limit_area)
1041 if meth == "pad":
1042 indexer = libalgos.get_fill_indexer(npmask, limit=limit)
1043 return self.take(indexer, allow_fill=True)
1044 else:
1045 # i.e. meth == "backfill"
1046 indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
1047 return self[::-1].take(indexer, allow_fill=True)
1048
1049 else:
1050 if not copy:
1051 return self
1052 new_values = self.copy()
1053 return new_values
1054
    def fillna(
        self,
        value: object | ArrayLike | None = None,
        method: FillnaOptions | None = None,
        limit: int | None = None,
        copy: bool = True,
    ) -> Self:
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, array-like
            If a scalar value is passed it is used to fill all missing values.
            Alternatively, an array-like "value" can be given. It's expected
            that the array-like have the same length as 'self'.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series:

            * pad / ffill: propagate last valid observation forward to next valid.
            * backfill / bfill: use NEXT valid observation to fill gap.

            .. deprecated:: 2.1.0

        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled.

            .. deprecated:: 2.1.0

        copy : bool, default True
            Whether to make a copy of the data before filling. If False, then
            the original should be modified and no new memory should be allocated.
            For ExtensionArray subclasses that cannot do this, it is at the
            author's discretion whether to ignore "copy=False" or to raise.
            The base class implementation ignores the keyword in pad/backfill
            cases.

        Returns
        -------
        ExtensionArray
            With NA/NaN filled.

        Examples
        --------
        >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
        >>> arr.fillna(0)
        <IntegerArray>
        [0, 0, 2, 3, 0, 0]
        Length: 6, dtype: Int64
        """
        if method is not None:
            # 'method' is deprecated here; _pad_or_backfill is the replacement.
            warnings.warn(
                f"The 'method' keyword in {type(self).__name__}.fillna is "
                "deprecated and will be removed in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        # Ensure exactly one of value/method was supplied, normalizing aliases.
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()
        # Broadcast/validate an array-like fill value against the mask length.
        # error: Argument 2 to "check_value_size" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        value = missing.check_value_size(
            value, mask, len(self)  # type: ignore[arg-type]
        )

        if mask.any():
            if method is not None:
                # Deprecated pad/backfill path; mirrors _pad_or_backfill.
                meth = missing.clean_fill_method(method)

                npmask = np.asarray(mask)
                if meth == "pad":
                    indexer = libalgos.get_fill_indexer(npmask, limit=limit)
                    return self.take(indexer, allow_fill=True)
                else:
                    # i.e. meth == "backfill"
                    indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
                    return self[::-1].take(indexer, allow_fill=True)
            else:
                # fill with value
                # NB: requires __setitem__ support from the subclass.
                if not copy:
                    new_values = self[:]
                else:
                    new_values = self.copy()
                new_values[mask] = value
        else:
            # No missing values: return self (via a full slice) or a copy.
            if not copy:
                new_values = self[:]
            else:
                new_values = self.copy()
        return new_values
1152
1153 def dropna(self) -> Self:
1154 """
1155 Return ExtensionArray without NA values.
1156
1157 Returns
1158 -------
1159
1160 Examples
1161 --------
1162 >>> pd.array([1, 2, np.nan]).dropna()
1163 <IntegerArray>
1164 [1, 2]
1165 Length: 2, dtype: Int64
1166 """
1167 # error: Unsupported operand type for ~ ("ExtensionArray")
1168 return self[~self.isna()] # type: ignore[operator]
1169
1170 def duplicated(
1171 self, keep: Literal["first", "last", False] = "first"
1172 ) -> npt.NDArray[np.bool_]:
1173 """
1174 Return boolean ndarray denoting duplicate values.
1175
1176 Parameters
1177 ----------
1178 keep : {'first', 'last', False}, default 'first'
1179 - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
1180 - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
1181 - False : Mark all duplicates as ``True``.
1182
1183 Returns
1184 -------
1185 ndarray[bool]
1186
1187 Examples
1188 --------
1189 >>> pd.array([1, 1, 2, 3, 3], dtype="Int64").duplicated()
1190 array([False, True, False, False, True])
1191 """
1192 mask = self.isna().astype(np.bool_, copy=False)
1193 return duplicated(values=self, keep=keep, mask=mask)
1194
1195 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
1196 """
1197 Shift values by desired number.
1198
1199 Newly introduced missing values are filled with
1200 ``self.dtype.na_value``.
1201
1202 Parameters
1203 ----------
1204 periods : int, default 1
1205 The number of periods to shift. Negative values are allowed
1206 for shifting backwards.
1207
1208 fill_value : object, optional
1209 The scalar value to use for newly introduced missing values.
1210 The default is ``self.dtype.na_value``.
1211
1212 Returns
1213 -------
1214 ExtensionArray
1215 Shifted.
1216
1217 Notes
1218 -----
1219 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
1220 returned.
1221
1222 If ``periods > len(self)``, then an array of size
1223 len(self) is returned, with all values filled with
1224 ``self.dtype.na_value``.
1225
1226 For 2-dimensional ExtensionArrays, we are always shifting along axis=0.
1227
1228 Examples
1229 --------
1230 >>> arr = pd.array([1, 2, 3])
1231 >>> arr.shift(2)
1232 <IntegerArray>
1233 [<NA>, <NA>, 1]
1234 Length: 3, dtype: Int64
1235 """
1236 # Note: this implementation assumes that `self.dtype.na_value` can be
1237 # stored in an instance of your ExtensionArray with `self.dtype`.
1238 if not len(self) or periods == 0:
1239 return self.copy()
1240
1241 if isna(fill_value):
1242 fill_value = self.dtype.na_value
1243
1244 empty = self._from_sequence(
1245 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
1246 )
1247 if periods > 0:
1248 a = empty
1249 b = self[:-periods]
1250 else:
1251 a = self[abs(periods) :]
1252 b = empty
1253 return self._concat_same_type([a, b])
1254
1255 def unique(self) -> Self:
1256 """
1257 Compute the ExtensionArray of unique values.
1258
1259 Returns
1260 -------
1261 pandas.api.extensions.ExtensionArray
1262
1263 Examples
1264 --------
1265 >>> arr = pd.array([1, 2, 3, 1, 2, 3])
1266 >>> arr.unique()
1267 <IntegerArray>
1268 [1, 2, 3]
1269 Length: 3, dtype: Int64
1270 """
1271 uniques = unique(self.astype(object))
1272 return self._from_sequence(uniques, dtype=self.dtype)
1273
1274 def searchsorted(
1275 self,
1276 value: NumpyValueArrayLike | ExtensionArray,
1277 side: Literal["left", "right"] = "left",
1278 sorter: NumpySorter | None = None,
1279 ) -> npt.NDArray[np.intp] | np.intp:
1280 """
1281 Find indices where elements should be inserted to maintain order.
1282
1283 Find the indices into a sorted array `self` (a) such that, if the
1284 corresponding elements in `value` were inserted before the indices,
1285 the order of `self` would be preserved.
1286
1287 Assuming that `self` is sorted:
1288
1289 ====== ================================
1290 `side` returned index `i` satisfies
1291 ====== ================================
1292 left ``self[i-1] < value <= self[i]``
1293 right ``self[i-1] <= value < self[i]``
1294 ====== ================================
1295
1296 Parameters
1297 ----------
1298 value : array-like, list or scalar
1299 Value(s) to insert into `self`.
1300 side : {'left', 'right'}, optional
1301 If 'left', the index of the first suitable location found is given.
1302 If 'right', return the last such index. If there is no suitable
1303 index, return either 0 or N (where N is the length of `self`).
1304 sorter : 1-D array-like, optional
1305 Optional array of integer indices that sort array a into ascending
1306 order. They are typically the result of argsort.
1307
1308 Returns
1309 -------
1310 array of ints or int
1311 If value is array-like, array of insertion points.
1312 If value is scalar, a single integer.
1313
1314 See Also
1315 --------
1316 numpy.searchsorted : Similar method from NumPy.
1317
1318 Examples
1319 --------
1320 >>> arr = pd.array([1, 2, 3, 5])
1321 >>> arr.searchsorted([4])
1322 array([3])
1323 """
1324 # Note: the base tests provided by pandas only test the basics.
1325 # We do not test
1326 # 1. Values outside the range of the `data_for_sorting` fixture
1327 # 2. Values between the values in the `data_for_sorting` fixture
1328 # 3. Missing values.
1329 arr = self.astype(object)
1330 if isinstance(value, ExtensionArray):
1331 value = value.astype(object)
1332 return arr.searchsorted(value, side=side, sorter=sorter)
1333
1334 def equals(self, other: object) -> bool:
1335 """
1336 Return if another array is equivalent to this array.
1337
1338 Equivalent means that both arrays have the same shape and dtype, and
1339 all values compare equal. Missing values in the same location are
1340 considered equal (in contrast with normal equality).
1341
1342 Parameters
1343 ----------
1344 other : ExtensionArray
1345 Array to compare to this Array.
1346
1347 Returns
1348 -------
1349 boolean
1350 Whether the arrays are equivalent.
1351
1352 Examples
1353 --------
1354 >>> arr1 = pd.array([1, 2, np.nan])
1355 >>> arr2 = pd.array([1, 2, np.nan])
1356 >>> arr1.equals(arr2)
1357 True
1358 """
1359 if type(self) != type(other):
1360 return False
1361 other = cast(ExtensionArray, other)
1362 if self.dtype != other.dtype:
1363 return False
1364 elif len(self) != len(other):
1365 return False
1366 else:
1367 equal_values = self == other
1368 if isinstance(equal_values, ExtensionArray):
1369 # boolean array with NA -> fill with False
1370 equal_values = equal_values.fillna(False)
1371 # error: Unsupported left operand type for & ("ExtensionArray")
1372 equal_na = self.isna() & other.isna() # type: ignore[operator]
1373 return bool((equal_values | equal_na).all())
1374
1375 def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
1376 """
1377 Pointwise comparison for set containment in the given values.
1378
1379 Roughly equivalent to `np.array([x in values for x in self])`
1380
1381 Parameters
1382 ----------
1383 values : np.ndarray or ExtensionArray
1384
1385 Returns
1386 -------
1387 np.ndarray[bool]
1388
1389 Examples
1390 --------
1391 >>> arr = pd.array([1, 2, 3])
1392 >>> arr.isin([1])
1393 <BooleanArray>
1394 [True, False, False]
1395 Length: 3, dtype: boolean
1396 """
1397 return isin(np.asarray(self), values)
1398
1399 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
1400 """
1401 Return an array and missing value suitable for factorization.
1402
1403 Returns
1404 -------
1405 values : ndarray
1406 An array suitable for factorization. This should maintain order
1407 and be a supported dtype (Float64, Int64, UInt64, String, Object).
1408 By default, the extension array is cast to object dtype.
1409 na_value : object
1410 The value in `values` to consider missing. This will be treated
1411 as NA in the factorization routines, so it will be coded as
1412 `-1` and not included in `uniques`. By default,
1413 ``np.nan`` is used.
1414
1415 Notes
1416 -----
1417 The values returned by this method are also used in
1418 :func:`pandas.util.hash_pandas_object`. If needed, this can be
1419 overridden in the ``self._hash_pandas_object()`` method.
1420
1421 Examples
1422 --------
1423 >>> pd.array([1, 2, 3])._values_for_factorize()
1424 (array([1, 2, 3], dtype=object), nan)
1425 """
1426 return self.astype(object), np.nan
1427
1428 def factorize(
1429 self,
1430 use_na_sentinel: bool = True,
1431 ) -> tuple[np.ndarray, ExtensionArray]:
1432 """
1433 Encode the extension array as an enumerated type.
1434
1435 Parameters
1436 ----------
1437 use_na_sentinel : bool, default True
1438 If True, the sentinel -1 will be used for NaN values. If False,
1439 NaN values will be encoded as non-negative integers and will not drop the
1440 NaN from the uniques of the values.
1441
1442 .. versionadded:: 1.5.0
1443
1444 Returns
1445 -------
1446 codes : ndarray
1447 An integer NumPy array that's an indexer into the original
1448 ExtensionArray.
1449 uniques : ExtensionArray
1450 An ExtensionArray containing the unique values of `self`.
1451
1452 .. note::
1453
1454 uniques will *not* contain an entry for the NA value of
1455 the ExtensionArray if there are any missing values present
1456 in `self`.
1457
1458 See Also
1459 --------
1460 factorize : Top-level factorize method that dispatches here.
1461
1462 Notes
1463 -----
1464 :meth:`pandas.factorize` offers a `sort` keyword as well.
1465
1466 Examples
1467 --------
1468 >>> idx1 = pd.PeriodIndex(["2014-01", "2014-01", "2014-02", "2014-02",
1469 ... "2014-03", "2014-03"], freq="M")
1470 >>> arr, idx = idx1.factorize()
1471 >>> arr
1472 array([0, 0, 1, 1, 2, 2])
1473 >>> idx
1474 PeriodIndex(['2014-01', '2014-02', '2014-03'], dtype='period[M]')
1475 """
1476 # Implementer note: There are two ways to override the behavior of
1477 # pandas.factorize
1478 # 1. _values_for_factorize and _from_factorize.
1479 # Specify the values passed to pandas' internal factorization
1480 # routines, and how to convert from those values back to the
1481 # original ExtensionArray.
1482 # 2. ExtensionArray.factorize.
1483 # Complete control over factorization.
1484 arr, na_value = self._values_for_factorize()
1485
1486 codes, uniques = factorize_array(
1487 arr, use_na_sentinel=use_na_sentinel, na_value=na_value
1488 )
1489
1490 uniques_ea = self._from_factorized(uniques, self)
1491 return codes, uniques_ea
1492
    # Shared numpydoc template for ``repeat``; the ``repeat`` method below
    # renders it per-class via ``@Substitution(klass=...)`` + ``@Appender``.
    _extension_array_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.
        ExtensionArray.take : Take arbitrary positions.

        Examples
        --------
        >>> cat = pd.Categorical(['a', 'b', 'c'])
        >>> cat
        ['a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat(2)
        ['a', 'a', 'b', 'b', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat([1, 2, 3])
        ['a', 'b', 'b', 'c', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        """
1536
1537 @Substitution(klass="ExtensionArray")
1538 @Appender(_extension_array_shared_docs["repeat"])
1539 def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
1540 nv.validate_repeat((), {"axis": axis})
1541 ind = np.arange(len(self)).repeat(repeats)
1542 return self.take(ind)
1543
1544 # ------------------------------------------------------------------------
1545 # Indexing methods
1546 # ------------------------------------------------------------------------
1547
    def take(
        self,
        indices: TakeIndexer,
        *,
        allow_fill: bool = False,
        fill_value: Any = None,
    ) -> Self:
        """
        Take elements from an array.

        Parameters
        ----------
        indices : sequence of int or one-dimensional np.ndarray of int
            Indices to be taken.
        allow_fill : bool, default False
            How to handle negative values in `indices`.

            * False: negative values in `indices` indicate positional indices
              from the right (the default). This is similar to
              :func:`numpy.take`.

            * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any
              other negative values raise a ``ValueError``.

        fill_value : any, optional
            Fill value to use for NA-indices when `allow_fill` is True.
            This may be ``None``, in which case the default NA value for
            the type, ``self.dtype.na_value``, is used.

            For many ExtensionArrays, there will be two representations of
            `fill_value`: a user-facing "boxed" scalar, and a low-level
            physical NA value. `fill_value` should be the user-facing version,
            and the implementation should handle translating that to the
            physical version for processing the take if necessary.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        IndexError
            When the indices are out of bounds for the array.
        ValueError
            When `indices` contains negative values other than ``-1``
            and `allow_fill` is True.

        See Also
        --------
        numpy.take : Take elements from an array along an axis.
        api.extensions.take : Take elements from an array.

        Notes
        -----
        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
        ``iloc``, when `indices` is a sequence of values. Additionally,
        it's called by :meth:`Series.reindex`, or any other method
        that causes realignment, with a `fill_value`.

        Examples
        --------
        Here's an example implementation, which relies on casting the
        extension array to object dtype. This uses the helper method
        :func:`pandas.api.extensions.take`.

        .. code-block:: python

           def take(self, indices, allow_fill=False, fill_value=None):
               from pandas.core.algorithms import take

               # If the ExtensionArray is backed by an ndarray, then
               # just pass that here instead of coercing to object.
               data = self.astype(object)

               if allow_fill and fill_value is None:
                   fill_value = self.dtype.na_value

               # fill value should always be translated from the scalar
               # type for the array, to the physical storage type for
               # the data, before passing to take.

               result = take(data, indices, fill_value=fill_value,
                             allow_fill=allow_fill)
               return self._from_sequence(result, dtype=self.dtype)
        """
        # Implementer note: The `fill_value` parameter should be a user-facing
        # value, an instance of self.dtype.type. When passed `fill_value=None`,
        # the default of `self.dtype.na_value` should be used.
        # This may differ from the physical storage type your ExtensionArray
        # uses. In this case, your implementation is responsible for casting
        # the user-facing type to the storage type, before using
        # pandas.api.extensions.take
        # Abstract: every concrete subclass must provide its own take().
        raise AbstractMethodError(self)
1642
    def copy(self) -> Self:
        """
        Return a copy of the array.

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr2 = arr.copy()
        >>> arr[0] = 2
        >>> arr2
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # Abstract: subclasses must implement. Per the example above, the
        # result must not be affected by later mutation of self.
        raise AbstractMethodError(self)
1662
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        """
        Return a view on the array.

        Parameters
        ----------
        dtype : str, np.dtype, or ExtensionDtype, optional
            Default None.

        Returns
        -------
        ExtensionArray or np.ndarray
            A view on the :class:`ExtensionArray`'s data.

        Examples
        --------
        This gives view on the underlying data of an ``ExtensionArray`` and is not a
        copy. Modifications on either the view or the original ``ExtensionArray``
        will be reflected on the underlying data:

        >>> arr = pd.array([1, 2, 3])
        >>> arr2 = arr.view()
        >>> arr[0] = 2
        >>> arr2
        <IntegerArray>
        [2, 2, 3]
        Length: 3, dtype: Int64
        """
        # NB:
        # - This must return a *new* object referencing the same data, not self.
        # - The only case that *must* be implemented is with dtype=None,
        #   giving a view with the same dtype as self.
        if dtype is not None:
            # Re-interpreting as another dtype is subclass-specific.
            raise NotImplementedError(dtype)
        return self[:]
1698
1699 # ------------------------------------------------------------------------
1700 # Printing
1701 # ------------------------------------------------------------------------
1702
1703 def __repr__(self) -> str:
1704 if self.ndim > 1:
1705 return self._repr_2d()
1706
1707 from pandas.io.formats.printing import format_object_summary
1708
1709 # the short repr has no trailing newline, while the truncated
1710 # repr does. So we include a newline in our template, and strip
1711 # any trailing newlines from format_object_summary
1712 data = format_object_summary(
1713 self, self._formatter(), indent_for_name=False
1714 ).rstrip(", \n")
1715 class_name = f"<{type(self).__name__}>\n"
1716 footer = self._get_repr_footer()
1717 return f"{class_name}{data}\n{footer}"
1718
1719 def _get_repr_footer(self) -> str:
1720 # GH#24278
1721 if self.ndim > 1:
1722 return f"Shape: {self.shape}, dtype: {self.dtype}"
1723 return f"Length: {len(self)}, dtype: {self.dtype}"
1724
1725 def _repr_2d(self) -> str:
1726 from pandas.io.formats.printing import format_object_summary
1727
1728 # the short repr has no trailing newline, while the truncated
1729 # repr does. So we include a newline in our template, and strip
1730 # any trailing newlines from format_object_summary
1731 lines = [
1732 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
1733 ", \n"
1734 )
1735 for x in self
1736 ]
1737 data = ",\n".join(lines)
1738 class_name = f"<{type(self).__name__}>"
1739 footer = self._get_repr_footer()
1740 return f"{class_name}\n[\n{data}\n]\n{footer}"
1741
1742 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
1743 """
1744 Formatting function for scalar values.
1745
1746 This is used in the default '__repr__'. The returned formatting
1747 function receives instances of your scalar type.
1748
1749 Parameters
1750 ----------
1751 boxed : bool, default False
1752 An indicated for whether or not your array is being printed
1753 within a Series, DataFrame, or Index (True), or just by
1754 itself (False). This may be useful if you want scalar values
1755 to appear differently within a Series versus on its own (e.g.
1756 quoted or not).
1757
1758 Returns
1759 -------
1760 Callable[[Any], str]
1761 A callable that gets instances of the scalar type and
1762 returns a string. By default, :func:`repr` is used
1763 when ``boxed=False`` and :func:`str` is used when
1764 ``boxed=True``.
1765
1766 Examples
1767 --------
1768 >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray):
1769 ... def _formatter(self, boxed=False):
1770 ... return lambda x: '*' + str(x) + '*' if boxed else repr(x) + '*'
1771 >>> MyExtensionArray(np.array([1, 2, 3, 4]))
1772 <MyExtensionArray>
1773 [1*, 2*, 3*, 4*]
1774 Length: 4, dtype: int64
1775 """
1776 if boxed:
1777 return str
1778 return repr
1779
1780 # ------------------------------------------------------------------------
1781 # Reshaping
1782 # ------------------------------------------------------------------------
1783
    def transpose(self, *axes: int) -> ExtensionArray:
        """
        Return a transposed view on this array.

        Because ExtensionArrays are always 1D, this is a no-op.  It is included
        for compatibility with np.ndarray.

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> pd.array([1, 2, 3]).transpose()
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # 1D no-op: a full slice hands back a new object over the same entries
        # (any passed axes are ignored, matching the ndarray 1D behavior).
        return self[:]
1803
    @property
    def T(self) -> ExtensionArray:
        """Return the transpose; a no-op for 1D arrays (see ``transpose``)."""
        return self.transpose()
1807
    def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
        """
        Return a flattened view on this array.

        Parameters
        ----------
        order : {None, 'C', 'F', 'A', 'K'}, default 'C'

        Returns
        -------
        ExtensionArray

        Notes
        -----
        - Because ExtensionArrays are 1D-only, this is a no-op.
        - The "order" argument is ignored, is for compatibility with NumPy.

        Examples
        --------
        >>> pd.array([1, 2, 3]).ravel()
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # NB: returns self itself (not a copy or new view), per the no-op note.
        return self
1833
    @classmethod
    def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self:
        """
        Concatenate multiple array of this dtype.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> arr1 = pd.array([1, 2, 3])
        >>> arr2 = pd.array([4, 5, 6])
        >>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
        <IntegerArray>
        [1, 2, 3, 4, 5, 6]
        Length: 6, dtype: Int64
        """
        # Implementer note: this method will only be called with a sequence of
        # ExtensionArrays of this class and with the same dtype as self. This
        # should allow "easy" concatenation (no upcasting needed), and result
        # in a new ExtensionArray of the same dtype.
        # Note: this strict behaviour is only guaranteed starting with pandas 1.1
        # Abstract: concrete subclasses must implement the concatenation.
        raise AbstractMethodError(cls)
1862
    # The _can_hold_na attribute is set to True so that pandas internals
    # will use the ExtensionDtype.na_value as the NA value in operations
    # such as take(), reindex(), shift(), etc. In addition, those results
    # will then be of the ExtensionArray subclass rather than an array
    # of objects
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """Whether this array can hold NA values; delegates to the dtype."""
        return self.dtype._can_hold_na
1871
    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> ExtensionArray:
        """
        Return an ExtensionArray performing an accumulation operation.

        The underlying data type might change.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            - cummin
            - cummax
            - cumsum
            - cumprod
        skipna : bool, default True
            If True, skip NA values.
        **kwargs
            Additional keyword arguments passed to the accumulation function.
            Currently, there is no supported kwarg.

        Returns
        -------
        array

        Raises
        ------
        NotImplementedError : subclass does not define accumulations

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr._accumulate(name='cumsum')
        <IntegerArray>
        [1, 3, 6]
        Length: 3, dtype: Int64
        """
        # No accumulations by default; subclasses that support them override.
        raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")
1911
1912 def _reduce(
1913 self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
1914 ):
1915 """
1916 Return a scalar result of performing the reduction operation.
1917
1918 Parameters
1919 ----------
1920 name : str
1921 Name of the function, supported values are:
1922 { any, all, min, max, sum, mean, median, prod,
1923 std, var, sem, kurt, skew }.
1924 skipna : bool, default True
1925 If True, skip NaN values.
1926 keepdims : bool, default False
1927 If False, a scalar is returned.
1928 If True, the result has dimension with size one along the reduced axis.
1929
1930 .. versionadded:: 2.1
1931
1932 This parameter is not required in the _reduce signature to keep backward
1933 compatibility, but will become required in the future. If the parameter
1934 is not found in the method signature, a FutureWarning will be emitted.
1935 **kwargs
1936 Additional keyword arguments passed to the reduction function.
1937 Currently, `ddof` is the only supported kwarg.
1938
1939 Returns
1940 -------
1941 scalar
1942
1943 Raises
1944 ------
1945 TypeError : subclass does not define reductions
1946
1947 Examples
1948 --------
1949 >>> pd.array([1, 2, 3])._reduce("min")
1950 1
1951 """
1952 meth = getattr(self, name, None)
1953 if meth is None:
1954 raise TypeError(
1955 f"'{type(self).__name__}' with dtype {self.dtype} "
1956 f"does not support reduction '{name}'"
1957 )
1958 result = meth(skipna=skipna, **kwargs)
1959 if keepdims:
1960 result = np.array([result])
1961
1962 return result
1963
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # Setting __hash__ to None marks the class unhashable (like list/ndarray).
    __hash__: ClassVar[None]  # type: ignore[assignment]
1968
1969 # ------------------------------------------------------------------------
1970 # Non-Optimized Default Methods; in the case of the private methods here,
1971 # these are not guaranteed to be stable across pandas versions.
1972
    def _values_for_json(self) -> np.ndarray:
        """
        Specify how to render our entries in to_json.

        Notes
        -----
        The dtype on the returned ndarray is not restricted, but for non-native
        types that are not specifically handled in objToJSON.c, to_json is
        liable to raise. In these cases, it may be safer to return an ndarray
        of strings.
        """
        # Default: hand back whatever np.asarray produces for this array.
        return np.asarray(self)
1985
1986 def _hash_pandas_object(
1987 self, *, encoding: str, hash_key: str, categorize: bool
1988 ) -> npt.NDArray[np.uint64]:
1989 """
1990 Hook for hash_pandas_object.
1991
1992 Default is to use the values returned by _values_for_factorize.
1993
1994 Parameters
1995 ----------
1996 encoding : str
1997 Encoding for data & key when strings.
1998 hash_key : str
1999 Hash_key for string key to encode.
2000 categorize : bool
2001 Whether to first categorize object arrays before hashing. This is more
2002 efficient when the array contains duplicate values.
2003
2004 Returns
2005 -------
2006 np.ndarray[uint64]
2007
2008 Examples
2009 --------
2010 >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8',
2011 ... hash_key="1000000000000000",
2012 ... categorize=False
2013 ... )
2014 array([ 6238072747940578789, 15839785061582574730], dtype=uint64)
2015 """
2016 from pandas.core.util.hashing import hash_array
2017
2018 values, _ = self._values_for_factorize()
2019 return hash_array(
2020 values, encoding=encoding, hash_key=hash_key, categorize=categorize
2021 )
2022
2023 def _explode(self) -> tuple[Self, npt.NDArray[np.uint64]]:
2024 """
2025 Transform each element of list-like to a row.
2026
2027 For arrays that do not contain list-like elements the default
2028 implementation of this method just returns a copy and an array
2029 of ones (unchanged index).
2030
2031 Returns
2032 -------
2033 ExtensionArray
2034 Array with the exploded values.
2035 np.ndarray[uint64]
2036 The original lengths of each list-like for determining the
2037 resulting index.
2038
2039 See Also
2040 --------
2041 Series.explode : The method on the ``Series`` object that this
2042 extension array method is meant to support.
2043
2044 Examples
2045 --------
2046 >>> import pyarrow as pa
2047 >>> a = pd.array([[1, 2, 3], [4], [5, 6]],
2048 ... dtype=pd.ArrowDtype(pa.list_(pa.int64())))
2049 >>> a._explode()
2050 (<ArrowExtensionArray>
2051 [1, 2, 3, 4, 5, 6]
2052 Length: 6, dtype: int64[pyarrow], array([3, 1, 2], dtype=int32))
2053 """
2054 values = self.copy()
2055 counts = np.ones(shape=(len(self),), dtype=np.uint64)
2056 return values, counts
2057
2058 def tolist(self) -> list:
2059 """
2060 Return a list of the values.
2061
2062 These are each a scalar type, which is a Python scalar
2063 (for str, int, float) or a pandas scalar
2064 (for Timestamp/Timedelta/Interval/Period)
2065
2066 Returns
2067 -------
2068 list
2069
2070 Examples
2071 --------
2072 >>> arr = pd.array([1, 2, 3])
2073 >>> arr.tolist()
2074 [1, 2, 3]
2075 """
2076 if self.ndim > 1:
2077 return [x.tolist() for x in self]
2078 return list(self)
2079
2080 def delete(self, loc: PositionalIndexer) -> Self:
2081 indexer = np.delete(np.arange(len(self)), loc)
2082 return self.take(indexer)
2083
2084 def insert(self, loc: int, item) -> Self:
2085 """
2086 Insert an item at the given position.
2087
2088 Parameters
2089 ----------
2090 loc : int
2091 item : scalar-like
2092
2093 Returns
2094 -------
2095 same type as self
2096
2097 Notes
2098 -----
2099 This method should be both type and dtype-preserving. If the item
2100 cannot be held in an array of this type/dtype, either ValueError or
2101 TypeError should be raised.
2102
2103 The default implementation relies on _from_sequence to raise on invalid
2104 items.
2105
2106 Examples
2107 --------
2108 >>> arr = pd.array([1, 2, 3])
2109 >>> arr.insert(2, -1)
2110 <IntegerArray>
2111 [1, 2, -1, 3]
2112 Length: 4, dtype: Int64
2113 """
2114 loc = validate_insert_loc(loc, len(self))
2115
2116 item_arr = type(self)._from_sequence([item], dtype=self.dtype)
2117
2118 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
2119
2120 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
2121 """
2122 Analogue to np.putmask(self, mask, value)
2123
2124 Parameters
2125 ----------
2126 mask : np.ndarray[bool]
2127 value : scalar or listlike
2128 If listlike, must be arraylike with same length as self.
2129
2130 Returns
2131 -------
2132 None
2133
2134 Notes
2135 -----
2136 Unlike np.putmask, we do not repeat listlike values with mismatched length.
2137 'value' should either be a scalar or an arraylike with the same length
2138 as self.
2139 """
2140 if is_list_like(value):
2141 val = value[mask]
2142 else:
2143 val = value
2144
2145 self[mask] = val
2146
2147 def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
2148 """
2149 Analogue to np.where(mask, self, value)
2150
2151 Parameters
2152 ----------
2153 mask : np.ndarray[bool]
2154 value : scalar or listlike
2155
2156 Returns
2157 -------
2158 same type as self
2159 """
2160 result = self.copy()
2161
2162 if is_list_like(value):
2163 val = value[~mask]
2164 else:
2165 val = value
2166
2167 result[~mask] = val
2168 return result
2169
2170 # TODO(3.0): this can be removed once GH#33302 deprecation is enforced
2171 def _fill_mask_inplace(
2172 self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
2173 ) -> None:
2174 """
2175 Replace values in locations specified by 'mask' using pad or backfill.
2176
2177 See also
2178 --------
2179 ExtensionArray.fillna
2180 """
2181 func = missing.get_fill_func(method)
2182 npvalues = self.astype(object)
2183 # NB: if we don't copy mask here, it may be altered inplace, which
2184 # would mess up the `self[mask] = ...` below.
2185 func(npvalues, limit=limit, mask=mask.copy())
2186 new_values = self._from_sequence(npvalues, dtype=self.dtype)
2187 self[mask] = new_values[mask]
2188
2189 def _rank(
2190 self,
2191 *,
2192 axis: AxisInt = 0,
2193 method: str = "average",
2194 na_option: str = "keep",
2195 ascending: bool = True,
2196 pct: bool = False,
2197 ):
2198 """
2199 See Series.rank.__doc__.
2200 """
2201 if axis != 0:
2202 raise NotImplementedError
2203
2204 return rank(
2205 self._values_for_argsort(),
2206 axis=axis,
2207 method=method,
2208 na_option=na_option,
2209 ascending=ascending,
2210 pct=pct,
2211 )
2212
2213 @classmethod
2214 def _empty(cls, shape: Shape, dtype: ExtensionDtype):
2215 """
2216 Create an ExtensionArray with the given shape and dtype.
2217
2218 See also
2219 --------
2220 ExtensionDtype.empty
2221 ExtensionDtype.empty is the 'official' public version of this API.
2222 """
2223 # Implementer note: while ExtensionDtype.empty is the public way to
2224 # call this method, it is still required to implement this `_empty`
2225 # method as well (it is called internally in pandas)
2226 obj = cls._from_sequence([], dtype=dtype)
2227
2228 taker = np.broadcast_to(np.intp(-1), shape)
2229 result = obj.take(taker, allow_fill=True)
2230 if not isinstance(result, cls) or dtype != result.dtype:
2231 raise NotImplementedError(
2232 f"Default 'empty' implementation is invalid for dtype='{dtype}'"
2233 )
2234 return result
2235
2236 def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str) -> Self:
2237 """
2238 Compute the quantiles of self for each quantile in `qs`.
2239
2240 Parameters
2241 ----------
2242 qs : np.ndarray[float64]
2243 interpolation: str
2244
2245 Returns
2246 -------
2247 same type as self
2248 """
2249 mask = np.asarray(self.isna())
2250 arr = np.asarray(self)
2251 fill_value = np.nan
2252
2253 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
2254 return type(self)._from_sequence(res_values)
2255
2256 def _mode(self, dropna: bool = True) -> Self:
2257 """
2258 Returns the mode(s) of the ExtensionArray.
2259
2260 Always returns `ExtensionArray` even if only one value.
2261
2262 Parameters
2263 ----------
2264 dropna : bool, default True
2265 Don't consider counts of NA values.
2266
2267 Returns
2268 -------
2269 same type as self
2270 Sorted, if possible.
2271 """
2272 # error: Incompatible return value type (got "Union[ExtensionArray,
2273 # ndarray[Any, Any]]", expected "Self")
2274 return mode(self, dropna=dropna) # type: ignore[return-value]
2275
    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        Hook into NumPy ufunc execution for this array.

        Dispatch proceeds in a fixed order: defer to pandas containers,
        try the matching dunder op, handle an explicit ``out=``, handle
        reductions, and finally fall back to the default ufunc path.
        """
        if any(
            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
        ):
            # Defer to the pandas container so it can unbox and re-dispatch.
            return NotImplemented

        # Try to route the ufunc through the corresponding dunder operator.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # Caller supplied an `out=` buffer; use the out-aware dispatcher.
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # e.g. ufunc.reduce(...); may still be unsupported (NotImplemented).
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Last resort: the generic ufunc handling in pandas.core.arraylike.
        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
2301
2302 def map(self, mapper, na_action=None):
2303 """
2304 Map values using an input mapping or function.
2305
2306 Parameters
2307 ----------
2308 mapper : function, dict, or Series
2309 Mapping correspondence.
2310 na_action : {None, 'ignore'}, default None
2311 If 'ignore', propagate NA values, without passing them to the
2312 mapping correspondence. If 'ignore' is not supported, a
2313 ``NotImplementedError`` should be raised.
2314
2315 Returns
2316 -------
2317 Union[ndarray, Index, ExtensionArray]
2318 The output of the mapping function applied to the array.
2319 If the function returns a tuple with more than one element
2320 a MultiIndex will be returned.
2321 """
2322 return map_array(self, mapper, na_action=na_action)
2323
2324 # ------------------------------------------------------------------------
2325 # GroupBy Methods
2326
    def _groupby_op(
        self,
        *,
        how: str,
        has_dropped_na: bool,
        min_count: int,
        ngroups: int,
        ids: npt.NDArray[np.intp],
        **kwargs,
    ) -> ArrayLike:
        """
        Dispatch GroupBy reduction or transformation operation.

        This is an *experimental* API to allow ExtensionArray authors to implement
        reductions and transformations. The API is subject to change.

        Parameters
        ----------
        how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median',
               'var', 'std', 'sem', 'nth', 'last', 'ohlc',
               'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
        has_dropped_na : bool
        min_count : int
        ngroups : int
        ids : np.ndarray[np.intp]
            ids[i] gives the integer label for the group that self[i] belongs to.
        **kwargs : operation-specific
            'any', 'all' -> ['skipna']
            'var', 'std', 'sem' -> ['ddof']
            'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
            'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # Imported here (not at module level) to avoid circular imports.
        from pandas.core.arrays.string_ import StringDtype
        from pandas.core.groupby.ops import WrappedCythonOp

        kind = WrappedCythonOp.get_kind_from_how(how)
        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)

        # GH#43682
        if isinstance(self.dtype, StringDtype):
            # StringArray: operate on an object-dtype ndarray with NaN for NA.
            if op.how not in ["any", "all"]:
                # Fail early to avoid conversion to object
                op._get_cython_function(op.kind, op.how, np.dtype(object), False)
            npvalues = self.to_numpy(object, na_value=np.nan)
        else:
            # The base-class implementation only handles StringDtype; any
            # other ExtensionArray must override _groupby_op.
            raise NotImplementedError(
                f"function is not implemented for this dtype: {self.dtype}"
            )

        res_values = op._cython_op_ndim_compat(
            npvalues,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=ids,
            mask=None,
            **kwargs,
        )

        if op.how in op.cast_blocklist:
            # i.e. how in ["rank"], since other cast_blocklist methods don't go
            # through cython_operation
            return res_values

        if isinstance(self.dtype, StringDtype):
            # Re-wrap the object-dtype result as the original string array type.
            dtype = self.dtype
            string_array_cls = dtype.construct_array_type()
            return string_array_cls._from_sequence(res_values, dtype=dtype)

        else:
            # Unreachable in practice: non-StringDtype already raised above.
            raise NotImplementedError
2402
2403
class ExtensionArraySupportsAnyAll(ExtensionArray):
    """
    ExtensionArray subclass that promises ``any`` and ``all`` reductions.

    Subclasses must override both stubs; calling them on this base raises.
    """

    def all(self, *, skipna: bool = True) -> bool:
        """Whether all elements are truthy; must be overridden."""
        raise AbstractMethodError(self)

    def any(self, *, skipna: bool = True) -> bool:
        """Whether any element is truthy; must be overridden."""
        raise AbstractMethodError(self)
2410
2411
class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls) -> None:
        # (dunder name, operator) pairs; reflected variants come from roperator.
        arith_ops = (
            ("__add__", operator.add),
            ("__radd__", roperator.radd),
            ("__sub__", operator.sub),
            ("__rsub__", roperator.rsub),
            ("__mul__", operator.mul),
            ("__rmul__", roperator.rmul),
            ("__pow__", operator.pow),
            ("__rpow__", roperator.rpow),
            ("__mod__", operator.mod),
            ("__rmod__", roperator.rmod),
            ("__floordiv__", operator.floordiv),
            ("__rfloordiv__", roperator.rfloordiv),
            ("__truediv__", operator.truediv),
            ("__rtruediv__", roperator.rtruediv),
            ("__divmod__", divmod),
            ("__rdivmod__", roperator.rdivmod),
        )
        for name, op in arith_ops:
            setattr(cls, name, cls._create_arithmetic_method(op))

    @classmethod
    def _create_comparison_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls) -> None:
        comparison_ops = (
            ("__eq__", operator.eq),
            ("__ne__", operator.ne),
            ("__lt__", operator.lt),
            ("__gt__", operator.gt),
            ("__le__", operator.le),
            ("__ge__", operator.ge),
        )
        for name, op in comparison_ops:
            setattr(cls, name, cls._create_comparison_method(op))

    @classmethod
    def _create_logical_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls) -> None:
        logical_ops = (
            ("__and__", operator.and_),
            ("__rand__", roperator.rand_),
            ("__or__", operator.or_),
            ("__ror__", roperator.ror_),
            ("__xor__", operator.xor),
            ("__rxor__", roperator.rxor),
        )
        for name, op in logical_ops:
            setattr(cls, name, cls._create_logical_method(op))
2473
2474
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators. After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : dtype, optional
            dtype passed to ``np.asarray`` for the result when
            ``coerce_to_dtype`` is False (e.g. ``bool`` for comparisons).

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

        Examples
        --------
        Given an ExtensionArray subclass called MyExtensionArray, use

            __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            # Coerce `other` into something zip-able with `self`: arraylikes
            # are used as-is; anything else is treated as a scalar and
            # broadcast to a list of len(self).
            def convert_values(param):
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume it's a scalar; broadcast to the array's length
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # Apply `op` pointwise to the paired elements.
            # If the operator is not defined for the underlying objects,
            # a TypeError should be raised
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                # Wrap the pointwise results back into an ExtensionArray if
                # requested (and possible); otherwise fall back to an ndarray.
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
                    if not isinstance(res, type(self)):
                        # exception raised in _from_sequence; ensure we have ndarray
                        res = np.asarray(arr)
                else:
                    res = np.asarray(arr, dtype=result_dtype)
                return res

            if op.__name__ in {"divmod", "rdivmod"}:
                # divmod produces a tuple per element; unzip into two arrays.
                a, b = zip(*res)
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        # Name the generated function like the dunder it will be bound to.
        op_name = f"__{op.__name__}__"
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Arithmetic results are coerced back to the EA dtype when possible.
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        # Comparisons always produce a plain boolean ndarray.
        return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)