1"""
2An interface for extending pandas with custom arrays.
3
4.. warning::
5
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
10
11import operator
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 ClassVar,
17 Literal,
18 cast,
19 overload,
20)
21import warnings
22
23import numpy as np
24
25from pandas._libs import (
26 algos as libalgos,
27 lib,
28)
29from pandas.compat import set_function_name
30from pandas.compat.numpy import function as nv
31from pandas.errors import AbstractMethodError
32from pandas.util._decorators import (
33 Appender,
34 Substitution,
35 cache_readonly,
36)
37from pandas.util._exceptions import find_stack_level
38from pandas.util._validators import (
39 validate_bool_kwarg,
40 validate_fillna_kwargs,
41 validate_insert_loc,
42)
43
44from pandas.core.dtypes.cast import maybe_cast_pointwise_result
45from pandas.core.dtypes.common import (
46 is_list_like,
47 is_scalar,
48 pandas_dtype,
49)
50from pandas.core.dtypes.dtypes import ExtensionDtype
51from pandas.core.dtypes.generic import (
52 ABCDataFrame,
53 ABCIndex,
54 ABCSeries,
55)
56from pandas.core.dtypes.missing import isna
57
58from pandas.core import (
59 arraylike,
60 missing,
61 roperator,
62)
63from pandas.core.algorithms import (
64 duplicated,
65 factorize_array,
66 isin,
67 map_array,
68 mode,
69 rank,
70 unique,
71)
72from pandas.core.array_algos.quantile import quantile_with_mask
73from pandas.core.missing import _fill_limit_area_1d
74from pandas.core.sorting import (
75 nargminmax,
76 nargsort,
77)
78
79if TYPE_CHECKING:
80 from collections.abc import (
81 Iterator,
82 Sequence,
83 )
84
85 from pandas._typing import (
86 ArrayLike,
87 AstypeArg,
88 AxisInt,
89 Dtype,
90 DtypeObj,
91 FillnaOptions,
92 InterpolateOptions,
93 NumpySorter,
94 NumpyValueArrayLike,
95 PositionalIndexer,
96 ScalarIndexer,
97 Self,
98 SequenceIndexer,
99 Shape,
100 SortKind,
101 TakeIndexer,
102 npt,
103 )
104
105 from pandas import Index
106
107_extension_array_shared_docs: dict[str, str] = {}
108
109
110class ExtensionArray:
111 """
112 Abstract base class for custom 1-D array types.
113
114 pandas will recognize instances of this class as proper arrays
115 with a custom type and will not attempt to coerce them to objects. They
116 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
117
118 Attributes
119 ----------
120 dtype
121 nbytes
122 ndim
123 shape
124
125 Methods
126 -------
127 argsort
128 astype
129 copy
130 dropna
131 duplicated
132 factorize
133 fillna
134 equals
135 insert
136 interpolate
137 isin
138 isna
139 ravel
140 repeat
141 searchsorted
142 shift
143 take
144 tolist
145 unique
146 view
147 _accumulate
148 _concat_same_type
149 _explode
150 _formatter
151 _from_factorized
152 _from_sequence
153 _from_sequence_of_strings
154 _hash_pandas_object
155 _pad_or_backfill
156 _reduce
157 _values_for_argsort
158 _values_for_factorize
159
160 Notes
161 -----
162 The interface includes the following abstract methods that must be
163 implemented by subclasses:
164
165 * _from_sequence
166 * _from_factorized
167 * __getitem__
168 * __len__
169 * __eq__
170 * dtype
171 * nbytes
172 * isna
173 * take
174 * copy
175 * _concat_same_type
176 * interpolate
177
178 A default repr displaying the type, (truncated) data, length,
    and dtype is provided. It can be customized or replaced by
    overriding:
181
182 * __repr__ : A default repr for the ExtensionArray.
183 * _formatter : Print scalars inside a Series or DataFrame.
184
185 Some methods require casting the ExtensionArray to an ndarray of Python
186 objects with ``self.astype(object)``, which may be expensive. When
187 performance is a concern, we highly recommend overriding the following
188 methods:
189
190 * fillna
191 * _pad_or_backfill
192 * dropna
193 * unique
194 * factorize / _values_for_factorize
195 * argsort, argmax, argmin / _values_for_argsort
196 * searchsorted
197 * map
198
199 The remaining methods implemented on this class should be performant,
200 as they only compose abstract methods. Still, a more efficient
201 implementation may be available, and these methods can be overridden.
202
203 One can implement methods to handle array accumulations or reductions.
204
205 * _accumulate
206 * _reduce
207
208 One can implement methods to handle parsing from strings that will be used
209 in methods such as ``pandas.io.parsers.read_csv``.
210
211 * _from_sequence_of_strings
212
213 This class does not inherit from 'abc.ABCMeta' for performance reasons.
214 Methods and properties required by the interface raise
215 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
216 provided for registering virtual subclasses.
217
218 ExtensionArrays are limited to 1 dimension.
219
220 They may be backed by none, one, or many NumPy arrays. For example,
221 ``pandas.Categorical`` is an extension array backed by two arrays,
222 one for codes and one for categories. An array of IPv6 address may
223 be backed by a NumPy structured array with two fields, one for the
224 lower 64 bits and one for the upper 64 bits. Or they may be backed
225 by some other storage type, like Python lists. Pandas makes no
226 assumptions on how the data are stored, just that it can be converted
227 to a NumPy array.
228 The ExtensionArray interface does not impose any rules on how this data
229 is stored. However, currently, the backing data cannot be stored in
230 attributes called ``.values`` or ``._values`` to ensure full compatibility
231 with pandas internals. But other names as ``.data``, ``._data``,
232 ``._items``, ... can be freely used.
233
234 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
235 that
236
237 1. You defer by returning ``NotImplemented`` when any Series are present
238 in `inputs`. Pandas will extract the arrays and call the ufunc again.
239 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
       Pandas inspects this to determine whether the ufunc is valid for the
241 types present.
242
243 See :ref:`extending.extension.ufunc` for more.
244
245 By default, ExtensionArrays are not hashable. Immutable subclasses may
246 override this behavior.
247
248 Examples
249 --------
250 Please see the following:
251
252 https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py
253 """
254
255 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
256 # Don't override this.
257 _typ = "extension"
258
259 # similar to __array_priority__, positions ExtensionArray after Index,
260 # Series, and DataFrame. EA subclasses may override to choose which EA
261 # subclass takes priority. If overriding, the value should always be
262 # strictly less than 2000 to be below Index.__pandas_priority__.
263 __pandas_priority__ = 1000
264
265 # ------------------------------------------------------------------------
266 # Constructors
267 # ------------------------------------------------------------------------
268
    @classmethod
    def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
        """
        Construct a new ExtensionArray from a sequence of scalars.

        Parameters
        ----------
        scalars : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type`` or be converted into this type in this method.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray.
        copy : bool, default False
            If True, copy the underlying data.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.arrays.IntegerArray._from_sequence([4, 5])
        <IntegerArray>
        [4, 5]
        Length: 2, dtype: Int64
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(cls)
297
298 @classmethod
299 def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self:
300 """
301 Strict analogue to _from_sequence, allowing only sequences of scalars
302 that should be specifically inferred to the given dtype.
303
304 Parameters
305 ----------
306 scalars : sequence
307 dtype : ExtensionDtype
308
309 Raises
310 ------
311 TypeError or ValueError
312
313 Notes
314 -----
315 This is called in a try/except block when casting the result of a
316 pointwise operation.
317 """
318 try:
319 return cls._from_sequence(scalars, dtype=dtype, copy=False)
320 except (ValueError, TypeError):
321 raise
322 except Exception:
323 warnings.warn(
324 "_from_scalars should only raise ValueError or TypeError. "
325 "Consider overriding _from_scalars where appropriate.",
326 stacklevel=find_stack_level(),
327 )
328 raise
329
    @classmethod
    def _from_sequence_of_strings(
        cls, strings, *, dtype: Dtype | None = None, copy: bool = False
    ):
        """
        Construct a new ExtensionArray from a sequence of strings.

        Parameters
        ----------
        strings : Sequence
            Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type``.
        dtype : dtype, optional
            Construct for this particular dtype. This should be a Dtype
            compatible with the ExtensionArray.
        copy : bool, default False
            If True, copy the underlying data.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses that want string parsing
            (e.g. via ``pandas.io.parsers.read_csv``) must override.

        Examples
        --------
        >>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"])
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # Abstract: optional part of the ExtensionArray interface.
        raise AbstractMethodError(cls)
360
    @classmethod
    def _from_factorized(cls, values, original):
        """
        Reconstruct an ExtensionArray after factorization.

        Parameters
        ----------
        values : ndarray
            An integer ndarray with the factorized values.
        original : ExtensionArray
            The original ExtensionArray that factorize was called on.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        See Also
        --------
        factorize : Top-level factorize method that dispatches here.
        ExtensionArray.factorize : Encode the extension array as an enumerated type.

        Examples
        --------
        >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
        ...                                       pd.Interval(1, 5), pd.Interval(1, 5)])
        >>> codes, uniques = pd.factorize(interv_arr)
        >>> pd.arrays.IntervalArray._from_factorized(uniques, interv_arr)
        <IntervalArray>
        [(0, 1], (1, 5]]
        Length: 2, dtype: interval[int64, right]
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(cls)
389
390 # ------------------------------------------------------------------------
391 # Must be a Sequence
392 # ------------------------------------------------------------------------
    @overload
    def __getitem__(self, item: ScalarIndexer) -> Any:
        ...

    @overload
    def __getitem__(self, item: SequenceIndexer) -> Self:
        ...

    def __getitem__(self, item: PositionalIndexer) -> Self | Any:
        """
        Select a subset of self.

        Parameters
        ----------
        item : int, slice, or ndarray
            * int: The position in 'self' to get.

            * slice: A slice object, where 'start', 'stop', and 'step' are
              integers or None

            * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'

            * list[int]: A list of int

        Returns
        -------
        item : scalar or ExtensionArray

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Notes
        -----
        For scalar ``item``, return a scalar value suitable for the array's
        type. This should be an instance of ``self.dtype.type``.

        For slice ``key``, return an instance of ``ExtensionArray``, even
        if the slice is length 0 or 1.

        For a boolean mask, return an instance of ``ExtensionArray``, filtered
        to the values where ``item`` is True.
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
433
    def __setitem__(self, key, value) -> None:
        """
        Set one or more values inplace.

        This method is not required to satisfy the pandas extension array
        interface.

        Parameters
        ----------
        key : int, ndarray, or slice
            When called from, e.g. ``Series.__setitem__``, ``key`` will be
            one of

            * scalar int
            * ndarray of integers.
            * boolean ndarray
            * slice object

        value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set of ``key``.

        Returns
        -------
        None

        Raises
        ------
        NotImplementedError
            Always, on this base class; mutable subclasses may override.
        """
        # Some notes to the ExtensionArray implementer who may have ended up
        # here. While this method is not required for the interface, if you
        # *do* choose to implement __setitem__, then some semantics should be
        # observed:
        #
        # * Setting multiple values : ExtensionArrays should support setting
        #   multiple values at once, 'key' will be a sequence of integers and
        #   'value' will be a same-length sequence.
        #
        # * Broadcasting : For a sequence 'key' and a scalar 'value',
        #   each position in 'key' should be set to 'value'.
        #
        # * Coercion : Most users will expect basic coercion to work. For
        #   example, a string like '2018-01-01' is coerced to a datetime
        #   when setting on a datetime64ns array. In general, if the
        #   __init__ method coerces that value, then so should __setitem__
        # Note, also, that Series/DataFrame.where internally use __setitem__
        # on a copy of the data.
        raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
478
    def __len__(self) -> int:
        """
        Length of this array.

        Returns
        -------
        length : int

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
488
489 def __iter__(self) -> Iterator[Any]:
490 """
491 Iterate over elements of the array.
492 """
493 # This needs to be implemented so that pandas recognizes extension
494 # arrays as list-like. The default implementation makes successive
495 # calls to ``__getitem__``, which may be slower than necessary.
496 for i in range(len(self)):
497 yield self[i]
498
    def __contains__(self, item: object) -> bool | np.bool_:
        """
        Return for `item in self`.

        Parameters
        ----------
        item : object
            Scalar (possibly NA-like) to test for membership.

        Returns
        -------
        bool or numpy.bool_
        """
        # GH37867
        # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
        # would raise a TypeError. The implementation below works around that.
        if is_scalar(item) and isna(item):
            if not self._can_hold_na:
                # An array that cannot hold NA never contains an NA-like item.
                return False
            elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
                # Only this dtype's own NA sentinel (or an instance of its
                # scalar type) matches; a different NA-like does not.
                return self._hasna
            else:
                return False
        else:
            # Non-NA scalar: fall back to element-wise equality.
            # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
            # attribute "any"
            return (item == self).any()  # type: ignore[union-attr]
517
    # error: Signature of "__eq__" incompatible with supertype "object"
    def __eq__(self, other: object) -> ArrayLike:  # type: ignore[override]
        """
        Return for `self == other` (element-wise equality).

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.
        """
        # Implementer note: this should return a boolean numpy ndarray or
        # a boolean ExtensionArray.
        # When `other` is one of Series, Index, or DataFrame, this method should
        # return NotImplemented (to ensure that those objects are responsible for
        # first unpacking the arrays, and then dispatch the operation to the
        # underlying arrays)
        raise AbstractMethodError(self)
530
531 # error: Signature of "__ne__" incompatible with supertype "object"
532 def __ne__(self, other: object) -> ArrayLike: # type: ignore[override]
533 """
534 Return for `self != other` (element-wise in-equality).
535 """
536 # error: Unsupported operand type for ~ ("ExtensionArray")
537 return ~(self == other) # type: ignore[operator]
538
539 def to_numpy(
540 self,
541 dtype: npt.DTypeLike | None = None,
542 copy: bool = False,
543 na_value: object = lib.no_default,
544 ) -> np.ndarray:
545 """
546 Convert to a NumPy ndarray.
547
548 This is similar to :meth:`numpy.asarray`, but may provide additional control
549 over how the conversion is done.
550
551 Parameters
552 ----------
553 dtype : str or numpy.dtype, optional
554 The dtype to pass to :meth:`numpy.asarray`.
555 copy : bool, default False
556 Whether to ensure that the returned value is a not a view on
557 another array. Note that ``copy=False`` does not *ensure* that
558 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that
559 a copy is made, even if not strictly necessary.
560 na_value : Any, optional
561 The value to use for missing values. The default value depends
562 on `dtype` and the type of the array.
563
564 Returns
565 -------
566 numpy.ndarray
567 """
568 result = np.asarray(self, dtype=dtype)
569 if copy or na_value is not lib.no_default:
570 result = result.copy()
571 if na_value is not lib.no_default:
572 result[self.isna()] = na_value
573 return result
574
575 # ------------------------------------------------------------------------
576 # Required attributes
577 # ------------------------------------------------------------------------
578
    @property
    def dtype(self) -> ExtensionDtype:
        """
        An instance of ExtensionDtype.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.array([1, 2, 3]).dtype
        Int64Dtype()
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
590
591 @property
592 def shape(self) -> Shape:
593 """
594 Return a tuple of the array dimensions.
595
596 Examples
597 --------
598 >>> arr = pd.array([1, 2, 3])
599 >>> arr.shape
600 (3,)
601 """
602 return (len(self),)
603
604 @property
605 def size(self) -> int:
606 """
607 The number of elements in the array.
608 """
609 # error: Incompatible return value type (got "signedinteger[_64Bit]",
610 # expected "int") [return-value]
611 return np.prod(self.shape) # type: ignore[return-value]
612
    @property
    def ndim(self) -> int:
        """
        Extension Arrays are only allowed to be 1-dimensional.

        Returns
        -------
        int
            Always ``1``.

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr.ndim
        1
        """
        # The ExtensionArray interface is strictly 1-dimensional by design.
        return 1
625
    @property
    def nbytes(self) -> int:
        """
        The number of bytes needed to store this object in memory.

        Returns
        -------
        int

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Examples
        --------
        >>> pd.array([1, 2, 3]).nbytes
        27
        """
        # If this is expensive to compute, return an approximate lower bound
        # on the number of bytes needed.
        raise AbstractMethodError(self)
639
640 # ------------------------------------------------------------------------
641 # Additional Methods
642 # ------------------------------------------------------------------------
643
644 @overload
645 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
646 ...
647
648 @overload
649 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
650 ...
651
652 @overload
653 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
654 ...
655
656 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
657 """
658 Cast to a NumPy array or ExtensionArray with 'dtype'.
659
660 Parameters
661 ----------
662 dtype : str or dtype
663 Typecode or data-type to which the array is cast.
664 copy : bool, default True
665 Whether to copy the data, even if not necessary. If False,
666 a copy is made only if the old dtype does not match the
667 new dtype.
668
669 Returns
670 -------
671 np.ndarray or pandas.api.extensions.ExtensionArray
672 An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``,
673 otherwise a Numpy ndarray with ``dtype`` for its dtype.
674
675 Examples
676 --------
677 >>> arr = pd.array([1, 2, 3])
678 >>> arr
679 <IntegerArray>
680 [1, 2, 3]
681 Length: 3, dtype: Int64
682
683 Casting to another ``ExtensionDtype`` returns an ``ExtensionArray``:
684
685 >>> arr1 = arr.astype('Float64')
686 >>> arr1
687 <FloatingArray>
688 [1.0, 2.0, 3.0]
689 Length: 3, dtype: Float64
690 >>> arr1.dtype
691 Float64Dtype()
692
693 Otherwise, we will get a Numpy ndarray:
694
695 >>> arr2 = arr.astype('float64')
696 >>> arr2
697 array([1., 2., 3.])
698 >>> arr2.dtype
699 dtype('float64')
700 """
701 dtype = pandas_dtype(dtype)
702 if dtype == self.dtype:
703 if not copy:
704 return self
705 else:
706 return self.copy()
707
708 if isinstance(dtype, ExtensionDtype):
709 cls = dtype.construct_array_type()
710 return cls._from_sequence(self, dtype=dtype, copy=copy)
711
712 elif lib.is_np_dtype(dtype, "M"):
713 from pandas.core.arrays import DatetimeArray
714
715 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
716
717 elif lib.is_np_dtype(dtype, "m"):
718 from pandas.core.arrays import TimedeltaArray
719
720 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
721
722 if not copy:
723 return np.asarray(self, dtype=dtype)
724 else:
725 return np.array(self, dtype=dtype, copy=copy)
726
    def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
        """
        A 1-D array indicating if each value is missing.

        Returns
        -------
        numpy.ndarray or pandas.api.extensions.ExtensionArray
            In most cases, this should return a NumPy ndarray. For
            exceptional cases like ``SparseArray``, where returning
            an ndarray would be expensive, an ExtensionArray may be
            returned.

        Raises
        ------
        AbstractMethodError
            Always, on this base class; subclasses must override.

        Notes
        -----
        If returning an ExtensionArray, then

        * ``na_values._is_boolean`` should be True
        * `na_values` should implement :func:`ExtensionArray._reduce`
        * ``na_values.any`` and ``na_values.all`` should be implemented

        Examples
        --------
        >>> arr = pd.array([1, 2, np.nan, np.nan])
        >>> arr.isna()
        array([False, False, True, True])
        """
        # Abstract: part of the required ExtensionArray interface.
        raise AbstractMethodError(self)
754
755 @property
756 def _hasna(self) -> bool:
757 # GH#22680
758 """
759 Equivalent to `self.isna().any()`.
760
761 Some ExtensionArray subclasses may be able to optimize this check.
762 """
763 return bool(self.isna().any())
764
    def _values_for_argsort(self) -> np.ndarray:
        """
        Return values for sorting.

        Returns
        -------
        ndarray
            The transformed values should maintain the ordering between values
            within the array.

        See Also
        --------
        ExtensionArray.argsort : Return the indices that would sort this array.

        Notes
        -----
        The caller is responsible for *not* modifying these values in-place, so
        it is safe for implementers to give views on ``self``.

        Functions that use this (e.g. ``ExtensionArray.argsort``) should ignore
        entries with missing values in the original array (according to
        ``self.isna()``). This means that the corresponding entries in the returned
        array don't need to be modified to sort correctly.

        Examples
        --------
        In most cases, this is the underlying Numpy array of the ``ExtensionArray``:

        >>> arr = pd.array([1, 2, 3])
        >>> arr._values_for_argsort()
        array([1, 2, 3])
        """
        # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
        # The default materializes the array via the sequence protocol, which
        # may be expensive; subclasses should override when possible.
        return np.array(self)
799
    def argsort(
        self,
        *,
        ascending: bool = True,
        kind: SortKind = "quicksort",
        na_position: str = "last",
        **kwargs,
    ) -> np.ndarray:
        """
        Return the indices that would sort this array.

        Parameters
        ----------
        ascending : bool, default True
            Whether the indices should result in an ascending
            or descending sort.
        kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
            Sorting algorithm.
        na_position : {'first', 'last'}, default 'last'
            If ``'first'``, put ``NaN`` values at the beginning.
            If ``'last'``, put ``NaN`` values at the end.
        **kwargs
            Passed through to :func:`numpy.argsort`.

        Returns
        -------
        np.ndarray[np.intp]
            Array of indices that sort ``self``. If NaN values are contained,
            NaN values are placed at the end.

        See Also
        --------
        numpy.argsort : Sorting implementation used internally.

        Examples
        --------
        >>> arr = pd.array([3, 1, 2, 5, 4])
        >>> arr.argsort()
        array([1, 2, 0, 4, 3])
        """
        # Implementer note: You have two places to override the behavior of
        # argsort.
        # 1. _values_for_argsort : construct the values passed to np.argsort
        # 2. argsort : total control over sorting. In case of overriding this,
        #    it is recommended to also override argmax/argmin
        ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)

        values = self._values_for_argsort()
        # nargsort handles the NA positioning using the mask from isna().
        return nargsort(
            values,
            kind=kind,
            ascending=ascending,
            na_position=na_position,
            mask=np.asarray(self.isna()),
        )
855
856 def argmin(self, skipna: bool = True) -> int:
857 """
858 Return the index of minimum value.
859
860 In case of multiple occurrences of the minimum value, the index
861 corresponding to the first occurrence is returned.
862
863 Parameters
864 ----------
865 skipna : bool, default True
866
867 Returns
868 -------
869 int
870
871 See Also
872 --------
873 ExtensionArray.argmax : Return the index of the maximum value.
874
875 Examples
876 --------
877 >>> arr = pd.array([3, 1, 2, 5, 4])
878 >>> arr.argmin()
879 1
880 """
881 # Implementer note: You have two places to override the behavior of
882 # argmin.
883 # 1. _values_for_argsort : construct the values used in nargminmax
884 # 2. argmin itself : total control over sorting.
885 validate_bool_kwarg(skipna, "skipna")
886 if not skipna and self._hasna:
887 raise NotImplementedError
888 return nargminmax(self, "argmin")
889
890 def argmax(self, skipna: bool = True) -> int:
891 """
892 Return the index of maximum value.
893
894 In case of multiple occurrences of the maximum value, the index
895 corresponding to the first occurrence is returned.
896
897 Parameters
898 ----------
899 skipna : bool, default True
900
901 Returns
902 -------
903 int
904
905 See Also
906 --------
907 ExtensionArray.argmin : Return the index of the minimum value.
908
909 Examples
910 --------
911 >>> arr = pd.array([3, 1, 2, 5, 4])
912 >>> arr.argmax()
913 3
914 """
915 # Implementer note: You have two places to override the behavior of
916 # argmax.
917 # 1. _values_for_argsort : construct the values used in nargminmax
918 # 2. argmax itself : total control over sorting.
919 validate_bool_kwarg(skipna, "skipna")
920 if not skipna and self._hasna:
921 raise NotImplementedError
922 return nargminmax(self, "argmax")
923
    def interpolate(
        self,
        *,
        method: InterpolateOptions,
        axis: int,
        index: Index,
        limit,
        limit_direction,
        limit_area,
        copy: bool,
        **kwargs,
    ) -> Self:
        """
        See DataFrame.interpolate.__doc__.

        Raises
        ------
        NotImplementedError
            Always, on this base class; subclasses that support interpolation
            must override.

        Examples
        --------
        >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3]))
        >>> arr.interpolate(method="linear",
        ...                 limit=3,
        ...                 limit_direction="forward",
        ...                 index=pd.Index([1, 2, 3, 4]),
        ...                 fill_value=1,
        ...                 copy=False,
        ...                 axis=0,
        ...                 limit_area="inside"
        ...                 )
        <NumpyExtensionArray>
        [0.0, 1.0, 2.0, 3.0]
        Length: 4, dtype: float64
        """
        # NB: we return type(self) even if copy=False
        raise NotImplementedError(
            f"{type(self).__name__} does not implement interpolate"
        )
959
960 def _pad_or_backfill(
961 self,
962 *,
963 method: FillnaOptions,
964 limit: int | None = None,
965 limit_area: Literal["inside", "outside"] | None = None,
966 copy: bool = True,
967 ) -> Self:
968 """
969 Pad or backfill values, used by Series/DataFrame ffill and bfill.
970
971 Parameters
972 ----------
973 method : {'backfill', 'bfill', 'pad', 'ffill'}
974 Method to use for filling holes in reindexed Series:
975
976 * pad / ffill: propagate last valid observation forward to next valid.
977 * backfill / bfill: use NEXT valid observation to fill gap.
978
979 limit : int, default None
980 This is the maximum number of consecutive
981 NaN values to forward/backward fill. In other words, if there is
982 a gap with more than this number of consecutive NaNs, it will only
983 be partially filled. If method is not specified, this is the
984 maximum number of entries along the entire axis where NaNs will be
985 filled.
986
987 copy : bool, default True
988 Whether to make a copy of the data before filling. If False, then
989 the original should be modified and no new memory should be allocated.
990 For ExtensionArray subclasses that cannot do this, it is at the
991 author's discretion whether to ignore "copy=False" or to raise.
992 The base class implementation ignores the keyword if any NAs are
993 present.
994
995 Returns
996 -------
997 Same type as self
998
999 Examples
1000 --------
1001 >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
1002 >>> arr._pad_or_backfill(method="backfill", limit=1)
1003 <IntegerArray>
1004 [<NA>, 2, 2, 3, <NA>, <NA>]
1005 Length: 6, dtype: Int64
1006 """
1007
1008 # If a 3rd-party EA has implemented this functionality in fillna,
1009 # we warn that they need to implement _pad_or_backfill instead.
1010 if (
1011 type(self).fillna is not ExtensionArray.fillna
1012 and type(self)._pad_or_backfill is ExtensionArray._pad_or_backfill
1013 ):
1014 # Check for _pad_or_backfill here allows us to call
1015 # super()._pad_or_backfill without getting this warning
1016 warnings.warn(
1017 "ExtensionArray.fillna 'method' keyword is deprecated. "
1018 "In a future version. arr._pad_or_backfill will be called "
1019 "instead. 3rd-party ExtensionArray authors need to implement "
1020 "_pad_or_backfill.",
1021 DeprecationWarning,
1022 stacklevel=find_stack_level(),
1023 )
1024 if limit_area is not None:
1025 raise NotImplementedError(
1026 f"{type(self).__name__} does not implement limit_area "
1027 "(added in pandas 2.2). 3rd-party ExtnsionArray authors "
1028 "need to add this argument to _pad_or_backfill."
1029 )
1030 return self.fillna(method=method, limit=limit)
1031
1032 mask = self.isna()
1033
1034 if mask.any():
1035 # NB: the base class does not respect the "copy" keyword
1036 meth = missing.clean_fill_method(method)
1037
1038 npmask = np.asarray(mask)
1039 if limit_area is not None and not npmask.all():
1040 _fill_limit_area_1d(npmask, limit_area)
1041 if meth == "pad":
1042 indexer = libalgos.get_fill_indexer(npmask, limit=limit)
1043 return self.take(indexer, allow_fill=True)
1044 else:
1045 # i.e. meth == "backfill"
1046 indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
1047 return self[::-1].take(indexer, allow_fill=True)
1048
1049 else:
1050 if not copy:
1051 return self
1052 new_values = self.copy()
1053 return new_values
1054
    def fillna(
        self,
        value: object | ArrayLike | None = None,
        method: FillnaOptions | None = None,
        limit: int | None = None,
        copy: bool = True,
    ) -> Self:
        """
        Fill NA/NaN values using the specified method.

        Parameters
        ----------
        value : scalar, array-like
            If a scalar value is passed it is used to fill all missing values.
            Alternatively, an array-like "value" can be given. It's expected
            that the array-like have the same length as 'self'.
        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
            Method to use for filling holes in reindexed Series:

            * pad / ffill: propagate last valid observation forward to next valid.
            * backfill / bfill: use NEXT valid observation to fill gap.

            .. deprecated:: 2.1.0

        limit : int, default None
            If method is specified, this is the maximum number of consecutive
            NaN values to forward/backward fill. In other words, if there is
            a gap with more than this number of consecutive NaNs, it will only
            be partially filled. If method is not specified, this is the
            maximum number of entries along the entire axis where NaNs will be
            filled.

            .. deprecated:: 2.1.0

        copy : bool, default True
            Whether to make a copy of the data before filling. If False, then
            the original should be modified and no new memory should be allocated.
            For ExtensionArray subclasses that cannot do this, it is at the
            author's discretion whether to ignore "copy=False" or to raise.
            The base class implementation ignores the keyword in pad/backfill
            cases.

        Returns
        -------
        ExtensionArray
            With NA/NaN filled.

        Examples
        --------
        >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
        >>> arr.fillna(0)
        <IntegerArray>
        [0, 0, 2, 3, 0, 0]
        Length: 6, dtype: Int64
        """
        if method is not None:
            # 'method' is deprecated here; _pad_or_backfill is the replacement.
            warnings.warn(
                f"The 'method' keyword in {type(self).__name__}.fillna is "
                "deprecated and will be removed in a future version.",
                FutureWarning,
                stacklevel=find_stack_level(),
            )

        # Ensure exactly one of value/method was supplied, normalizing aliases.
        value, method = validate_fillna_kwargs(value, method)

        mask = self.isna()
        # Broadcast/validate an array-like fill value against the mask length.
        # error: Argument 2 to "check_value_size" has incompatible type
        # "ExtensionArray"; expected "ndarray"
        value = missing.check_value_size(
            value, mask, len(self)  # type: ignore[arg-type]
        )

        if mask.any():
            if method is not None:
                # Deprecated pad/backfill path; mirrors _pad_or_backfill.
                meth = missing.clean_fill_method(method)

                npmask = np.asarray(mask)
                if meth == "pad":
                    indexer = libalgos.get_fill_indexer(npmask, limit=limit)
                    return self.take(indexer, allow_fill=True)
                else:
                    # i.e. meth == "backfill"
                    indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
                    return self[::-1].take(indexer, allow_fill=True)
            else:
                # fill with value
                # NB: requires __setitem__ support from the subclass.
                if not copy:
                    new_values = self[:]
                else:
                    new_values = self.copy()
                new_values[mask] = value
        else:
            # No missing values: return self (via a full slice) or a copy.
            if not copy:
                new_values = self[:]
            else:
                new_values = self.copy()
        return new_values
1152
1153 def dropna(self) -> Self:
1154 """
1155 Return ExtensionArray without NA values.
1156
1157 Returns
1158 -------
1159
1160 Examples
1161 --------
1162 >>> pd.array([1, 2, np.nan]).dropna()
1163 <IntegerArray>
1164 [1, 2]
1165 Length: 2, dtype: Int64
1166 """
1167 # error: Unsupported operand type for ~ ("ExtensionArray")
1168 return self[~self.isna()] # type: ignore[operator]
1169
1170 def duplicated(
1171 self, keep: Literal["first", "last", False] = "first"
1172 ) -> npt.NDArray[np.bool_]:
1173 """
1174 Return boolean ndarray denoting duplicate values.
1175
1176 Parameters
1177 ----------
1178 keep : {'first', 'last', False}, default 'first'
1179 - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
1180 - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
1181 - False : Mark all duplicates as ``True``.
1182
1183 Returns
1184 -------
1185 ndarray[bool]
1186
1187 Examples
1188 --------
1189 >>> pd.array([1, 1, 2, 3, 3], dtype="Int64").duplicated()
1190 array([False, True, False, False, True])
1191 """
1192 mask = self.isna().astype(np.bool_, copy=False)
1193 return duplicated(values=self, keep=keep, mask=mask)
1194
1195 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
1196 """
1197 Shift values by desired number.
1198
1199 Newly introduced missing values are filled with
1200 ``self.dtype.na_value``.
1201
1202 Parameters
1203 ----------
1204 periods : int, default 1
1205 The number of periods to shift. Negative values are allowed
1206 for shifting backwards.
1207
1208 fill_value : object, optional
1209 The scalar value to use for newly introduced missing values.
1210 The default is ``self.dtype.na_value``.
1211
1212 Returns
1213 -------
1214 ExtensionArray
1215 Shifted.
1216
1217 Notes
1218 -----
1219 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
1220 returned.
1221
1222 If ``periods > len(self)``, then an array of size
1223 len(self) is returned, with all values filled with
1224 ``self.dtype.na_value``.
1225
1226 For 2-dimensional ExtensionArrays, we are always shifting along axis=0.
1227
1228 Examples
1229 --------
1230 >>> arr = pd.array([1, 2, 3])
1231 >>> arr.shift(2)
1232 <IntegerArray>
1233 [<NA>, <NA>, 1]
1234 Length: 3, dtype: Int64
1235 """
1236 # Note: this implementation assumes that `self.dtype.na_value` can be
1237 # stored in an instance of your ExtensionArray with `self.dtype`.
1238 if not len(self) or periods == 0:
1239 return self.copy()
1240
1241 if isna(fill_value):
1242 fill_value = self.dtype.na_value
1243
1244 empty = self._from_sequence(
1245 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
1246 )
1247 if periods > 0:
1248 a = empty
1249 b = self[:-periods]
1250 else:
1251 a = self[abs(periods) :]
1252 b = empty
1253 return self._concat_same_type([a, b])
1254
1255 def unique(self) -> Self:
1256 """
1257 Compute the ExtensionArray of unique values.
1258
1259 Returns
1260 -------
1261 pandas.api.extensions.ExtensionArray
1262
1263 Examples
1264 --------
1265 >>> arr = pd.array([1, 2, 3, 1, 2, 3])
1266 >>> arr.unique()
1267 <IntegerArray>
1268 [1, 2, 3]
1269 Length: 3, dtype: Int64
1270 """
1271 uniques = unique(self.astype(object))
1272 return self._from_sequence(uniques, dtype=self.dtype)
1273
1274 def searchsorted(
1275 self,
1276 value: NumpyValueArrayLike | ExtensionArray,
1277 side: Literal["left", "right"] = "left",
1278 sorter: NumpySorter | None = None,
1279 ) -> npt.NDArray[np.intp] | np.intp:
1280 """
1281 Find indices where elements should be inserted to maintain order.
1282
1283 Find the indices into a sorted array `self` (a) such that, if the
1284 corresponding elements in `value` were inserted before the indices,
1285 the order of `self` would be preserved.
1286
1287 Assuming that `self` is sorted:
1288
1289 ====== ================================
1290 `side` returned index `i` satisfies
1291 ====== ================================
1292 left ``self[i-1] < value <= self[i]``
1293 right ``self[i-1] <= value < self[i]``
1294 ====== ================================
1295
1296 Parameters
1297 ----------
1298 value : array-like, list or scalar
1299 Value(s) to insert into `self`.
1300 side : {'left', 'right'}, optional
1301 If 'left', the index of the first suitable location found is given.
1302 If 'right', return the last such index. If there is no suitable
1303 index, return either 0 or N (where N is the length of `self`).
1304 sorter : 1-D array-like, optional
1305 Optional array of integer indices that sort array a into ascending
1306 order. They are typically the result of argsort.
1307
1308 Returns
1309 -------
1310 array of ints or int
1311 If value is array-like, array of insertion points.
1312 If value is scalar, a single integer.
1313
1314 See Also
1315 --------
1316 numpy.searchsorted : Similar method from NumPy.
1317
1318 Examples
1319 --------
1320 >>> arr = pd.array([1, 2, 3, 5])
1321 >>> arr.searchsorted([4])
1322 array([3])
1323 """
1324 # Note: the base tests provided by pandas only test the basics.
1325 # We do not test
1326 # 1. Values outside the range of the `data_for_sorting` fixture
1327 # 2. Values between the values in the `data_for_sorting` fixture
1328 # 3. Missing values.
1329 arr = self.astype(object)
1330 if isinstance(value, ExtensionArray):
1331 value = value.astype(object)
1332 return arr.searchsorted(value, side=side, sorter=sorter)
1333
1334 def equals(self, other: object) -> bool:
1335 """
1336 Return if another array is equivalent to this array.
1337
1338 Equivalent means that both arrays have the same shape and dtype, and
1339 all values compare equal. Missing values in the same location are
1340 considered equal (in contrast with normal equality).
1341
1342 Parameters
1343 ----------
1344 other : ExtensionArray
1345 Array to compare to this Array.
1346
1347 Returns
1348 -------
1349 boolean
1350 Whether the arrays are equivalent.
1351
1352 Examples
1353 --------
1354 >>> arr1 = pd.array([1, 2, np.nan])
1355 >>> arr2 = pd.array([1, 2, np.nan])
1356 >>> arr1.equals(arr2)
1357 True
1358 """
1359 if type(self) != type(other):
1360 return False
1361 other = cast(ExtensionArray, other)
1362 if self.dtype != other.dtype:
1363 return False
1364 elif len(self) != len(other):
1365 return False
1366 else:
1367 equal_values = self == other
1368 if isinstance(equal_values, ExtensionArray):
1369 # boolean array with NA -> fill with False
1370 equal_values = equal_values.fillna(False)
1371 # error: Unsupported left operand type for & ("ExtensionArray")
1372 equal_na = self.isna() & other.isna() # type: ignore[operator]
1373 return bool((equal_values | equal_na).all())
1374
1375 def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
1376 """
1377 Pointwise comparison for set containment in the given values.
1378
1379 Roughly equivalent to `np.array([x in values for x in self])`
1380
1381 Parameters
1382 ----------
1383 values : np.ndarray or ExtensionArray
1384
1385 Returns
1386 -------
1387 np.ndarray[bool]
1388
1389 Examples
1390 --------
1391 >>> arr = pd.array([1, 2, 3])
1392 >>> arr.isin([1])
1393 <BooleanArray>
1394 [True, False, False]
1395 Length: 3, dtype: boolean
1396 """
1397 return isin(np.asarray(self), values)
1398
1399 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
1400 """
1401 Return an array and missing value suitable for factorization.
1402
1403 Returns
1404 -------
1405 values : ndarray
1406 An array suitable for factorization. This should maintain order
1407 and be a supported dtype (Float64, Int64, UInt64, String, Object).
1408 By default, the extension array is cast to object dtype.
1409 na_value : object
1410 The value in `values` to consider missing. This will be treated
1411 as NA in the factorization routines, so it will be coded as
1412 `-1` and not included in `uniques`. By default,
1413 ``np.nan`` is used.
1414
1415 Notes
1416 -----
1417 The values returned by this method are also used in
1418 :func:`pandas.util.hash_pandas_object`. If needed, this can be
1419 overridden in the ``self._hash_pandas_object()`` method.
1420
1421 Examples
1422 --------
1423 >>> pd.array([1, 2, 3])._values_for_factorize()
1424 (array([1, 2, 3], dtype=object), nan)
1425 """
1426 return self.astype(object), np.nan
1427
1428 def factorize(
1429 self,
1430 use_na_sentinel: bool = True,
1431 ) -> tuple[np.ndarray, ExtensionArray]:
1432 """
1433 Encode the extension array as an enumerated type.
1434
1435 Parameters
1436 ----------
1437 use_na_sentinel : bool, default True
1438 If True, the sentinel -1 will be used for NaN values. If False,
1439 NaN values will be encoded as non-negative integers and will not drop the
1440 NaN from the uniques of the values.
1441
1442 .. versionadded:: 1.5.0
1443
1444 Returns
1445 -------
1446 codes : ndarray
1447 An integer NumPy array that's an indexer into the original
1448 ExtensionArray.
1449 uniques : ExtensionArray
1450 An ExtensionArray containing the unique values of `self`.
1451
1452 .. note::
1453
1454 uniques will *not* contain an entry for the NA value of
1455 the ExtensionArray if there are any missing values present
1456 in `self`.
1457
1458 See Also
1459 --------
1460 factorize : Top-level factorize method that dispatches here.
1461
1462 Notes
1463 -----
1464 :meth:`pandas.factorize` offers a `sort` keyword as well.
1465
1466 Examples
1467 --------
1468 >>> idx1 = pd.PeriodIndex(["2014-01", "2014-01", "2014-02", "2014-02",
1469 ... "2014-03", "2014-03"], freq="M")
1470 >>> arr, idx = idx1.factorize()
1471 >>> arr
1472 array([0, 0, 1, 1, 2, 2])
1473 >>> idx
1474 PeriodIndex(['2014-01', '2014-02', '2014-03'], dtype='period[M]')
1475 """
1476 # Implementer note: There are two ways to override the behavior of
1477 # pandas.factorize
1478 # 1. _values_for_factorize and _from_factorize.
1479 # Specify the values passed to pandas' internal factorization
1480 # routines, and how to convert from those values back to the
1481 # original ExtensionArray.
1482 # 2. ExtensionArray.factorize.
1483 # Complete control over factorization.
1484 arr, na_value = self._values_for_factorize()
1485
1486 codes, uniques = factorize_array(
1487 arr, use_na_sentinel=use_na_sentinel, na_value=na_value
1488 )
1489
1490 uniques_ea = self._from_factorized(uniques, self)
1491 return codes, uniques_ea
1492
    # Shared numpydoc template for ``repeat``; the ``repeat`` method below
    # renders it per-class via ``@Substitution(klass=...)`` + ``@Appender``.
    _extension_array_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.
        ExtensionArray.take : Take arbitrary positions.

        Examples
        --------
        >>> cat = pd.Categorical(['a', 'b', 'c'])
        >>> cat
        ['a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat(2)
        ['a', 'a', 'b', 'b', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        >>> cat.repeat([1, 2, 3])
        ['a', 'b', 'b', 'c', 'c', 'c']
        Categories (3, object): ['a', 'b', 'c']
        """
1536
1537 @Substitution(klass="ExtensionArray")
1538 @Appender(_extension_array_shared_docs["repeat"])
1539 def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
1540 nv.validate_repeat((), {"axis": axis})
1541 ind = np.arange(len(self)).repeat(repeats)
1542 return self.take(ind)
1543
1544 # ------------------------------------------------------------------------
1545 # Indexing methods
1546 # ------------------------------------------------------------------------
1547
    def take(
        self,
        indices: TakeIndexer,
        *,
        allow_fill: bool = False,
        fill_value: Any = None,
    ) -> Self:
        """
        Take elements from an array.

        Parameters
        ----------
        indices : sequence of int or one-dimensional np.ndarray of int
            Indices to be taken.
        allow_fill : bool, default False
            How to handle negative values in `indices`.

            * False: negative values in `indices` indicate positional indices
              from the right (the default). This is similar to
              :func:`numpy.take`.

            * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any
              other negative values raise a ``ValueError``.

        fill_value : any, optional
            Fill value to use for NA-indices when `allow_fill` is True.
            This may be ``None``, in which case the default NA value for
            the type, ``self.dtype.na_value``, is used.

            For many ExtensionArrays, there will be two representations of
            `fill_value`: a user-facing "boxed" scalar, and a low-level
            physical NA value. `fill_value` should be the user-facing version,
            and the implementation should handle translating that to the
            physical version for processing the take if necessary.

        Returns
        -------
        ExtensionArray

        Raises
        ------
        IndexError
            When the indices are out of bounds for the array.
        ValueError
            When `indices` contains negative values other than ``-1``
            and `allow_fill` is True.

        See Also
        --------
        numpy.take : Take elements from an array along an axis.
        api.extensions.take : Take elements from an array.

        Notes
        -----
        ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
        ``iloc``, when `indices` is a sequence of values. Additionally,
        it's called by :meth:`Series.reindex`, or any other method
        that causes realignment, with a `fill_value`.

        Examples
        --------
        Here's an example implementation, which relies on casting the
        extension array to object dtype. This uses the helper method
        :func:`pandas.api.extensions.take`.

        .. code-block:: python

           def take(self, indices, allow_fill=False, fill_value=None):
               from pandas.core.algorithms import take

               # If the ExtensionArray is backed by an ndarray, then
               # just pass that here instead of coercing to object.
               data = self.astype(object)

               if allow_fill and fill_value is None:
                   fill_value = self.dtype.na_value

               # fill value should always be translated from the scalar
               # type for the array, to the physical storage type for
               # the data, before passing to take.

               result = take(data, indices, fill_value=fill_value,
                             allow_fill=allow_fill)
               return self._from_sequence(result, dtype=self.dtype)
        """
        # Implementer note: The `fill_value` parameter should be a user-facing
        # value, an instance of self.dtype.type. When passed `fill_value=None`,
        # the default of `self.dtype.na_value` should be used.
        # This may differ from the physical storage type your ExtensionArray
        # uses. In this case, your implementation is responsible for casting
        # the user-facing type to the storage type, before using
        # pandas.api.extensions.take
        # Abstract: every concrete subclass must provide its own take().
        raise AbstractMethodError(self)
1642
    def copy(self) -> Self:
        """
        Return a copy of the array.

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr2 = arr.copy()
        >>> arr[0] = 2
        >>> arr2
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # Abstract: subclasses must implement. Per the example above, the
        # result must not be affected by later mutation of self.
        raise AbstractMethodError(self)
1662
    def view(self, dtype: Dtype | None = None) -> ArrayLike:
        """
        Return a view on the array.

        Parameters
        ----------
        dtype : str, np.dtype, or ExtensionDtype, optional
            Default None.

        Returns
        -------
        ExtensionArray or np.ndarray
            A view on the :class:`ExtensionArray`'s data.

        Examples
        --------
        This gives view on the underlying data of an ``ExtensionArray`` and is not a
        copy. Modifications on either the view or the original ``ExtensionArray``
        will be reflected on the underlying data:

        >>> arr = pd.array([1, 2, 3])
        >>> arr2 = arr.view()
        >>> arr[0] = 2
        >>> arr2
        <IntegerArray>
        [2, 2, 3]
        Length: 3, dtype: Int64
        """
        # NB:
        # - This must return a *new* object referencing the same data, not self.
        # - The only case that *must* be implemented is with dtype=None,
        #   giving a view with the same dtype as self.
        if dtype is not None:
            # Re-interpreting as another dtype is subclass-specific.
            raise NotImplementedError(dtype)
        return self[:]
1698
1699 # ------------------------------------------------------------------------
1700 # Printing
1701 # ------------------------------------------------------------------------
1702
1703 def __repr__(self) -> str:
1704 if self.ndim > 1:
1705 return self._repr_2d()
1706
1707 from pandas.io.formats.printing import format_object_summary
1708
1709 # the short repr has no trailing newline, while the truncated
1710 # repr does. So we include a newline in our template, and strip
1711 # any trailing newlines from format_object_summary
1712 data = format_object_summary(
1713 self, self._formatter(), indent_for_name=False
1714 ).rstrip(", \n")
1715 class_name = f"<{type(self).__name__}>\n"
1716 footer = self._get_repr_footer()
1717 return f"{class_name}{data}\n{footer}"
1718
1719 def _get_repr_footer(self) -> str:
1720 # GH#24278
1721 if self.ndim > 1:
1722 return f"Shape: {self.shape}, dtype: {self.dtype}"
1723 return f"Length: {len(self)}, dtype: {self.dtype}"
1724
1725 def _repr_2d(self) -> str:
1726 from pandas.io.formats.printing import format_object_summary
1727
1728 # the short repr has no trailing newline, while the truncated
1729 # repr does. So we include a newline in our template, and strip
1730 # any trailing newlines from format_object_summary
1731 lines = [
1732 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
1733 ", \n"
1734 )
1735 for x in self
1736 ]
1737 data = ",\n".join(lines)
1738 class_name = f"<{type(self).__name__}>"
1739 footer = self._get_repr_footer()
1740 return f"{class_name}\n[\n{data}\n]\n{footer}"
1741
1742 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
1743 """
1744 Formatting function for scalar values.
1745
1746 This is used in the default '__repr__'. The returned formatting
1747 function receives instances of your scalar type.
1748
1749 Parameters
1750 ----------
1751 boxed : bool, default False
1752 An indicated for whether or not your array is being printed
1753 within a Series, DataFrame, or Index (True), or just by
1754 itself (False). This may be useful if you want scalar values
1755 to appear differently within a Series versus on its own (e.g.
1756 quoted or not).
1757
1758 Returns
1759 -------
1760 Callable[[Any], str]
1761 A callable that gets instances of the scalar type and
1762 returns a string. By default, :func:`repr` is used
1763 when ``boxed=False`` and :func:`str` is used when
1764 ``boxed=True``.
1765
1766 Examples
1767 --------
1768 >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray):
1769 ... def _formatter(self, boxed=False):
1770 ... return lambda x: '*' + str(x) + '*' if boxed else repr(x) + '*'
1771 >>> MyExtensionArray(np.array([1, 2, 3, 4]))
1772 <MyExtensionArray>
1773 [1*, 2*, 3*, 4*]
1774 Length: 4, dtype: int64
1775 """
1776 if boxed:
1777 return str
1778 return repr
1779
1780 # ------------------------------------------------------------------------
1781 # Reshaping
1782 # ------------------------------------------------------------------------
1783
    def transpose(self, *axes: int) -> ExtensionArray:
        """
        Return a transposed view on this array.

        Because ExtensionArrays are always 1D, this is a no-op.  It is included
        for compatibility with np.ndarray.

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> pd.array([1, 2, 3]).transpose()
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # 1D no-op: a full slice hands back a new object over the same entries
        # (any passed axes are ignored, matching the ndarray 1D behavior).
        return self[:]
1803
    @property
    def T(self) -> ExtensionArray:
        """Return the transpose; a no-op for 1D arrays (see ``transpose``)."""
        return self.transpose()
1807
    def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
        """
        Return a flattened view on this array.

        Parameters
        ----------
        order : {None, 'C', 'F', 'A', 'K'}, default 'C'

        Returns
        -------
        ExtensionArray

        Notes
        -----
        - Because ExtensionArrays are 1D-only, this is a no-op.
        - The "order" argument is ignored, is for compatibility with NumPy.

        Examples
        --------
        >>> pd.array([1, 2, 3]).ravel()
        <IntegerArray>
        [1, 2, 3]
        Length: 3, dtype: Int64
        """
        # NB: returns self itself (not a copy or new view), per the no-op note.
        return self
1833
    @classmethod
    def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self:
        """
        Concatenate multiple array of this dtype.

        Parameters
        ----------
        to_concat : sequence of this type

        Returns
        -------
        ExtensionArray

        Examples
        --------
        >>> arr1 = pd.array([1, 2, 3])
        >>> arr2 = pd.array([4, 5, 6])
        >>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
        <IntegerArray>
        [1, 2, 3, 4, 5, 6]
        Length: 6, dtype: Int64
        """
        # Implementer note: this method will only be called with a sequence of
        # ExtensionArrays of this class and with the same dtype as self. This
        # should allow "easy" concatenation (no upcasting needed), and result
        # in a new ExtensionArray of the same dtype.
        # Note: this strict behaviour is only guaranteed starting with pandas 1.1
        # Abstract: concrete subclasses must implement the concatenation.
        raise AbstractMethodError(cls)
1862
    # The _can_hold_na attribute is set to True so that pandas internals
    # will use the ExtensionDtype.na_value as the NA value in operations
    # such as take(), reindex(), shift(), etc. In addition, those results
    # will then be of the ExtensionArray subclass rather than an array
    # of objects
    @cache_readonly
    def _can_hold_na(self) -> bool:
        """Whether this array can hold NA values; delegates to the dtype."""
        return self.dtype._can_hold_na
1871
    def _accumulate(
        self, name: str, *, skipna: bool = True, **kwargs
    ) -> ExtensionArray:
        """
        Return an ExtensionArray performing an accumulation operation.

        The underlying data type might change.

        Parameters
        ----------
        name : str
            Name of the function, supported values are:
            - cummin
            - cummax
            - cumsum
            - cumprod
        skipna : bool, default True
            If True, skip NA values.
        **kwargs
            Additional keyword arguments passed to the accumulation function.
            Currently, there is no supported kwarg.

        Returns
        -------
        array

        Raises
        ------
        NotImplementedError : subclass does not define accumulations

        Examples
        --------
        >>> arr = pd.array([1, 2, 3])
        >>> arr._accumulate(name='cumsum')
        <IntegerArray>
        [1, 3, 6]
        Length: 3, dtype: Int64
        """
        # No accumulations by default; subclasses that support them override.
        raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")
1911
1912 def _reduce(
1913 self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs
1914 ):
1915 """
1916 Return a scalar result of performing the reduction operation.
1917
1918 Parameters
1919 ----------
1920 name : str
1921 Name of the function, supported values are:
1922 { any, all, min, max, sum, mean, median, prod,
1923 std, var, sem, kurt, skew }.
1924 skipna : bool, default True
1925 If True, skip NaN values.
1926 keepdims : bool, default False
1927 If False, a scalar is returned.
1928 If True, the result has dimension with size one along the reduced axis.
1929
1930 .. versionadded:: 2.1
1931
1932 This parameter is not required in the _reduce signature to keep backward
1933 compatibility, but will become required in the future. If the parameter
1934 is not found in the method signature, a FutureWarning will be emitted.
1935 **kwargs
1936 Additional keyword arguments passed to the reduction function.
1937 Currently, `ddof` is the only supported kwarg.
1938
1939 Returns
1940 -------
1941 scalar
1942
1943 Raises
1944 ------
1945 TypeError : subclass does not define reductions
1946
1947 Examples
1948 --------
1949 >>> pd.array([1, 2, 3])._reduce("min")
1950 1
1951 """
1952 meth = getattr(self, name, None)
1953 if meth is None:
1954 raise TypeError(
1955 f"'{type(self).__name__}' with dtype {self.dtype} "
1956 f"does not support reduction '{name}'"
1957 )
1958 result = meth(skipna=skipna, **kwargs)
1959 if keepdims:
1960 result = np.array([result])
1961
1962 return result
1963
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # Setting __hash__ to None marks the class unhashable (like list/ndarray).
    __hash__: ClassVar[None]  # type: ignore[assignment]
1968
1969 # ------------------------------------------------------------------------
1970 # Non-Optimized Default Methods; in the case of the private methods here,
1971 # these are not guaranteed to be stable across pandas versions.
1972
    def _values_for_json(self) -> np.ndarray:
        """
        Specify how to render our entries in to_json.

        Notes
        -----
        The dtype on the returned ndarray is not restricted, but for non-native
        types that are not specifically handled in objToJSON.c, to_json is
        liable to raise. In these cases, it may be safer to return an ndarray
        of strings.
        """
        # Default: hand back whatever np.asarray produces for this array.
        return np.asarray(self)
1985
1986 def _hash_pandas_object(
1987 self, *, encoding: str, hash_key: str, categorize: bool
1988 ) -> npt.NDArray[np.uint64]:
1989 """
1990 Hook for hash_pandas_object.
1991
1992 Default is to use the values returned by _values_for_factorize.
1993
1994 Parameters
1995 ----------
1996 encoding : str
1997 Encoding for data & key when strings.
1998 hash_key : str
1999 Hash_key for string key to encode.
2000 categorize : bool
2001 Whether to first categorize object arrays before hashing. This is more
2002 efficient when the array contains duplicate values.
2003
2004 Returns
2005 -------
2006 np.ndarray[uint64]
2007
2008 Examples
2009 --------
2010 >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8',
2011 ... hash_key="1000000000000000",
2012 ... categorize=False
2013 ... )
2014 array([ 6238072747940578789, 15839785061582574730], dtype=uint64)
2015 """
2016 from pandas.core.util.hashing import hash_array
2017
2018 values, _ = self._values_for_factorize()
2019 return hash_array(
2020 values, encoding=encoding, hash_key=hash_key, categorize=categorize
2021 )
2022
2023 def _explode(self) -> tuple[Self, npt.NDArray[np.uint64]]:
2024 """
2025 Transform each element of list-like to a row.
2026
2027 For arrays that do not contain list-like elements the default
2028 implementation of this method just returns a copy and an array
2029 of ones (unchanged index).
2030
2031 Returns
2032 -------
2033 ExtensionArray
2034 Array with the exploded values.
2035 np.ndarray[uint64]
2036 The original lengths of each list-like for determining the
2037 resulting index.
2038
2039 See Also
2040 --------
2041 Series.explode : The method on the ``Series`` object that this
2042 extension array method is meant to support.
2043
2044 Examples
2045 --------
2046 >>> import pyarrow as pa
2047 >>> a = pd.array([[1, 2, 3], [4], [5, 6]],
2048 ... dtype=pd.ArrowDtype(pa.list_(pa.int64())))
2049 >>> a._explode()
2050 (<ArrowExtensionArray>
2051 [1, 2, 3, 4, 5, 6]
2052 Length: 6, dtype: int64[pyarrow], array([3, 1, 2], dtype=int32))
2053 """
2054 values = self.copy()
2055 counts = np.ones(shape=(len(self),), dtype=np.uint64)
2056 return values, counts
2057
2058 def tolist(self) -> list:
2059 """
2060 Return a list of the values.
2061
2062 These are each a scalar type, which is a Python scalar
2063 (for str, int, float) or a pandas scalar
2064 (for Timestamp/Timedelta/Interval/Period)
2065
2066 Returns
2067 -------
2068 list
2069
2070 Examples
2071 --------
2072 >>> arr = pd.array([1, 2, 3])
2073 >>> arr.tolist()
2074 [1, 2, 3]
2075 """
2076 if self.ndim > 1:
2077 return [x.tolist() for x in self]
2078 return list(self)
2079
2080 def delete(self, loc: PositionalIndexer) -> Self:
2081 indexer = np.delete(np.arange(len(self)), loc)
2082 return self.take(indexer)
2083
2084 def insert(self, loc: int, item) -> Self:
2085 """
2086 Insert an item at the given position.
2087
2088 Parameters
2089 ----------
2090 loc : int
2091 item : scalar-like
2092
2093 Returns
2094 -------
2095 same type as self
2096
2097 Notes
2098 -----
2099 This method should be both type and dtype-preserving. If the item
2100 cannot be held in an array of this type/dtype, either ValueError or
2101 TypeError should be raised.
2102
2103 The default implementation relies on _from_sequence to raise on invalid
2104 items.
2105
2106 Examples
2107 --------
2108 >>> arr = pd.array([1, 2, 3])
2109 >>> arr.insert(2, -1)
2110 <IntegerArray>
2111 [1, 2, -1, 3]
2112 Length: 4, dtype: Int64
2113 """
2114 loc = validate_insert_loc(loc, len(self))
2115
2116 item_arr = type(self)._from_sequence([item], dtype=self.dtype)
2117
2118 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
2119
2120 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
2121 """
2122 Analogue to np.putmask(self, mask, value)
2123
2124 Parameters
2125 ----------
2126 mask : np.ndarray[bool]
2127 value : scalar or listlike
2128 If listlike, must be arraylike with same length as self.
2129
2130 Returns
2131 -------
2132 None
2133
2134 Notes
2135 -----
2136 Unlike np.putmask, we do not repeat listlike values with mismatched length.
2137 'value' should either be a scalar or an arraylike with the same length
2138 as self.
2139 """
2140 if is_list_like(value):
2141 val = value[mask]
2142 else:
2143 val = value
2144
2145 self[mask] = val
2146
2147 def _where(self, mask: npt.NDArray[np.bool_], value) -> Self:
2148 """
2149 Analogue to np.where(mask, self, value)
2150
2151 Parameters
2152 ----------
2153 mask : np.ndarray[bool]
2154 value : scalar or listlike
2155
2156 Returns
2157 -------
2158 same type as self
2159 """
2160 result = self.copy()
2161
2162 if is_list_like(value):
2163 val = value[~mask]
2164 else:
2165 val = value
2166
2167 result[~mask] = val
2168 return result
2169
2170 # TODO(3.0): this can be removed once GH#33302 deprecation is enforced
2171 def _fill_mask_inplace(
2172 self, method: str, limit: int | None, mask: npt.NDArray[np.bool_]
2173 ) -> None:
2174 """
2175 Replace values in locations specified by 'mask' using pad or backfill.
2176
2177 See also
2178 --------
2179 ExtensionArray.fillna
2180 """
2181 func = missing.get_fill_func(method)
2182 npvalues = self.astype(object)
2183 # NB: if we don't copy mask here, it may be altered inplace, which
2184 # would mess up the `self[mask] = ...` below.
2185 func(npvalues, limit=limit, mask=mask.copy())
2186 new_values = self._from_sequence(npvalues, dtype=self.dtype)
2187 self[mask] = new_values[mask]
2188
2189 def _rank(
2190 self,
2191 *,
2192 axis: AxisInt = 0,
2193 method: str = "average",
2194 na_option: str = "keep",
2195 ascending: bool = True,
2196 pct: bool = False,
2197 ):
2198 """
2199 See Series.rank.__doc__.
2200 """
2201 if axis != 0:
2202 raise NotImplementedError
2203
2204 return rank(
2205 self._values_for_argsort(),
2206 axis=axis,
2207 method=method,
2208 na_option=na_option,
2209 ascending=ascending,
2210 pct=pct,
2211 )
2212
2213 @classmethod
2214 def _empty(cls, shape: Shape, dtype: ExtensionDtype):
2215 """
2216 Create an ExtensionArray with the given shape and dtype.
2217
2218 See also
2219 --------
2220 ExtensionDtype.empty
2221 ExtensionDtype.empty is the 'official' public version of this API.
2222 """
2223 # Implementer note: while ExtensionDtype.empty is the public way to
2224 # call this method, it is still required to implement this `_empty`
2225 # method as well (it is called internally in pandas)
2226 obj = cls._from_sequence([], dtype=dtype)
2227
2228 taker = np.broadcast_to(np.intp(-1), shape)
2229 result = obj.take(taker, allow_fill=True)
2230 if not isinstance(result, cls) or dtype != result.dtype:
2231 raise NotImplementedError(
2232 f"Default 'empty' implementation is invalid for dtype='{dtype}'"
2233 )
2234 return result
2235
2236 def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str) -> Self:
2237 """
2238 Compute the quantiles of self for each quantile in `qs`.
2239
2240 Parameters
2241 ----------
2242 qs : np.ndarray[float64]
2243 interpolation: str
2244
2245 Returns
2246 -------
2247 same type as self
2248 """
2249 mask = np.asarray(self.isna())
2250 arr = np.asarray(self)
2251 fill_value = np.nan
2252
2253 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
2254 return type(self)._from_sequence(res_values)
2255
2256 def _mode(self, dropna: bool = True) -> Self:
2257 """
2258 Returns the mode(s) of the ExtensionArray.
2259
2260 Always returns `ExtensionArray` even if only one value.
2261
2262 Parameters
2263 ----------
2264 dropna : bool, default True
2265 Don't consider counts of NA values.
2266
2267 Returns
2268 -------
2269 same type as self
2270 Sorted, if possible.
2271 """
2272 # error: Incompatible return value type (got "Union[ExtensionArray,
2273 # ndarray[Any, Any]]", expected "Self")
2274 return mode(self, dropna=dropna) # type: ignore[return-value]
2275
    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        """
        Hook into NumPy ufunc execution for this array.

        Dispatch proceeds in a fixed order: defer to pandas containers,
        try the matching dunder op, handle an explicit ``out=``, handle
        reductions, and finally fall back to the default ufunc path.
        """
        if any(
            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
        ):
            # Defer to the pandas container so it can unbox and re-dispatch.
            return NotImplemented

        # Try to route the ufunc through the corresponding dunder operator.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # Caller supplied an `out=` buffer; use the out-aware dispatcher.
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # e.g. ufunc.reduce(...); may still be unsupported (NotImplemented).
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Last resort: the generic ufunc handling in pandas.core.arraylike.
        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
2301
2302 def map(self, mapper, na_action=None):
2303 """
2304 Map values using an input mapping or function.
2305
2306 Parameters
2307 ----------
2308 mapper : function, dict, or Series
2309 Mapping correspondence.
2310 na_action : {None, 'ignore'}, default None
2311 If 'ignore', propagate NA values, without passing them to the
2312 mapping correspondence. If 'ignore' is not supported, a
2313 ``NotImplementedError`` should be raised.
2314
2315 Returns
2316 -------
2317 Union[ndarray, Index, ExtensionArray]
2318 The output of the mapping function applied to the array.
2319 If the function returns a tuple with more than one element
2320 a MultiIndex will be returned.
2321 """
2322 return map_array(self, mapper, na_action=na_action)
2323
2324 # ------------------------------------------------------------------------
2325 # GroupBy Methods
2326
    def _groupby_op(
        self,
        *,
        how: str,
        has_dropped_na: bool,
        min_count: int,
        ngroups: int,
        ids: npt.NDArray[np.intp],
        **kwargs,
    ) -> ArrayLike:
        """
        Dispatch GroupBy reduction or transformation operation.

        This is an *experimental* API to allow ExtensionArray authors to implement
        reductions and transformations. The API is subject to change.

        Parameters
        ----------
        how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median',
               'var', 'std', 'sem', 'nth', 'last', 'ohlc',
               'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
        has_dropped_na : bool
        min_count : int
        ngroups : int
        ids : np.ndarray[np.intp]
            ids[i] gives the integer label for the group that self[i] belongs to.
        **kwargs : operation-specific
            'any', 'all' -> ['skipna']
            'var', 'std', 'sem' -> ['ddof']
            'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
            'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']

        Returns
        -------
        np.ndarray or ExtensionArray
        """
        # Imported here (not at module level) to avoid circular imports.
        from pandas.core.arrays.string_ import StringDtype
        from pandas.core.groupby.ops import WrappedCythonOp

        kind = WrappedCythonOp.get_kind_from_how(how)
        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)

        # GH#43682
        if isinstance(self.dtype, StringDtype):
            # StringArray: operate on an object-dtype ndarray with NaN for NA.
            if op.how not in ["any", "all"]:
                # Fail early to avoid conversion to object
                op._get_cython_function(op.kind, op.how, np.dtype(object), False)
            npvalues = self.to_numpy(object, na_value=np.nan)
        else:
            # The base-class implementation only handles StringDtype; any
            # other ExtensionArray must override _groupby_op.
            raise NotImplementedError(
                f"function is not implemented for this dtype: {self.dtype}"
            )

        res_values = op._cython_op_ndim_compat(
            npvalues,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=ids,
            mask=None,
            **kwargs,
        )

        if op.how in op.cast_blocklist:
            # i.e. how in ["rank"], since other cast_blocklist methods don't go
            # through cython_operation
            return res_values

        if isinstance(self.dtype, StringDtype):
            # Re-wrap the object-dtype result as the original string array type.
            dtype = self.dtype
            string_array_cls = dtype.construct_array_type()
            return string_array_cls._from_sequence(res_values, dtype=dtype)

        else:
            # Unreachable in practice: non-StringDtype already raised above.
            raise NotImplementedError
2402
2403
class ExtensionArraySupportsAnyAll(ExtensionArray):
    """
    ExtensionArray subclass that promises ``any`` and ``all`` reductions.

    Subclasses must override both stubs; calling them on this base raises.
    """

    def all(self, *, skipna: bool = True) -> bool:
        """Whether all elements are truthy; must be overridden."""
        raise AbstractMethodError(self)

    def any(self, *, skipna: bool = True) -> bool:
        """Whether any element is truthy; must be overridden."""
        raise AbstractMethodError(self)
2410
2411
class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls) -> None:
        # (dunder name, operator) pairs; reflected variants come from roperator.
        arith_ops = (
            ("__add__", operator.add),
            ("__radd__", roperator.radd),
            ("__sub__", operator.sub),
            ("__rsub__", roperator.rsub),
            ("__mul__", operator.mul),
            ("__rmul__", roperator.rmul),
            ("__pow__", operator.pow),
            ("__rpow__", roperator.rpow),
            ("__mod__", operator.mod),
            ("__rmod__", roperator.rmod),
            ("__floordiv__", operator.floordiv),
            ("__rfloordiv__", roperator.rfloordiv),
            ("__truediv__", operator.truediv),
            ("__rtruediv__", roperator.rtruediv),
            ("__divmod__", divmod),
            ("__rdivmod__", roperator.rdivmod),
        )
        for name, op in arith_ops:
            setattr(cls, name, cls._create_arithmetic_method(op))

    @classmethod
    def _create_comparison_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls) -> None:
        comparison_ops = (
            ("__eq__", operator.eq),
            ("__ne__", operator.ne),
            ("__lt__", operator.lt),
            ("__gt__", operator.gt),
            ("__le__", operator.le),
            ("__ge__", operator.ge),
        )
        for name, op in comparison_ops:
            setattr(cls, name, cls._create_comparison_method(op))

    @classmethod
    def _create_logical_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls) -> None:
        logical_ops = (
            ("__and__", operator.and_),
            ("__rand__", roperator.rand_),
            ("__or__", operator.or_),
            ("__ror__", roperator.ror_),
            ("__xor__", operator.xor),
            ("__rxor__", roperator.rxor),
        )
        for name, op in logical_ops:
            setattr(cls, name, cls._create_logical_method(op))
2473
2474
class ExtensionScalarOpsMixin(ExtensionOpsMixin):
    """
    A mixin for defining ops on an ExtensionArray.

    It is assumed that the underlying scalar objects have the operators
    already defined.

    Notes
    -----
    If you have defined a subclass MyExtensionArray(ExtensionArray), then
    use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
    get the arithmetic operators. After the definition of MyExtensionArray,
    insert the lines

    MyExtensionArray._add_arithmetic_ops()
    MyExtensionArray._add_comparison_ops()

    to link the operators to your class.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
        """
        A class method that returns a method that will correspond to an
        operator for an ExtensionArray subclass, by dispatching to the
        relevant operator defined on the individual elements of the
        ExtensionArray.

        Parameters
        ----------
        op : function
            An operator that takes arguments op(a, b)
        coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : dtype, optional
            dtype passed to ``np.asarray`` for the result when
            ``coerce_to_dtype`` is False (e.g. ``bool`` for comparisons).

        Returns
        -------
        Callable[[Any, Any], Union[ndarray, ExtensionArray]]
            A method that can be bound to a class. When used, the method
            receives the two arguments, one of which is the instance of
            this class, and should return an ExtensionArray or an ndarray.

            Returning an ndarray may be necessary when the result of the
            `op` cannot be stored in the ExtensionArray. The dtype of the
            ndarray uses NumPy's normal inference rules.

        Examples
        --------
        Given an ExtensionArray subclass called MyExtensionArray, use

            __add__ = cls._create_method(operator.add)

        in the class definition of MyExtensionArray to create the operator
        for addition, that will be based on the operator implementation
        of the underlying elements of the ExtensionArray
        """

        def _binop(self, other):
            # Coerce `other` into something zip-able with `self`: arraylikes
            # are used as-is; anything else is treated as a scalar and
            # broadcast to a list of len(self).
            def convert_values(param):
                if isinstance(param, ExtensionArray) or is_list_like(param):
                    ovalues = param
                else:  # Assume it's a scalar; broadcast to the array's length
                    ovalues = [param] * len(self)
                return ovalues

            if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
                # rely on pandas to unbox and dispatch to us
                return NotImplemented

            lvalues = self
            rvalues = convert_values(other)

            # Apply `op` pointwise to the paired elements.
            # If the operator is not defined for the underlying objects,
            # a TypeError should be raised
            res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]

            def _maybe_convert(arr):
                # Wrap the pointwise results back into an ExtensionArray if
                # requested (and possible); otherwise fall back to an ndarray.
                if coerce_to_dtype:
                    # https://github.com/pandas-dev/pandas/issues/22850
                    # We catch all regular exceptions here, and fall back
                    # to an ndarray.
                    res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False)
                    if not isinstance(res, type(self)):
                        # exception raised in _from_sequence; ensure we have ndarray
                        res = np.asarray(arr)
                else:
                    res = np.asarray(arr, dtype=result_dtype)
                return res

            if op.__name__ in {"divmod", "rdivmod"}:
                # divmod produces a tuple per element; unzip into two arrays.
                a, b = zip(*res)
                return _maybe_convert(a), _maybe_convert(b)

            return _maybe_convert(res)

        # Name the generated function like the dunder it will be bound to.
        op_name = f"__{op.__name__}__"
        return set_function_name(_binop, op_name, cls)

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Arithmetic results are coerced back to the EA dtype when possible.
        return cls._create_method(op)

    @classmethod
    def _create_comparison_method(cls, op):
        # Comparisons always produce a plain boolean ndarray.
        return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)