1"""
2An interface for extending pandas with custom arrays.
3
4.. warning::
5
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
10
11import operator
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 ClassVar,
17 Iterator,
18 Literal,
19 Sequence,
20 TypeVar,
21 cast,
22 overload,
23)
24
25import numpy as np
26
27from pandas._libs import lib
28from pandas._typing import (
29 ArrayLike,
30 AstypeArg,
31 AxisInt,
32 Dtype,
33 FillnaOptions,
34 PositionalIndexer,
35 ScalarIndexer,
36 SequenceIndexer,
37 Shape,
38 SortKind,
39 TakeIndexer,
40 npt,
41)
42from pandas.compat import set_function_name
43from pandas.compat.numpy import function as nv
44from pandas.errors import AbstractMethodError
45from pandas.util._decorators import (
46 Appender,
47 Substitution,
48 cache_readonly,
49)
50from pandas.util._validators import (
51 validate_bool_kwarg,
52 validate_fillna_kwargs,
53 validate_insert_loc,
54)
55
56from pandas.core.dtypes.cast import maybe_cast_to_extension_array
57from pandas.core.dtypes.common import (
58 is_datetime64_dtype,
59 is_dtype_equal,
60 is_list_like,
61 is_scalar,
62 is_timedelta64_dtype,
63 pandas_dtype,
64)
65from pandas.core.dtypes.dtypes import ExtensionDtype
66from pandas.core.dtypes.generic import (
67 ABCDataFrame,
68 ABCIndex,
69 ABCSeries,
70)
71from pandas.core.dtypes.missing import isna
72
73from pandas.core import (
74 arraylike,
75 missing,
76 roperator,
77)
78from pandas.core.algorithms import (
79 factorize_array,
80 isin,
81 mode,
82 rank,
83 unique,
84)
85from pandas.core.array_algos.quantile import quantile_with_mask
86from pandas.core.sorting import (
87 nargminmax,
88 nargsort,
89)
90
91if TYPE_CHECKING:
92 from pandas._typing import (
93 NumpySorter,
94 NumpyValueArrayLike,
95 )
96
97_extension_array_shared_docs: dict[str, str] = {}
98
99ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
100
101
102class ExtensionArray:
103 """
104 Abstract base class for custom 1-D array types.
105
106 pandas will recognize instances of this class as proper arrays
107 with a custom type and will not attempt to coerce them to objects. They
108 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
109
110 Attributes
111 ----------
112 dtype
113 nbytes
114 ndim
115 shape
116
117 Methods
118 -------
119 argsort
120 astype
121 copy
122 dropna
123 factorize
124 fillna
125 equals
126 insert
127 isin
128 isna
129 ravel
130 repeat
131 searchsorted
132 shift
133 take
134 tolist
135 unique
136 view
137 _accumulate
138 _concat_same_type
139 _formatter
140 _from_factorized
141 _from_sequence
142 _from_sequence_of_strings
143 _reduce
144 _values_for_argsort
145 _values_for_factorize
146
147 Notes
148 -----
149 The interface includes the following abstract methods that must be
150 implemented by subclasses:
151
152 * _from_sequence
153 * _from_factorized
154 * __getitem__
155 * __len__
156 * __eq__
157 * dtype
158 * nbytes
159 * isna
160 * take
161 * copy
162 * _concat_same_type
163
    A default repr displaying the type, (truncated) data, length,
    and dtype is provided. It can be customized or replaced
    by overriding:
167
168 * __repr__ : A default repr for the ExtensionArray.
169 * _formatter : Print scalars inside a Series or DataFrame.
170
171 Some methods require casting the ExtensionArray to an ndarray of Python
172 objects with ``self.astype(object)``, which may be expensive. When
173 performance is a concern, we highly recommend overriding the following
174 methods:
175
176 * fillna
177 * dropna
178 * unique
179 * factorize / _values_for_factorize
180 * argsort, argmax, argmin / _values_for_argsort
181 * searchsorted
182
183 The remaining methods implemented on this class should be performant,
184 as they only compose abstract methods. Still, a more efficient
185 implementation may be available, and these methods can be overridden.
186
187 One can implement methods to handle array accumulations or reductions.
188
189 * _accumulate
190 * _reduce
191
192 One can implement methods to handle parsing from strings that will be used
193 in methods such as ``pandas.io.parsers.read_csv``.
194
195 * _from_sequence_of_strings
196
    This class does not use 'abc.ABCMeta' as its metaclass, for performance reasons.
198 Methods and properties required by the interface raise
199 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
200 provided for registering virtual subclasses.
201
202 ExtensionArrays are limited to 1 dimension.
203
204 They may be backed by none, one, or many NumPy arrays. For example,
205 ``pandas.Categorical`` is an extension array backed by two arrays,
    one for codes and one for categories. An array of IPv6 addresses may
207 be backed by a NumPy structured array with two fields, one for the
208 lower 64 bits and one for the upper 64 bits. Or they may be backed
209 by some other storage type, like Python lists. Pandas makes no
210 assumptions on how the data are stored, just that it can be converted
211 to a NumPy array.
212 The ExtensionArray interface does not impose any rules on how this data
213 is stored. However, currently, the backing data cannot be stored in
214 attributes called ``.values`` or ``._values`` to ensure full compatibility
    with pandas internals. But other names such as ``.data``, ``._data``,
216 ``._items``, ... can be freely used.
217
218 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
219 that
220
221 1. You defer by returning ``NotImplemented`` when any Series are present
222 in `inputs`. Pandas will extract the arrays and call the ufunc again.
223 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
       Pandas inspects this to determine whether the ufunc is valid for the
225 types present.
226
227 See :ref:`extending.extension.ufunc` for more.
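
    A simplified, minimal sketch of that protocol (``MyArray`` and its
    ``_data`` ndarray attribute are illustrative names, not part of pandas):

    .. code-block:: python

        import numbers

        import numpy as np
        import pandas as pd


        class MyArray(ExtensionArray):
            _HANDLED_TYPES = (np.ndarray, numbers.Number)

            def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
                # Defer when a Series/Index/DataFrame is present; pandas
                # will unbox the arrays and call the ufunc again.
                if any(isinstance(x, (pd.Series, pd.Index, pd.DataFrame))
                       for x in inputs):
                    return NotImplemented
                if not all(isinstance(x, self._HANDLED_TYPES + (MyArray,))
                           for x in inputs):
                    return NotImplemented
                arrays = [x._data if isinstance(x, MyArray) else x
                          for x in inputs]
                result = getattr(ufunc, method)(*arrays, **kwargs)
                return type(self)._from_sequence(result)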
228
229 By default, ExtensionArrays are not hashable. Immutable subclasses may
230 override this behavior.
231 """
232
233 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
234 # Don't override this.
235 _typ = "extension"
236
237 # ------------------------------------------------------------------------
238 # Constructors
239 # ------------------------------------------------------------------------
240
241 @classmethod
242 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
243 """
244 Construct a new ExtensionArray from a sequence of scalars.
245
246 Parameters
247 ----------
248 scalars : Sequence
249 Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type``, or be converted into this type in this method.
251 dtype : dtype, optional
252 Construct for this particular dtype. This should be a Dtype
253 compatible with the ExtensionArray.
254 copy : bool, default False
255 If True, copy the underlying data.
256
257 Returns
258 -------
259 ExtensionArray
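
        Examples
        --------
        A minimal sketch for a hypothetical ndarray-backed subclass whose
        ``__init__`` accepts a NumPy ndarray (one possibility, not the
        required implementation):

        .. code-block:: python

            @classmethod
            def _from_sequence(cls, scalars, *, dtype=None, copy=False):
                data = np.asarray(scalars)
                if copy:
                    data = data.copy()
                return cls(data)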
260 """
261 raise AbstractMethodError(cls)
262
263 @classmethod
264 def _from_sequence_of_strings(
265 cls, strings, *, dtype: Dtype | None = None, copy: bool = False
266 ):
267 """
268 Construct a new ExtensionArray from a sequence of strings.
269
270 Parameters
271 ----------
272 strings : Sequence
273 Each element will be an instance of the scalar type for this
274 array, ``cls.dtype.type``.
275 dtype : dtype, optional
276 Construct for this particular dtype. This should be a Dtype
277 compatible with the ExtensionArray.
278 copy : bool, default False
279 If True, copy the underlying data.
280
281 Returns
282 -------
283 ExtensionArray
284 """
285 raise AbstractMethodError(cls)
286
287 @classmethod
288 def _from_factorized(cls, values, original):
289 """
290 Reconstruct an ExtensionArray after factorization.
291
292 Parameters
293 ----------
294 values : ndarray
295 An integer ndarray with the factorized values.
296 original : ExtensionArray
297 The original ExtensionArray that factorize was called on.
298
299 See Also
300 --------
301 factorize : Top-level factorize method that dispatches here.
302 ExtensionArray.factorize : Encode the extension array as an enumerated type.
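
        Examples
        --------
        A minimal sketch, assuming the values returned by
        ``_values_for_factorize`` can round-trip through ``_from_sequence``:

        .. code-block:: python

            @classmethod
            def _from_factorized(cls, values, original):
                return cls._from_sequence(values, dtype=original.dtype)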
303 """
304 raise AbstractMethodError(cls)
305
306 # ------------------------------------------------------------------------
307 # Must be a Sequence
308 # ------------------------------------------------------------------------
309 @overload
310 def __getitem__(self, item: ScalarIndexer) -> Any:
311 ...
312
313 @overload
314 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
315 ...
316
317 def __getitem__(
318 self: ExtensionArrayT, item: PositionalIndexer
319 ) -> ExtensionArrayT | Any:
320 """
321 Select a subset of self.
322
323 Parameters
324 ----------
325 item : int, slice, or ndarray
326 * int: The position in 'self' to get.
327
328 * slice: A slice object, where 'start', 'stop', and 'step' are
329 integers or None
330
331 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
332
333 * list[int]: A list of int
334
335 Returns
336 -------
337 item : scalar or ExtensionArray
338
339 Notes
340 -----
341 For scalar ``item``, return a scalar value suitable for the array's
342 type. This should be an instance of ``self.dtype.type``.
343
344 For slice ``key``, return an instance of ``ExtensionArray``, even
345 if the slice is length 0 or 1.
346
347 For a boolean mask, return an instance of ``ExtensionArray``, filtered
348 to the values where ``item`` is True.
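
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute:

        .. code-block:: python

            def __getitem__(self, item):
                if isinstance(item, (int, np.integer)):
                    # scalar: return an instance of ``self.dtype.type``
                    return self._data[item]
                # slice, boolean mask, or integer array: return a new
                # instance of the same ExtensionArray subclass
                return type(self)._from_sequence(
                    self._data[item], dtype=self.dtype
                )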
349 """
350 raise AbstractMethodError(self)
351
352 def __setitem__(self, key, value) -> None:
353 """
354 Set one or more values inplace.
355
356 This method is not required to satisfy the pandas extension array
357 interface.
358
359 Parameters
360 ----------
361 key : int, ndarray, or slice
362 When called from, e.g. ``Series.__setitem__``, ``key`` will be
363 one of
364
365 * scalar int
366 * ndarray of integers.
367 * boolean ndarray
368 * slice object
369
370 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set for ``key``.
372
373 Returns
374 -------
375 None
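
        Examples
        --------
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute (coercion is delegated to
        NumPy here; real implementations may need more care):

        .. code-block:: python

            def __setitem__(self, key, value):
                # works for a scalar ``value`` (broadcast) as well as a
                # same-length sequence of values
                value = np.asarray(value, dtype=self._data.dtype)
                self._data[key] = value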
376 """
377 # Some notes to the ExtensionArray implementor who may have ended up
378 # here. While this method is not required for the interface, if you
379 # *do* choose to implement __setitem__, then some semantics should be
380 # observed:
381 #
382 # * Setting multiple values : ExtensionArrays should support setting
383 # multiple values at once, 'key' will be a sequence of integers and
384 # 'value' will be a same-length sequence.
385 #
386 # * Broadcasting : For a sequence 'key' and a scalar 'value',
387 # each position in 'key' should be set to 'value'.
388 #
389 # * Coercion : Most users will expect basic coercion to work. For
390 # example, a string like '2018-01-01' is coerced to a datetime
391 # when setting on a datetime64ns array. In general, if the
392 # __init__ method coerces that value, then so should __setitem__
393 # Note, also, that Series/DataFrame.where internally use __setitem__
394 # on a copy of the data.
395 raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
396
397 def __len__(self) -> int:
398 """
        Length of this array.
400
401 Returns
402 -------
403 length : int
404 """
405 raise AbstractMethodError(self)
406
407 def __iter__(self) -> Iterator[Any]:
408 """
409 Iterate over elements of the array.
410 """
411 # This needs to be implemented so that pandas recognizes extension
412 # arrays as list-like. The default implementation makes successive
413 # calls to ``__getitem__``, which may be slower than necessary.
414 for i in range(len(self)):
415 yield self[i]
416
417 def __contains__(self, item: object) -> bool | np.bool_:
418 """
419 Return for `item in self`.
420 """
421 # GH37867
422 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
423 # would raise a TypeError. The implementation below works around that.
424 if is_scalar(item) and isna(item):
425 if not self._can_hold_na:
426 return False
427 elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
428 return self._hasna
429 else:
430 return False
431 else:
432 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
433 # attribute "any"
434 return (item == self).any() # type: ignore[union-attr]
435
436 # error: Signature of "__eq__" incompatible with supertype "object"
437 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override]
438 """
439 Return for `self == other` (element-wise equality).
440 """
441 # Implementer note: this should return a boolean numpy ndarray or
442 # a boolean ExtensionArray.
443 # When `other` is one of Series, Index, or DataFrame, this method should
444 # return NotImplemented (to ensure that those objects are responsible for
445 # first unpacking the arrays, and then dispatch the operation to the
446 # underlying arrays)
447 raise AbstractMethodError(self)
448
449 # error: Signature of "__ne__" incompatible with supertype "object"
450 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
451 """
        Return for `self != other` (element-wise inequality).
453 """
454 return ~(self == other)
455
456 def to_numpy(
457 self,
458 dtype: npt.DTypeLike | None = None,
459 copy: bool = False,
460 na_value: object = lib.no_default,
461 ) -> np.ndarray:
462 """
463 Convert to a NumPy ndarray.
464
465 This is similar to :meth:`numpy.asarray`, but may provide additional control
466 over how the conversion is done.
467
468 Parameters
469 ----------
470 dtype : str or numpy.dtype, optional
471 The dtype to pass to :meth:`numpy.asarray`.
472 copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
477 na_value : Any, optional
478 The value to use for missing values. The default value depends
479 on `dtype` and the type of the array.
480
481 Returns
482 -------
483 numpy.ndarray
484 """
485 result = np.asarray(self, dtype=dtype)
486 if copy or na_value is not lib.no_default:
487 result = result.copy()
488 if na_value is not lib.no_default:
489 result[self.isna()] = na_value
490 return result
491
492 # ------------------------------------------------------------------------
493 # Required attributes
494 # ------------------------------------------------------------------------
495
496 @property
497 def dtype(self) -> ExtensionDtype:
498 """
499 An instance of 'ExtensionDtype'.
500 """
501 raise AbstractMethodError(self)
502
503 @property
504 def shape(self) -> Shape:
505 """
506 Return a tuple of the array dimensions.
507 """
508 return (len(self),)
509
510 @property
511 def size(self) -> int:
512 """
513 The number of elements in the array.
514 """
515 # error: Incompatible return value type (got "signedinteger[_64Bit]",
516 # expected "int") [return-value]
517 return np.prod(self.shape) # type: ignore[return-value]
518
519 @property
520 def ndim(self) -> int:
521 """
522 Extension Arrays are only allowed to be 1-dimensional.
523 """
524 return 1
525
526 @property
527 def nbytes(self) -> int:
528 """
529 The number of bytes needed to store this object in memory.
530 """
531 # If this is expensive to compute, return an approximate lower bound
532 # on the number of bytes needed.
533 raise AbstractMethodError(self)
534
535 # ------------------------------------------------------------------------
536 # Additional Methods
537 # ------------------------------------------------------------------------
538
539 @overload
540 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
541 ...
542
543 @overload
544 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
545 ...
546
547 @overload
548 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
549 ...
550
551 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
552 """
553 Cast to a NumPy array or ExtensionArray with 'dtype'.
554
555 Parameters
556 ----------
557 dtype : str or dtype
558 Typecode or data-type to which the array is cast.
559 copy : bool, default True
560 Whether to copy the data, even if not necessary. If False,
561 a copy is made only if the old dtype does not match the
562 new dtype.
563
564 Returns
565 -------
566 np.ndarray or pandas.api.extensions.ExtensionArray
            An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``;
            otherwise a NumPy ndarray with 'dtype' for its dtype.
569 """
570
571 dtype = pandas_dtype(dtype)
572 if is_dtype_equal(dtype, self.dtype):
573 if not copy:
574 return self
575 else:
576 return self.copy()
577
578 if isinstance(dtype, ExtensionDtype):
579 cls = dtype.construct_array_type()
580 return cls._from_sequence(self, dtype=dtype, copy=copy)
581
582 elif is_datetime64_dtype(dtype):
583 from pandas.core.arrays import DatetimeArray
584
585 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
586
587 elif is_timedelta64_dtype(dtype):
588 from pandas.core.arrays import TimedeltaArray
589
590 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
591
592 return np.array(self, dtype=dtype, copy=copy)
593
594 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
595 """
596 A 1-D array indicating if each value is missing.
597
598 Returns
599 -------
600 numpy.ndarray or pandas.api.extensions.ExtensionArray
601 In most cases, this should return a NumPy ndarray. For
602 exceptional cases like ``SparseArray``, where returning
603 an ndarray would be expensive, an ExtensionArray may be
604 returned.
605
606 Notes
607 -----
608 If returning an ExtensionArray, then
609
610 * ``na_values._is_boolean`` should be True
611 * `na_values` should implement :func:`ExtensionArray._reduce`
612 * ``na_values.any`` and ``na_values.all`` should be implemented
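
        A minimal sketch for a float-backed array (hypothetical
        ``self._data`` ndarray):

        .. code-block:: python

            def isna(self):
                # NaN is the only missing-value marker in this sketch
                return np.isnan(self._data)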
613 """
614 raise AbstractMethodError(self)
615
616 @property
617 def _hasna(self) -> bool:
618 # GH#22680
619 """
620 Equivalent to `self.isna().any()`.
621
622 Some ExtensionArray subclasses may be able to optimize this check.
623 """
624 return bool(self.isna().any())
625
626 def _values_for_argsort(self) -> np.ndarray:
627 """
628 Return values for sorting.
629
630 Returns
631 -------
632 ndarray
633 The transformed values should maintain the ordering between values
634 within the array.
635
636 See Also
637 --------
638 ExtensionArray.argsort : Return the indices that would sort this array.
639
640 Notes
641 -----
642 The caller is responsible for *not* modifying these values in-place, so
643 it is safe for implementors to give views on `self`.
644
645 Functions that use this (e.g. ExtensionArray.argsort) should ignore
646 entries with missing values in the original array (according to `self.isna()`).
647 This means that the corresponding entries in the returned array don't need to
648 be modified to sort correctly.
649 """
650 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
651 return np.array(self)
652
653 def argsort(
654 self,
655 *,
656 ascending: bool = True,
657 kind: SortKind = "quicksort",
658 na_position: str = "last",
659 **kwargs,
660 ) -> np.ndarray:
661 """
662 Return the indices that would sort this array.
663
664 Parameters
665 ----------
666 ascending : bool, default True
667 Whether the indices should result in an ascending
668 or descending sort.
669 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
670 Sorting algorithm.
        **kwargs
            Passed through to :func:`numpy.argsort`.
673
674 Returns
675 -------
676 np.ndarray[np.intp]
677 Array of indices that sort ``self``. If NaN values are contained,
678 NaN values are placed at the end.
679
680 See Also
681 --------
682 numpy.argsort : Sorting implementation used internally.
683 """
684 # Implementor note: You have two places to override the behavior of
685 # argsort.
686 # 1. _values_for_argsort : construct the values passed to np.argsort
687 # 2. argsort : total control over sorting. In case of overriding this,
688 # it is recommended to also override argmax/argmin
689 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
690
691 values = self._values_for_argsort()
692 return nargsort(
693 values,
694 kind=kind,
695 ascending=ascending,
696 na_position=na_position,
697 mask=np.asarray(self.isna()),
698 )
699
700 def argmin(self, skipna: bool = True) -> int:
701 """
702 Return the index of minimum value.
703
704 In case of multiple occurrences of the minimum value, the index
705 corresponding to the first occurrence is returned.
706
707 Parameters
708 ----------
709 skipna : bool, default True
710
711 Returns
712 -------
713 int
714
715 See Also
716 --------
717 ExtensionArray.argmax
718 """
719 # Implementor note: You have two places to override the behavior of
720 # argmin.
721 # 1. _values_for_argsort : construct the values used in nargminmax
722 # 2. argmin itself : total control over sorting.
723 validate_bool_kwarg(skipna, "skipna")
724 if not skipna and self._hasna:
725 raise NotImplementedError
726 return nargminmax(self, "argmin")
727
728 def argmax(self, skipna: bool = True) -> int:
729 """
730 Return the index of maximum value.
731
732 In case of multiple occurrences of the maximum value, the index
733 corresponding to the first occurrence is returned.
734
735 Parameters
736 ----------
737 skipna : bool, default True
738
739 Returns
740 -------
741 int
742
743 See Also
744 --------
745 ExtensionArray.argmin
746 """
747 # Implementor note: You have two places to override the behavior of
748 # argmax.
749 # 1. _values_for_argsort : construct the values used in nargminmax
750 # 2. argmax itself : total control over sorting.
751 validate_bool_kwarg(skipna, "skipna")
752 if not skipna and self._hasna:
753 raise NotImplementedError
754 return nargminmax(self, "argmax")
755
756 def fillna(
757 self: ExtensionArrayT,
758 value: object | ArrayLike | None = None,
759 method: FillnaOptions | None = None,
760 limit: int | None = None,
761 ) -> ExtensionArrayT:
762 """
763 Fill NA/NaN values using the specified method.
764
765 Parameters
766 ----------
767 value : scalar, array-like
768 If a scalar value is passed it is used to fill all missing values.
769 Alternatively, an array-like 'value' can be given. It's expected
770 that the array-like have the same length as 'self'.
771 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
772 Method to use for filling holes in reindexed Series:
773
774 * pad / ffill: propagate last valid observation forward to next valid.
775 * backfill / bfill: use NEXT valid observation to fill gap.
776
777 limit : int, default None
778 If method is specified, this is the maximum number of consecutive
779 NaN values to forward/backward fill. In other words, if there is
780 a gap with more than this number of consecutive NaNs, it will only
781 be partially filled. If method is not specified, this is the
782 maximum number of entries along the entire axis where NaNs will be
783 filled.
784
785 Returns
786 -------
787 ExtensionArray
788 With NA/NaN filled.
789 """
790 value, method = validate_fillna_kwargs(value, method)
791
792 mask = self.isna()
793 # error: Argument 2 to "check_value_size" has incompatible type
794 # "ExtensionArray"; expected "ndarray"
795 value = missing.check_value_size(
796 value, mask, len(self) # type: ignore[arg-type]
797 )
798
799 if mask.any():
800 if method is not None:
801 func = missing.get_fill_func(method)
802 npvalues = self.astype(object)
803 func(npvalues, limit=limit, mask=mask)
804 new_values = self._from_sequence(npvalues, dtype=self.dtype)
805 else:
806 # fill with value
807 new_values = self.copy()
808 new_values[mask] = value
809 else:
810 new_values = self.copy()
811 return new_values
812
813 def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
814 """
815 Return ExtensionArray without NA values.
816
817 Returns
818 -------
819 pandas.api.extensions.ExtensionArray
820 """
821 # error: Unsupported operand type for ~ ("ExtensionArray")
822 return self[~self.isna()] # type: ignore[operator]
823
824 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
825 """
826 Shift values by desired number.
827
828 Newly introduced missing values are filled with
829 ``self.dtype.na_value``.
830
831 Parameters
832 ----------
833 periods : int, default 1
834 The number of periods to shift. Negative values are allowed
835 for shifting backwards.
836
837 fill_value : object, optional
838 The scalar value to use for newly introduced missing values.
839 The default is ``self.dtype.na_value``.
840
841 Returns
842 -------
843 ExtensionArray
844 Shifted.
845
846 Notes
847 -----
848 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
849 returned.
850
851 If ``periods > len(self)``, then an array of size
852 len(self) is returned, with all values filled with
853 ``self.dtype.na_value``.
854 """
855 # Note: this implementation assumes that `self.dtype.na_value` can be
856 # stored in an instance of your ExtensionArray with `self.dtype`.
857 if not len(self) or periods == 0:
858 return self.copy()
859
860 if isna(fill_value):
861 fill_value = self.dtype.na_value
862
863 empty = self._from_sequence(
864 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
865 )
866 if periods > 0:
867 a = empty
868 b = self[:-periods]
869 else:
870 a = self[abs(periods) :]
871 b = empty
872 return self._concat_same_type([a, b])
873
874 def unique(self: ExtensionArrayT) -> ExtensionArrayT:
875 """
876 Compute the ExtensionArray of unique values.
877
878 Returns
879 -------
880 pandas.api.extensions.ExtensionArray
881 """
882 uniques = unique(self.astype(object))
883 return self._from_sequence(uniques, dtype=self.dtype)
884
885 def searchsorted(
886 self,
887 value: NumpyValueArrayLike | ExtensionArray,
888 side: Literal["left", "right"] = "left",
889 sorter: NumpySorter = None,
890 ) -> npt.NDArray[np.intp] | np.intp:
891 """
892 Find indices where elements should be inserted to maintain order.
893
894 Find the indices into a sorted array `self` (a) such that, if the
895 corresponding elements in `value` were inserted before the indices,
896 the order of `self` would be preserved.
897
898 Assuming that `self` is sorted:
899
900 ====== ================================
901 `side` returned index `i` satisfies
902 ====== ================================
903 left ``self[i-1] < value <= self[i]``
904 right ``self[i-1] <= value < self[i]``
905 ====== ================================
906
907 Parameters
908 ----------
909 value : array-like, list or scalar
910 Value(s) to insert into `self`.
911 side : {'left', 'right'}, optional
912 If 'left', the index of the first suitable location found is given.
913 If 'right', return the last such index. If there is no suitable
914 index, return either 0 or N (where N is the length of `self`).
915 sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of argsort.
918
919 Returns
920 -------
921 array of ints or int
922 If value is array-like, array of insertion points.
923 If value is scalar, a single integer.
924
925 See Also
926 --------
927 numpy.searchsorted : Similar method from NumPy.
928 """
929 # Note: the base tests provided by pandas only test the basics.
930 # We do not test
931 # 1. Values outside the range of the `data_for_sorting` fixture
932 # 2. Values between the values in the `data_for_sorting` fixture
933 # 3. Missing values.
934 arr = self.astype(object)
935 if isinstance(value, ExtensionArray):
936 value = value.astype(object)
937 return arr.searchsorted(value, side=side, sorter=sorter)
938
939 def equals(self, other: object) -> bool:
940 """
        Return whether another array is equivalent to this array.
942
943 Equivalent means that both arrays have the same shape and dtype, and
944 all values compare equal. Missing values in the same location are
945 considered equal (in contrast with normal equality).
946
947 Parameters
948 ----------
949 other : ExtensionArray
950 Array to compare to this Array.
951
952 Returns
953 -------
954 boolean
955 Whether the arrays are equivalent.
956 """
957 if type(self) != type(other):
958 return False
959 other = cast(ExtensionArray, other)
960 if not is_dtype_equal(self.dtype, other.dtype):
961 return False
962 elif len(self) != len(other):
963 return False
964 else:
965 equal_values = self == other
966 if isinstance(equal_values, ExtensionArray):
967 # boolean array with NA -> fill with False
968 equal_values = equal_values.fillna(False)
969 # error: Unsupported left operand type for & ("ExtensionArray")
970 equal_na = self.isna() & other.isna() # type: ignore[operator]
971 return bool((equal_values | equal_na).all())
972
973 def isin(self, values) -> npt.NDArray[np.bool_]:
974 """
975 Pointwise comparison for set containment in the given values.
976
977 Roughly equivalent to `np.array([x in values for x in self])`
978
979 Parameters
980 ----------
981 values : Sequence
982
983 Returns
984 -------
985 np.ndarray[bool]
986 """
987 return isin(np.asarray(self), values)
988
989 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
990 """
991 Return an array and missing value suitable for factorization.
992
993 Returns
994 -------
995 values : ndarray
996
997 An array suitable for factorization. This should maintain order
998 and be a supported dtype (Float64, Int64, UInt64, String, Object).
999 By default, the extension array is cast to object dtype.
1000 na_value : object
1001 The value in `values` to consider missing. This will be treated
1002 as NA in the factorization routines, so it will be coded as
1003 `-1` and not included in `uniques`. By default,
1004 ``np.nan`` is used.
1005
1006 Notes
1007 -----
1008 The values returned by this method are also used in
1009 :func:`pandas.util.hash_pandas_object`.
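
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute, using ``np.nan`` as the
        missing-value marker:

        .. code-block:: python

            def _values_for_factorize(self):
                # avoids the object-dtype cast done by the default
                return self._data, np.nan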
1010 """
1011 return self.astype(object), np.nan
1012
1013 def factorize(
1014 self,
1015 use_na_sentinel: bool = True,
1016 ) -> tuple[np.ndarray, ExtensionArray]:
1017 """
1018 Encode the extension array as an enumerated type.
1019
1020 Parameters
1021 ----------
1022 use_na_sentinel : bool, default True
            If True, the sentinel -1 will be used for NaN values. If False,
            NaN values will be encoded as non-negative integers and will not
            be dropped from the uniques of the values.
1026
1027 .. versionadded:: 1.5.0
1028
1029 Returns
1030 -------
1031 codes : ndarray
1032 An integer NumPy array that's an indexer into the original
1033 ExtensionArray.
1034 uniques : ExtensionArray
1035 An ExtensionArray containing the unique values of `self`.
1036
1037 .. note::
1038
1039 uniques will *not* contain an entry for the NA value of
1040 the ExtensionArray if there are any missing values present
1041 in `self`.
1042
1043 See Also
1044 --------
1045 factorize : Top-level factorize method that dispatches here.
1046
1047 Notes
1048 -----
1049 :meth:`pandas.factorize` offers a `sort` keyword as well.
1050 """
1051 # Implementer note: There are two ways to override the behavior of
1052 # pandas.factorize
1053 # 1. _values_for_factorize and _from_factorize.
1054 # Specify the values passed to pandas' internal factorization
1055 # routines, and how to convert from those values back to the
1056 # original ExtensionArray.
1057 # 2. ExtensionArray.factorize.
1058 # Complete control over factorization.
1059 arr, na_value = self._values_for_factorize()
1060
1061 codes, uniques = factorize_array(
1062 arr, use_na_sentinel=use_na_sentinel, na_value=na_value
1063 )
1064
1065 uniques_ea = self._from_factorized(uniques, self)
1066 return codes, uniques_ea
1067
1068 _extension_array_shared_docs[
1069 "repeat"
1070 ] = """
1071 Repeat elements of a %(klass)s.
1072
1073 Returns a new %(klass)s where each element of the current %(klass)s
1074 is repeated consecutively a given number of times.
1075
1076 Parameters
1077 ----------
1078 repeats : int or array of ints
1079 The number of repetitions for each element. This should be a
1080 non-negative integer. Repeating 0 times will return an empty
1081 %(klass)s.
1082 axis : None
1083 Must be ``None``. Has no effect but is accepted for compatibility
1084 with numpy.
1085
1086 Returns
1087 -------
1088 %(klass)s
1089 Newly created %(klass)s with repeated elements.
1090
1091 See Also
1092 --------
1093 Series.repeat : Equivalent function for Series.
1094 Index.repeat : Equivalent function for Index.
1095 numpy.repeat : Similar method for :class:`numpy.ndarray`.
1096 ExtensionArray.take : Take arbitrary positions.
1097
1098 Examples
1099 --------
1100 >>> cat = pd.Categorical(['a', 'b', 'c'])
1101 >>> cat
1102 ['a', 'b', 'c']
1103 Categories (3, object): ['a', 'b', 'c']
1104 >>> cat.repeat(2)
1105 ['a', 'a', 'b', 'b', 'c', 'c']
1106 Categories (3, object): ['a', 'b', 'c']
1107 >>> cat.repeat([1, 2, 3])
1108 ['a', 'b', 'b', 'c', 'c', 'c']
1109 Categories (3, object): ['a', 'b', 'c']
1110 """
1111
1112 @Substitution(klass="ExtensionArray")
1113 @Appender(_extension_array_shared_docs["repeat"])
1114 def repeat(
1115 self: ExtensionArrayT, repeats: int | Sequence[int], axis: AxisInt | None = None
1116 ) -> ExtensionArrayT:
1117 nv.validate_repeat((), {"axis": axis})
1118 ind = np.arange(len(self)).repeat(repeats)
1119 return self.take(ind)
1120
1121 # ------------------------------------------------------------------------
1122 # Indexing methods
1123 # ------------------------------------------------------------------------
1124
1125 def take(
1126 self: ExtensionArrayT,
1127 indices: TakeIndexer,
1128 *,
1129 allow_fill: bool = False,
1130 fill_value: Any = None,
1131 ) -> ExtensionArrayT:
1132 """
1133 Take elements from an array.
1134
1135 Parameters
1136 ----------
1137 indices : sequence of int or one-dimensional np.ndarray of int
1138 Indices to be taken.
1139 allow_fill : bool, default False
1140 How to handle negative values in `indices`.
1141
1142 * False: negative values in `indices` indicate positional indices
1143 from the right (the default). This is similar to
1144 :func:`numpy.take`.
1145
1146 * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              negative values raise a ``ValueError``.
1149
1150 fill_value : any, optional
1151 Fill value to use for NA-indices when `allow_fill` is True.
1152 This may be ``None``, in which case the default NA value for
1153 the type, ``self.dtype.na_value``, is used.
1154
1155 For many ExtensionArrays, there will be two representations of
1156 `fill_value`: a user-facing "boxed" scalar, and a low-level
1157 physical NA value. `fill_value` should be the user-facing version,
1158 and the implementation should handle translating that to the
1159 physical version for processing the take if necessary.
1160
1161 Returns
1162 -------
1163 ExtensionArray
1164
1165 Raises
1166 ------
1167 IndexError
1168 When the indices are out of bounds for the array.
1169 ValueError
1170 When `indices` contains negative values other than ``-1``
1171 and `allow_fill` is True.
1172
1173 See Also
1174 --------
1175 numpy.take : Take elements from an array along an axis.
1176 api.extensions.take : Take elements from an array.
1177
1178 Notes
1179 -----
1180 ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
1181 ``iloc``, when `indices` is a sequence of values. Additionally,
1182 it's called by :meth:`Series.reindex`, or any other method
1183 that causes realignment, with a `fill_value`.
1184
1185 Examples
1186 --------
1187 Here's an example implementation, which relies on casting the
1188 extension array to object dtype. This uses the helper method
1189 :func:`pandas.api.extensions.take`.
1190
1191 .. code-block:: python
1192
1193 def take(self, indices, allow_fill=False, fill_value=None):
1194 from pandas.core.algorithms import take
1195
1196 # If the ExtensionArray is backed by an ndarray, then
1197 # just pass that here instead of coercing to object.
1198 data = self.astype(object)
1199
1200 if allow_fill and fill_value is None:
1201 fill_value = self.dtype.na_value
1202
1203 # fill value should always be translated from the scalar
1204 # type for the array, to the physical storage type for
1205 # the data, before passing to take.
1206
1207 result = take(data, indices, fill_value=fill_value,
1208 allow_fill=allow_fill)
1209 return self._from_sequence(result, dtype=self.dtype)
1210 """
1211 # Implementer note: The `fill_value` parameter should be a user-facing
1212 # value, an instance of self.dtype.type. When passed `fill_value=None`,
1213 # the default of `self.dtype.na_value` should be used.
1214 # This may differ from the physical storage type your ExtensionArray
1215 # uses. In this case, your implementation is responsible for casting
1216 # the user-facing type to the storage type, before using
1217 # pandas.api.extensions.take
1218 raise AbstractMethodError(self)
1219
1220 def copy(self: ExtensionArrayT) -> ExtensionArrayT:
1221 """
1222 Return a copy of the array.
1223
1224 Returns
1225 -------
1226 ExtensionArray
1227 """
1228 raise AbstractMethodError(self)
1229
1230 def view(self, dtype: Dtype | None = None) -> ArrayLike:
1231 """
1232 Return a view on the array.
1233
1234 Parameters
1235 ----------
1236 dtype : str, np.dtype, or ExtensionDtype, optional
1237 Default None.
1238
1239 Returns
1240 -------
1241 ExtensionArray or np.ndarray
1242 A view on the :class:`ExtensionArray`'s data.
1243 """
1244 # NB:
1245 # - This must return a *new* object referencing the same data, not self.
1246 # - The only case that *must* be implemented is with dtype=None,
1247 # giving a view with the same dtype as self.
1248 if dtype is not None:
1249 raise NotImplementedError(dtype)
1250 return self[:]
1251
1252 # ------------------------------------------------------------------------
1253 # Printing
1254 # ------------------------------------------------------------------------
1255
1256 def __repr__(self) -> str:
1257 if self.ndim > 1:
1258 return self._repr_2d()
1259
1260 from pandas.io.formats.printing import format_object_summary
1261
1262 # the short repr has no trailing newline, while the truncated
1263 # repr does. So we include a newline in our template, and strip
1264 # any trailing newlines from format_object_summary
1265 data = format_object_summary(
1266 self, self._formatter(), indent_for_name=False
1267 ).rstrip(", \n")
1268 class_name = f"<{type(self).__name__}>\n"
1269 return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
1270
1271 def _repr_2d(self) -> str:
1272 from pandas.io.formats.printing import format_object_summary
1273
1274 # the short repr has no trailing newline, while the truncated
1275 # repr does. So we include a newline in our template, and strip
1276 # any trailing newlines from format_object_summary
1277 lines = [
1278 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
1279 ", \n"
1280 )
1281 for x in self
1282 ]
1283 data = ",\n".join(lines)
1284 class_name = f"<{type(self).__name__}>"
1285 return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
1286
1287 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
1288 """
1289 Formatting function for scalar values.
1290
1291 This is used in the default '__repr__'. The returned formatting
1292 function receives instances of your scalar type.
1293
1294 Parameters
1295 ----------
1296 boxed : bool, default False
            An indicator of whether your array is being printed
1298 within a Series, DataFrame, or Index (True), or just by
1299 itself (False). This may be useful if you want scalar values
1300 to appear differently within a Series versus on its own (e.g.
1301 quoted or not).
1302
1303 Returns
1304 -------
1305 Callable[[Any], str]
1306 A callable that gets instances of the scalar type and
1307 returns a string. By default, :func:`repr` is used
1308 when ``boxed=False`` and :func:`str` is used when
1309 ``boxed=True``.
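
        A minimal sketch of an override that quotes scalars only in the
        standalone repr (purely illustrative):

        .. code-block:: python

            def _formatter(self, boxed=False):
                if boxed:
                    return str
                return lambda x: f"'{x}'"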
1310 """
1311 if boxed:
1312 return str
1313 return repr
1314
1315 # ------------------------------------------------------------------------
1316 # Reshaping
1317 # ------------------------------------------------------------------------
1318
1319 def transpose(self, *axes: int) -> ExtensionArray:
1320 """
1321 Return a transposed view on this array.
1322
1323 Because ExtensionArrays are always 1D, this is a no-op. It is included
1324 for compatibility with np.ndarray.
1325 """
1326 return self[:]
1327
1328 @property
1329 def T(self) -> ExtensionArray:
1330 return self.transpose()
1331
1332 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
1333 """
1334 Return a flattened view on this array.
1335
1336 Parameters
1337 ----------
1338 order : {None, 'C', 'F', 'A', 'K'}, default 'C'
1339
1340 Returns
1341 -------
1342 ExtensionArray
1343
1344 Notes
1345 -----
1346 - Because ExtensionArrays are 1D-only, this is a no-op.
        - The "order" argument is ignored; it exists only for compatibility
          with NumPy.
1348 """
1349 return self
1350
1351 @classmethod
1352 def _concat_same_type(
1353 cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
1354 ) -> ExtensionArrayT:
1355 """
        Concatenate multiple arrays of this dtype.
1357
1358 Parameters
1359 ----------
1360 to_concat : sequence of this type
1361
1362 Returns
1363 -------
1364 ExtensionArray
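
        Examples
        --------
        A minimal sketch for an ndarray-backed subclass (hypothetical
        ``._data`` attribute; the constructor is assumed to accept an
        ndarray):

        .. code-block:: python

            @classmethod
            def _concat_same_type(cls, to_concat):
                return cls(np.concatenate([arr._data for arr in to_concat]))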
1365 """
1366 # Implementer note: this method will only be called with a sequence of
1367 # ExtensionArrays of this class and with the same dtype as self. This
1368 # should allow "easy" concatenation (no upcasting needed), and result
1369 # in a new ExtensionArray of the same dtype.
1370 # Note: this strict behaviour is only guaranteed starting with pandas 1.1
1371 raise AbstractMethodError(cls)
1372
1373 # The _can_hold_na attribute is set to True so that pandas internals
1374 # will use the ExtensionDtype.na_value as the NA value in operations
1375 # such as take(), reindex(), shift(), etc. In addition, those results
1376 # will then be of the ExtensionArray subclass rather than an array
1377 # of objects
1378 @cache_readonly
1379 def _can_hold_na(self) -> bool:
1380 return self.dtype._can_hold_na
1381
1382 def _accumulate(
1383 self, name: str, *, skipna: bool = True, **kwargs
1384 ) -> ExtensionArray:
1385 """
1386 Return an ExtensionArray performing an accumulation operation.
1387
1388 The underlying data type might change.
1389
1390 Parameters
1391 ----------
1392 name : str
1393 Name of the function, supported values are:
1394 - cummin
1395 - cummax
1396 - cumsum
1397 - cumprod
1398 skipna : bool, default True
1399 If True, skip NA values.
1400 **kwargs
1401 Additional keyword arguments passed to the accumulation function.
1402 Currently, there is no supported kwarg.
1403
1404 Returns
1405 -------
1406 array
1407
1408 Raises
1409 ------
1410 NotImplementedError : subclass does not define accumulations
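
        Examples
        --------
        A minimal sketch that dispatches to NumPy for an array backed by a
        hypothetical ``self._data`` ndarray (NA handling via ``skipna`` is
        omitted for brevity):

        .. code-block:: python

            def _accumulate(self, name, *, skipna=True, **kwargs):
                funcs = {
                    "cumsum": np.cumsum,
                    "cumprod": np.cumprod,
                    "cummin": np.minimum.accumulate,
                    "cummax": np.maximum.accumulate,
                }
                if name not in funcs:
                    raise NotImplementedError(f"{name} is not supported")
                return type(self)._from_sequence(funcs[name](self._data))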
1411 """
1412 raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")
1413
1414 def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
1415 """
1416 Return a scalar result of performing the reduction operation.
1417
1418 Parameters
1419 ----------
1420 name : str
1421 Name of the function, supported values are:
1422 { any, all, min, max, sum, mean, median, prod,
1423 std, var, sem, kurt, skew }.
1424 skipna : bool, default True
1425 If True, skip NaN values.
1426 **kwargs
1427 Additional keyword arguments passed to the reduction function.
1428 Currently, `ddof` is the only supported kwarg.
1429
1430 Returns
1431 -------
1432 scalar
1433
1434 Raises
1435 ------
1436 TypeError : subclass does not define reductions
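
        Examples
        --------
        The default implementation dispatches to a method of the same name,
        so a subclass that defines e.g. ``sum`` gets that reduction for
        free. A sketch of such a method for a hypothetical ``self._data``
        ndarray:

        .. code-block:: python

            def sum(self, *, skipna=True, **kwargs):
                data = self._data
                if skipna:
                    data = data[~np.asarray(self.isna())]
                return data.sum()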
1437 """
1438 meth = getattr(self, name, None)
1439 if meth is None:
1440 raise TypeError(
1441 f"'{type(self).__name__}' with dtype {self.dtype} "
1442 f"does not support reduction '{name}'"
1443 )
1444 return meth(skipna=skipna, **kwargs)
1445
1446 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
1447 # Incompatible types in assignment (expression has type "None", base class
1448 # "object" defined the type as "Callable[[object], int]")
1449 __hash__: ClassVar[None] # type: ignore[assignment]
1450
1451 # ------------------------------------------------------------------------
1452 # Non-Optimized Default Methods; in the case of the private methods here,
1453 # these are not guaranteed to be stable across pandas versions.
1454
1455 def tolist(self) -> list:
1456 """
1457 Return a list of the values.
1458
1459 These are each a scalar type, which is a Python scalar
1460 (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).
1462
1463 Returns
1464 -------
1465 list
1466 """
1467 if self.ndim > 1:
1468 return [x.tolist() for x in self]
1469 return list(self)
1470
1471 def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
1472 indexer = np.delete(np.arange(len(self)), loc)
1473 return self.take(indexer)
1474
1475 def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT:
1476 """
1477 Insert an item at the given position.
1478
1479 Parameters
1480 ----------
1481 loc : int
1482 item : scalar-like
1483
1484 Returns
1485 -------
1486 same type as self
1487
1488 Notes
1489 -----
1490 This method should be both type and dtype-preserving. If the item
1491 cannot be held in an array of this type/dtype, either ValueError or
1492 TypeError should be raised.
1493
1494 The default implementation relies on _from_sequence to raise on invalid
1495 items.
1496 """
1497 loc = validate_insert_loc(loc, len(self))
1498
1499 item_arr = type(self)._from_sequence([item], dtype=self.dtype)
1500
1501 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
1502
1503 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1504 """
1505 Analogue to np.putmask(self, mask, value)
1506
1507 Parameters
1508 ----------
1509 mask : np.ndarray[bool]
1510 value : scalar or listlike
1511 If listlike, must be arraylike with same length as self.
1512
1513 Returns
1514 -------
1515 None
1516
1517 Notes
1518 -----
1519 Unlike np.putmask, we do not repeat listlike values with mismatched length.
1520 'value' should either be a scalar or an arraylike with the same length
1521 as self.
1522 """
1523 if is_list_like(value):
1524 val = value[mask]
1525 else:
1526 val = value
1527
1528 self[mask] = val
1529
1530 def _where(
1531 self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value
1532 ) -> ExtensionArrayT:
1533 """
1534 Analogue to np.where(mask, self, value)
1535
1536 Parameters
1537 ----------
1538 mask : np.ndarray[bool]
1539 value : scalar or listlike
1540
1541 Returns
1542 -------
1543 same type as self
1544 """
1545 result = self.copy()
1546
1547 if is_list_like(value):
1548 val = value[~mask]
1549 else:
1550 val = value
1551
1552 result[~mask] = val
1553 return result
1554
1555 def _fill_mask_inplace(
1556 self, method: str, limit, mask: npt.NDArray[np.bool_]
1557 ) -> None:
1558 """
1559 Replace values in locations specified by 'mask' using pad or backfill.
1560
        See Also
1562 --------
1563 ExtensionArray.fillna
1564 """
1565 func = missing.get_fill_func(method)
1566 npvalues = self.astype(object)
1567 # NB: if we don't copy mask here, it may be altered inplace, which
1568 # would mess up the `self[mask] = ...` below.
1569 func(npvalues, limit=limit, mask=mask.copy())
1570 new_values = self._from_sequence(npvalues, dtype=self.dtype)
1571 self[mask] = new_values[mask]
1572
1573 def _rank(
1574 self,
1575 *,
1576 axis: AxisInt = 0,
1577 method: str = "average",
1578 na_option: str = "keep",
1579 ascending: bool = True,
1580 pct: bool = False,
1581 ):
1582 """
1583 See Series.rank.__doc__.
1584 """
1585 if axis != 0:
1586 raise NotImplementedError
1587
1588 return rank(
1589 self,
1590 axis=axis,
1591 method=method,
1592 na_option=na_option,
1593 ascending=ascending,
1594 pct=pct,
1595 )
1596
1597 @classmethod
1598 def _empty(cls, shape: Shape, dtype: ExtensionDtype):
1599 """
1600 Create an ExtensionArray with the given shape and dtype.
1601
        See Also
        --------
        ExtensionDtype.empty
            ExtensionDtype.empty is the 'official' public version of this API.
1606 """
1607 # Implementer note: while ExtensionDtype.empty is the public way to
1608 # call this method, it is still required to implement this `_empty`
1609 # method as well (it is called internally in pandas)
1610 obj = cls._from_sequence([], dtype=dtype)
1611
1612 taker = np.broadcast_to(np.intp(-1), shape)
1613 result = obj.take(taker, allow_fill=True)
1614 if not isinstance(result, cls) or dtype != result.dtype:
1615 raise NotImplementedError(
1616 f"Default 'empty' implementation is invalid for dtype='{dtype}'"
1617 )
1618 return result
1619
1620 def _quantile(
1621 self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
1622 ) -> ExtensionArrayT:
1623 """
1624 Compute the quantiles of self for each quantile in `qs`.
1625
1626 Parameters
1627 ----------
1628 qs : np.ndarray[float64]
        interpolation : str
1630
1631 Returns
1632 -------
1633 same type as self
1634 """
1635 mask = np.asarray(self.isna())
1636 arr = np.asarray(self)
1637 fill_value = np.nan
1638
1639 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
1640 return type(self)._from_sequence(res_values)
1641
1642 def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
1643 """
1644 Returns the mode(s) of the ExtensionArray.
1645
1646 Always returns `ExtensionArray` even if only one value.
1647
1648 Parameters
1649 ----------
1650 dropna : bool, default True
1651 Don't consider counts of NA values.
1652
1653 Returns
1654 -------
1655 same type as self
1656 Sorted, if possible.
1657 """
1658 # error: Incompatible return value type (got "Union[ExtensionArray,
1659 # ndarray[Any, Any]]", expected "ExtensionArrayT")
1660 return mode(self, dropna=dropna) # type: ignore[return-value]
1661
1662 def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
1663 if any(
1664 isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
1665 ):
1666 return NotImplemented
1667
1668 result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
1669 self, ufunc, method, *inputs, **kwargs
1670 )
1671 if result is not NotImplemented:
1672 return result
1673
1674 if "out" in kwargs:
1675 return arraylike.dispatch_ufunc_with_out(
1676 self, ufunc, method, *inputs, **kwargs
1677 )
1678
1679 if method == "reduce":
1680 result = arraylike.dispatch_reduction_ufunc(
1681 self, ufunc, method, *inputs, **kwargs
1682 )
1683 if result is not NotImplemented:
1684 return result
1685
1686 return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
1687
1688
1689class ExtensionArraySupportsAnyAll(ExtensionArray):
1690 def any(self, *, skipna: bool = True) -> bool:
1691 raise AbstractMethodError(self)
1692
1693 def all(self, *, skipna: bool = True) -> bool:
1694 raise AbstractMethodError(self)
1695
1696
1697class ExtensionOpsMixin:
1698 """
1699 A base class for linking the operators to their dunder names.
1700
1701 .. note::
1702
1703 You may want to set ``__array_priority__`` if you want your
1704 implementation to be called when involved in binary operations
1705 with NumPy arrays.
1706 """
1707
1708 @classmethod
1709 def _create_arithmetic_method(cls, op):
1710 raise AbstractMethodError(cls)
1711
1712 @classmethod
1713 def _add_arithmetic_ops(cls) -> None:
1714 setattr(cls, "__add__", cls._create_arithmetic_method(operator.add))
1715 setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd))
1716 setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub))
1717 setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub))
1718 setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul))
1719 setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul))
1720 setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow))
1721 setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow))
1722 setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod))
1723 setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod))
1724 setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv))
1725 setattr(
1726 cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv)
1727 )
1728 setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv))
1729 setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv))
1730 setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod))
1731 setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod))
1732
1733 @classmethod
1734 def _create_comparison_method(cls, op):
1735 raise AbstractMethodError(cls)
1736
1737 @classmethod
1738 def _add_comparison_ops(cls) -> None:
1739 setattr(cls, "__eq__", cls._create_comparison_method(operator.eq))
1740 setattr(cls, "__ne__", cls._create_comparison_method(operator.ne))
1741 setattr(cls, "__lt__", cls._create_comparison_method(operator.lt))
1742 setattr(cls, "__gt__", cls._create_comparison_method(operator.gt))
1743 setattr(cls, "__le__", cls._create_comparison_method(operator.le))
1744 setattr(cls, "__ge__", cls._create_comparison_method(operator.ge))
1745
1746 @classmethod
1747 def _create_logical_method(cls, op):
1748 raise AbstractMethodError(cls)
1749
1750 @classmethod
1751 def _add_logical_ops(cls) -> None:
1752 setattr(cls, "__and__", cls._create_logical_method(operator.and_))
1753 setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_))
1754 setattr(cls, "__or__", cls._create_logical_method(operator.or_))
1755 setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_))
1756 setattr(cls, "__xor__", cls._create_logical_method(operator.xor))
1757 setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor))
1758
1759
1760class ExtensionScalarOpsMixin(ExtensionOpsMixin):
1761 """
1762 A mixin for defining ops on an ExtensionArray.
1763
1764 It is assumed that the underlying scalar objects have the operators
1765 already defined.
1766
1767 Notes
1768 -----
1769 If you have defined a subclass MyExtensionArray(ExtensionArray), then
1770 use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
1771 get the arithmetic operators. After the definition of MyExtensionArray,
1772 insert the lines
1773
1774 MyExtensionArray._add_arithmetic_ops()
1775 MyExtensionArray._add_comparison_ops()
1776
1777 to link the operators to your class.
1778
1779 .. note::
1780
1781 You may want to set ``__array_priority__`` if you want your
1782 implementation to be called when involved in binary operations
1783 with NumPy arrays.
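
    A minimal sketch of that wiring (``MyExtensionArray`` is an
    illustrative name):

    .. code-block:: python

        class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin):
            ...


        MyExtensionArray._add_arithmetic_ops()
        MyExtensionArray._add_comparison_ops()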
1784 """
1785
1786 @classmethod
1787 def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
1788 """
1789 A class method that returns a method that will correspond to an
1790 operator for an ExtensionArray subclass, by dispatching to the
1791 relevant operator defined on the individual elements of the
1792 ExtensionArray.
1793
1794 Parameters
1795 ----------
1796 op : function
1797 An operator that takes arguments op(a, b)
1798 coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : numpy dtype, optional
            The dtype passed to :func:`numpy.asarray` when ``coerce_to_dtype``
            is False and the result is converted to an ndarray.
1803
1804 Returns
1805 -------
1806 Callable[[Any, Any], Union[ndarray, ExtensionArray]]
1807 A method that can be bound to a class. When used, the method
1808 receives the two arguments, one of which is the instance of
1809 this class, and should return an ExtensionArray or an ndarray.
1810
1811 Returning an ndarray may be necessary when the result of the
1812 `op` cannot be stored in the ExtensionArray. The dtype of the
1813 ndarray uses NumPy's normal inference rules.
1814
1815 Examples
1816 --------
1817 Given an ExtensionArray subclass called MyExtensionArray, use
1818
1819 __add__ = cls._create_method(operator.add)
1820
1821 in the class definition of MyExtensionArray to create the operator
        for addition, which will be based on the operator implementation
        of the underlying elements of the ExtensionArray.
1824 """
1825
1826 def _binop(self, other):
1827 def convert_values(param):
1828 if isinstance(param, ExtensionArray) or is_list_like(param):
1829 ovalues = param
                else:  # Assume it's an object
1831 ovalues = [param] * len(self)
1832 return ovalues
1833
1834 if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
1835 # rely on pandas to unbox and dispatch to us
1836 return NotImplemented
1837
1838 lvalues = self
1839 rvalues = convert_values(other)
1840
1841 # If the operator is not defined for the underlying objects,
1842 # a TypeError should be raised
1843 res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
1844
1845 def _maybe_convert(arr):
1846 if coerce_to_dtype:
1847 # https://github.com/pandas-dev/pandas/issues/22850
1848 # We catch all regular exceptions here, and fall back
1849 # to an ndarray.
1850 res = maybe_cast_to_extension_array(type(self), arr)
1851 if not isinstance(res, type(self)):
1852 # exception raised in _from_sequence; ensure we have ndarray
1853 res = np.asarray(arr)
1854 else:
1855 res = np.asarray(arr, dtype=result_dtype)
1856 return res
1857
1858 if op.__name__ in {"divmod", "rdivmod"}:
1859 a, b = zip(*res)
1860 return _maybe_convert(a), _maybe_convert(b)
1861
1862 return _maybe_convert(res)
1863
1864 op_name = f"__{op.__name__}__"
1865 return set_function_name(_binop, op_name, cls)
1866
1867 @classmethod
1868 def _create_arithmetic_method(cls, op):
1869 return cls._create_method(op)
1870
1871 @classmethod
1872 def _create_comparison_method(cls, op):
1873 return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)