1"""
2An interface for extending pandas with custom arrays.
3
4.. warning::
5
6 This is an experimental API and subject to breaking changes
7 without warning.
8"""
9from __future__ import annotations
10
11import operator
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 ClassVar,
17 Iterator,
18 Literal,
19 Sequence,
20 TypeVar,
21 cast,
22 overload,
23)
24
25import numpy as np
26
27from pandas._libs import lib
28from pandas._typing import (
29 ArrayLike,
30 AstypeArg,
31 AxisInt,
32 Dtype,
33 FillnaOptions,
34 PositionalIndexer,
35 ScalarIndexer,
36 SequenceIndexer,
37 Shape,
38 SortKind,
39 TakeIndexer,
40 npt,
41)
42from pandas.compat import set_function_name
43from pandas.compat.numpy import function as nv
44from pandas.errors import AbstractMethodError
45from pandas.util._decorators import (
46 Appender,
47 Substitution,
48 cache_readonly,
49)
50from pandas.util._validators import (
51 validate_bool_kwarg,
52 validate_fillna_kwargs,
53 validate_insert_loc,
54)
55
56from pandas.core.dtypes.cast import maybe_cast_to_extension_array
57from pandas.core.dtypes.common import (
58 is_datetime64_dtype,
59 is_dtype_equal,
60 is_list_like,
61 is_scalar,
62 is_timedelta64_dtype,
63 pandas_dtype,
64)
65from pandas.core.dtypes.dtypes import ExtensionDtype
66from pandas.core.dtypes.generic import (
67 ABCDataFrame,
68 ABCIndex,
69 ABCSeries,
70)
71from pandas.core.dtypes.missing import isna
72
73from pandas.core import (
74 arraylike,
75 missing,
76 roperator,
77)
78from pandas.core.algorithms import (
79 factorize_array,
80 isin,
81 mode,
82 rank,
83 unique,
84)
85from pandas.core.array_algos.quantile import quantile_with_mask
86from pandas.core.sorting import (
87 nargminmax,
88 nargsort,
89)
90
91if TYPE_CHECKING:
92 from pandas._typing import (
93 NumpySorter,
94 NumpyValueArrayLike,
95 )
96
97_extension_array_shared_docs: dict[str, str] = {}
98
99ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray")
100
101
102class ExtensionArray:
103 """
104 Abstract base class for custom 1-D array types.
105
106 pandas will recognize instances of this class as proper arrays
107 with a custom type and will not attempt to coerce them to objects. They
108 may be stored directly inside a :class:`DataFrame` or :class:`Series`.
109
110 Attributes
111 ----------
112 dtype
113 nbytes
114 ndim
115 shape
116
117 Methods
118 -------
119 argsort
120 astype
121 copy
122 dropna
123 factorize
124 fillna
125 equals
126 insert
127 isin
128 isna
129 ravel
130 repeat
131 searchsorted
132 shift
133 take
134 tolist
135 unique
136 view
137 _accumulate
138 _concat_same_type
139 _formatter
140 _from_factorized
141 _from_sequence
142 _from_sequence_of_strings
143 _reduce
144 _values_for_argsort
145 _values_for_factorize
146
147 Notes
148 -----
149 The interface includes the following abstract methods that must be
150 implemented by subclasses:
151
152 * _from_sequence
153 * _from_factorized
154 * __getitem__
155 * __len__
156 * __eq__
157 * dtype
158 * nbytes
159 * isna
160 * take
161 * copy
162 * _concat_same_type
163
    A default repr displaying the type, (truncated) data, length,
    and dtype is provided. It can be customized or replaced
    by overriding:
167
168 * __repr__ : A default repr for the ExtensionArray.
169 * _formatter : Print scalars inside a Series or DataFrame.
170
171 Some methods require casting the ExtensionArray to an ndarray of Python
172 objects with ``self.astype(object)``, which may be expensive. When
173 performance is a concern, we highly recommend overriding the following
174 methods:
175
176 * fillna
177 * dropna
178 * unique
179 * factorize / _values_for_factorize
180 * argsort, argmax, argmin / _values_for_argsort
181 * searchsorted
182
183 The remaining methods implemented on this class should be performant,
184 as they only compose abstract methods. Still, a more efficient
185 implementation may be available, and these methods can be overridden.
186
187 One can implement methods to handle array accumulations or reductions.
188
189 * _accumulate
190 * _reduce
191
192 One can implement methods to handle parsing from strings that will be used
193 in methods such as ``pandas.io.parsers.read_csv``.
194
195 * _from_sequence_of_strings
196
    This class does not use 'abc.ABCMeta' as its metaclass, for performance reasons.
198 Methods and properties required by the interface raise
199 ``pandas.errors.AbstractMethodError`` and no ``register`` method is
200 provided for registering virtual subclasses.
201
202 ExtensionArrays are limited to 1 dimension.
203
204 They may be backed by none, one, or many NumPy arrays. For example,
205 ``pandas.Categorical`` is an extension array backed by two arrays,
    one for codes and one for categories. An array of IPv6 addresses may
207 be backed by a NumPy structured array with two fields, one for the
208 lower 64 bits and one for the upper 64 bits. Or they may be backed
209 by some other storage type, like Python lists. Pandas makes no
210 assumptions on how the data are stored, just that it can be converted
211 to a NumPy array.
212 The ExtensionArray interface does not impose any rules on how this data
213 is stored. However, currently, the backing data cannot be stored in
214 attributes called ``.values`` or ``._values`` to ensure full compatibility
    with pandas internals. But other names such as ``.data``, ``._data``,
216 ``._items``, ... can be freely used.
217
218 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects
219 that
220
221 1. You defer by returning ``NotImplemented`` when any Series are present
222 in `inputs`. Pandas will extract the arrays and call the ufunc again.
223 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class.
       Pandas inspects this to determine whether the ufunc is valid for the
225 types present.
226
227 See :ref:`extending.extension.ufunc` for more.
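
    A simplified, minimal sketch of that protocol (``MyArray`` and its
    ``_data`` ndarray attribute are illustrative names, not part of pandas):

    .. code-block:: python

        import numbers

        import numpy as np
        import pandas as pd


        class MyArray(ExtensionArray):
            _HANDLED_TYPES = (np.ndarray, numbers.Number)

            def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
                # Defer when a Series/Index/DataFrame is present; pandas
                # will unbox the arrays and call the ufunc again.
                if any(isinstance(x, (pd.Series, pd.Index, pd.DataFrame))
                       for x in inputs):
                    return NotImplemented
                if not all(isinstance(x, self._HANDLED_TYPES + (MyArray,))
                           for x in inputs):
                    return NotImplemented
                arrays = [x._data if isinstance(x, MyArray) else x
                          for x in inputs]
                result = getattr(ufunc, method)(*arrays, **kwargs)
                return type(self)._from_sequence(result)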
228
229 By default, ExtensionArrays are not hashable. Immutable subclasses may
230 override this behavior.
231 """
232
233 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
234 # Don't override this.
235 _typ = "extension"
236
237 # ------------------------------------------------------------------------
238 # Constructors
239 # ------------------------------------------------------------------------
240
241 @classmethod
242 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False):
243 """
244 Construct a new ExtensionArray from a sequence of scalars.
245
246 Parameters
247 ----------
248 scalars : Sequence
249 Each element will be an instance of the scalar type for this
            array, ``cls.dtype.type``, or be converted into this type in this method.
251 dtype : dtype, optional
252 Construct for this particular dtype. This should be a Dtype
253 compatible with the ExtensionArray.
254 copy : bool, default False
255 If True, copy the underlying data.
256
257 Returns
258 -------
259 ExtensionArray
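
        Examples
        --------
        A minimal sketch for a hypothetical ndarray-backed subclass whose
        ``__init__`` accepts a NumPy ndarray (one possibility, not the
        required implementation):

        .. code-block:: python

            @classmethod
            def _from_sequence(cls, scalars, *, dtype=None, copy=False):
                data = np.asarray(scalars)
                if copy:
                    data = data.copy()
                return cls(data)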
260 """
261 raise AbstractMethodError(cls)
262
263 @classmethod
264 def _from_sequence_of_strings(
265 cls, strings, *, dtype: Dtype | None = None, copy: bool = False
266 ):
267 """
268 Construct a new ExtensionArray from a sequence of strings.
269
270 Parameters
271 ----------
272 strings : Sequence
273 Each element will be an instance of the scalar type for this
274 array, ``cls.dtype.type``.
275 dtype : dtype, optional
276 Construct for this particular dtype. This should be a Dtype
277 compatible with the ExtensionArray.
278 copy : bool, default False
279 If True, copy the underlying data.
280
281 Returns
282 -------
283 ExtensionArray
284 """
285 raise AbstractMethodError(cls)
286
287 @classmethod
288 def _from_factorized(cls, values, original):
289 """
290 Reconstruct an ExtensionArray after factorization.
291
292 Parameters
293 ----------
294 values : ndarray
295 An integer ndarray with the factorized values.
296 original : ExtensionArray
297 The original ExtensionArray that factorize was called on.
298
299 See Also
300 --------
301 factorize : Top-level factorize method that dispatches here.
302 ExtensionArray.factorize : Encode the extension array as an enumerated type.
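
        Examples
        --------
        A minimal sketch, assuming the values returned by
        ``_values_for_factorize`` can round-trip through ``_from_sequence``:

        .. code-block:: python

            @classmethod
            def _from_factorized(cls, values, original):
                return cls._from_sequence(values, dtype=original.dtype)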
303 """
304 raise AbstractMethodError(cls)
305
306 # ------------------------------------------------------------------------
307 # Must be a Sequence
308 # ------------------------------------------------------------------------
309 @overload
310 def __getitem__(self, item: ScalarIndexer) -> Any:
311 ...
312
313 @overload
314 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT:
315 ...
316
317 def __getitem__(
318 self: ExtensionArrayT, item: PositionalIndexer
319 ) -> ExtensionArrayT | Any:
320 """
321 Select a subset of self.
322
323 Parameters
324 ----------
325 item : int, slice, or ndarray
326 * int: The position in 'self' to get.
327
328 * slice: A slice object, where 'start', 'stop', and 'step' are
329 integers or None
330
331 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self'
332
333 * list[int]: A list of int
334
335 Returns
336 -------
337 item : scalar or ExtensionArray
338
339 Notes
340 -----
341 For scalar ``item``, return a scalar value suitable for the array's
342 type. This should be an instance of ``self.dtype.type``.
343
344 For slice ``key``, return an instance of ``ExtensionArray``, even
345 if the slice is length 0 or 1.
346
347 For a boolean mask, return an instance of ``ExtensionArray``, filtered
348 to the values where ``item`` is True.
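
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute:

        .. code-block:: python

            def __getitem__(self, item):
                if isinstance(item, (int, np.integer)):
                    # scalar: return an instance of ``self.dtype.type``
                    return self._data[item]
                # slice, boolean mask, or integer array: return a new
                # instance of the same ExtensionArray subclass
                return type(self)._from_sequence(
                    self._data[item], dtype=self.dtype
                )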
349 """
350 raise AbstractMethodError(self)
351
352 def __setitem__(self, key, value) -> None:
353 """
354 Set one or more values inplace.
355
356 This method is not required to satisfy the pandas extension array
357 interface.
358
359 Parameters
360 ----------
361 key : int, ndarray, or slice
362 When called from, e.g. ``Series.__setitem__``, ``key`` will be
363 one of
364
365 * scalar int
366 * ndarray of integers.
367 * boolean ndarray
368 * slice object
369
370 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object
            value or values to be set for ``key``.
372
373 Returns
374 -------
375 None
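
        Examples
        --------
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute (coercion is delegated to
        NumPy here; real implementations may need more care):

        .. code-block:: python

            def __setitem__(self, key, value):
                # works for a scalar ``value`` (broadcast) as well as a
                # same-length sequence of values
                value = np.asarray(value, dtype=self._data.dtype)
                self._data[key] = value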
376 """
377 # Some notes to the ExtensionArray implementor who may have ended up
378 # here. While this method is not required for the interface, if you
379 # *do* choose to implement __setitem__, then some semantics should be
380 # observed:
381 #
382 # * Setting multiple values : ExtensionArrays should support setting
383 # multiple values at once, 'key' will be a sequence of integers and
384 # 'value' will be a same-length sequence.
385 #
386 # * Broadcasting : For a sequence 'key' and a scalar 'value',
387 # each position in 'key' should be set to 'value'.
388 #
389 # * Coercion : Most users will expect basic coercion to work. For
390 # example, a string like '2018-01-01' is coerced to a datetime
391 # when setting on a datetime64ns array. In general, if the
392 # __init__ method coerces that value, then so should __setitem__
393 # Note, also, that Series/DataFrame.where internally use __setitem__
394 # on a copy of the data.
395 raise NotImplementedError(f"{type(self)} does not implement __setitem__.")
396
397 def __len__(self) -> int:
398 """
        Length of this array.
400
401 Returns
402 -------
403 length : int
404 """
405 raise AbstractMethodError(self)
406
407 def __iter__(self) -> Iterator[Any]:
408 """
409 Iterate over elements of the array.
410 """
411 # This needs to be implemented so that pandas recognizes extension
412 # arrays as list-like. The default implementation makes successive
413 # calls to ``__getitem__``, which may be slower than necessary.
414 for i in range(len(self)):
415 yield self[i]
416
417 def __contains__(self, item: object) -> bool | np.bool_:
418 """
419 Return for `item in self`.
420 """
421 # GH37867
422 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA]
423 # would raise a TypeError. The implementation below works around that.
424 if is_scalar(item) and isna(item):
425 if not self._can_hold_na:
426 return False
427 elif item is self.dtype.na_value or isinstance(item, self.dtype.type):
428 return self._hasna
429 else:
430 return False
431 else:
432 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
433 # attribute "any"
434 return (item == self).any() # type: ignore[union-attr]
435
436 # error: Signature of "__eq__" incompatible with supertype "object"
437 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override]
438 """
439 Return for `self == other` (element-wise equality).
440 """
441 # Implementer note: this should return a boolean numpy ndarray or
442 # a boolean ExtensionArray.
443 # When `other` is one of Series, Index, or DataFrame, this method should
444 # return NotImplemented (to ensure that those objects are responsible for
445 # first unpacking the arrays, and then dispatch the operation to the
446 # underlying arrays)
447 raise AbstractMethodError(self)
448
449 # error: Signature of "__ne__" incompatible with supertype "object"
450 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override]
451 """
        Return for `self != other` (element-wise inequality).
453 """
454 return ~(self == other)
455
456 def to_numpy(
457 self,
458 dtype: npt.DTypeLike | None = None,
459 copy: bool = False,
460 na_value: object = lib.no_default,
461 ) -> np.ndarray:
462 """
463 Convert to a NumPy ndarray.
464
465 This is similar to :meth:`numpy.asarray`, but may provide additional control
466 over how the conversion is done.
467
468 Parameters
469 ----------
470 dtype : str or numpy.dtype, optional
471 The dtype to pass to :meth:`numpy.asarray`.
472 copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
477 na_value : Any, optional
478 The value to use for missing values. The default value depends
479 on `dtype` and the type of the array.
480
481 Returns
482 -------
483 numpy.ndarray
484 """
485 result = np.asarray(self, dtype=dtype)
486 if copy or na_value is not lib.no_default:
487 result = result.copy()
488 if na_value is not lib.no_default:
489 result[self.isna()] = na_value
490 return result
491
492 # ------------------------------------------------------------------------
493 # Required attributes
494 # ------------------------------------------------------------------------
495
496 @property
497 def dtype(self) -> ExtensionDtype:
498 """
499 An instance of 'ExtensionDtype'.
500 """
501 raise AbstractMethodError(self)
502
503 @property
504 def shape(self) -> Shape:
505 """
506 Return a tuple of the array dimensions.
507 """
508 return (len(self),)
509
510 @property
511 def size(self) -> int:
512 """
513 The number of elements in the array.
514 """
515 # error: Incompatible return value type (got "signedinteger[_64Bit]",
516 # expected "int") [return-value]
517 return np.prod(self.shape) # type: ignore[return-value]
518
519 @property
520 def ndim(self) -> int:
521 """
522 Extension Arrays are only allowed to be 1-dimensional.
523 """
524 return 1
525
526 @property
527 def nbytes(self) -> int:
528 """
529 The number of bytes needed to store this object in memory.
530 """
531 # If this is expensive to compute, return an approximate lower bound
532 # on the number of bytes needed.
533 raise AbstractMethodError(self)
534
535 # ------------------------------------------------------------------------
536 # Additional Methods
537 # ------------------------------------------------------------------------
538
539 @overload
540 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray:
541 ...
542
543 @overload
544 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray:
545 ...
546
547 @overload
548 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike:
549 ...
550
551 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike:
552 """
553 Cast to a NumPy array or ExtensionArray with 'dtype'.
554
555 Parameters
556 ----------
557 dtype : str or dtype
558 Typecode or data-type to which the array is cast.
559 copy : bool, default True
560 Whether to copy the data, even if not necessary. If False,
561 a copy is made only if the old dtype does not match the
562 new dtype.
563
564 Returns
565 -------
566 np.ndarray or pandas.api.extensions.ExtensionArray
            An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``;
            otherwise a NumPy ndarray with 'dtype' for its dtype.
569 """
570
571 dtype = pandas_dtype(dtype)
572 if is_dtype_equal(dtype, self.dtype):
573 if not copy:
574 return self
575 else:
576 return self.copy()
577
578 if isinstance(dtype, ExtensionDtype):
579 cls = dtype.construct_array_type()
580 return cls._from_sequence(self, dtype=dtype, copy=copy)
581
582 elif is_datetime64_dtype(dtype):
583 from pandas.core.arrays import DatetimeArray
584
585 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy)
586
587 elif is_timedelta64_dtype(dtype):
588 from pandas.core.arrays import TimedeltaArray
589
590 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy)
591
592 return np.array(self, dtype=dtype, copy=copy)
593
594 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
595 """
596 A 1-D array indicating if each value is missing.
597
598 Returns
599 -------
600 numpy.ndarray or pandas.api.extensions.ExtensionArray
601 In most cases, this should return a NumPy ndarray. For
602 exceptional cases like ``SparseArray``, where returning
603 an ndarray would be expensive, an ExtensionArray may be
604 returned.
605
606 Notes
607 -----
608 If returning an ExtensionArray, then
609
610 * ``na_values._is_boolean`` should be True
611 * `na_values` should implement :func:`ExtensionArray._reduce`
612 * ``na_values.any`` and ``na_values.all`` should be implemented
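
        A minimal sketch for a float-backed array (hypothetical
        ``self._data`` ndarray):

        .. code-block:: python

            def isna(self):
                # NaN is the only missing-value marker in this sketch
                return np.isnan(self._data)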
613 """
614 raise AbstractMethodError(self)
615
616 @property
617 def _hasna(self) -> bool:
618 # GH#22680
619 """
620 Equivalent to `self.isna().any()`.
621
622 Some ExtensionArray subclasses may be able to optimize this check.
623 """
624 return bool(self.isna().any())
625
626 def _values_for_argsort(self) -> np.ndarray:
627 """
628 Return values for sorting.
629
630 Returns
631 -------
632 ndarray
633 The transformed values should maintain the ordering between values
634 within the array.
635
636 See Also
637 --------
638 ExtensionArray.argsort : Return the indices that would sort this array.
639
640 Notes
641 -----
642 The caller is responsible for *not* modifying these values in-place, so
643 it is safe for implementors to give views on `self`.
644
645 Functions that use this (e.g. ExtensionArray.argsort) should ignore
646 entries with missing values in the original array (according to `self.isna()`).
647 This means that the corresponding entries in the returned array don't need to
648 be modified to sort correctly.
649 """
650 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`.
651 return np.array(self)
652
653 def argsort(
654 self,
655 *,
656 ascending: bool = True,
657 kind: SortKind = "quicksort",
658 na_position: str = "last",
659 **kwargs,
660 ) -> np.ndarray:
661 """
662 Return the indices that would sort this array.
663
664 Parameters
665 ----------
666 ascending : bool, default True
667 Whether the indices should result in an ascending
668 or descending sort.
669 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional
670 Sorting algorithm.
        **kwargs
            Passed through to :func:`numpy.argsort`.
673
674 Returns
675 -------
676 np.ndarray[np.intp]
677 Array of indices that sort ``self``. If NaN values are contained,
678 NaN values are placed at the end.
679
680 See Also
681 --------
682 numpy.argsort : Sorting implementation used internally.
683 """
684 # Implementor note: You have two places to override the behavior of
685 # argsort.
686 # 1. _values_for_argsort : construct the values passed to np.argsort
687 # 2. argsort : total control over sorting. In case of overriding this,
688 # it is recommended to also override argmax/argmin
689 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
690
691 values = self._values_for_argsort()
692 return nargsort(
693 values,
694 kind=kind,
695 ascending=ascending,
696 na_position=na_position,
697 mask=np.asarray(self.isna()),
698 )
699
700 def argmin(self, skipna: bool = True) -> int:
701 """
702 Return the index of minimum value.
703
704 In case of multiple occurrences of the minimum value, the index
705 corresponding to the first occurrence is returned.
706
707 Parameters
708 ----------
709 skipna : bool, default True
710
711 Returns
712 -------
713 int
714
715 See Also
716 --------
717 ExtensionArray.argmax
718 """
719 # Implementor note: You have two places to override the behavior of
720 # argmin.
721 # 1. _values_for_argsort : construct the values used in nargminmax
722 # 2. argmin itself : total control over sorting.
723 validate_bool_kwarg(skipna, "skipna")
724 if not skipna and self._hasna:
725 raise NotImplementedError
726 return nargminmax(self, "argmin")
727
728 def argmax(self, skipna: bool = True) -> int:
729 """
730 Return the index of maximum value.
731
732 In case of multiple occurrences of the maximum value, the index
733 corresponding to the first occurrence is returned.
734
735 Parameters
736 ----------
737 skipna : bool, default True
738
739 Returns
740 -------
741 int
742
743 See Also
744 --------
745 ExtensionArray.argmin
746 """
747 # Implementor note: You have two places to override the behavior of
748 # argmax.
749 # 1. _values_for_argsort : construct the values used in nargminmax
750 # 2. argmax itself : total control over sorting.
751 validate_bool_kwarg(skipna, "skipna")
752 if not skipna and self._hasna:
753 raise NotImplementedError
754 return nargminmax(self, "argmax")
755
756 def fillna(
757 self: ExtensionArrayT,
758 value: object | ArrayLike | None = None,
759 method: FillnaOptions | None = None,
760 limit: int | None = None,
761 ) -> ExtensionArrayT:
762 """
763 Fill NA/NaN values using the specified method.
764
765 Parameters
766 ----------
767 value : scalar, array-like
768 If a scalar value is passed it is used to fill all missing values.
769 Alternatively, an array-like 'value' can be given. It's expected
770 that the array-like have the same length as 'self'.
771 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
772 Method to use for filling holes in reindexed Series:
773
774 * pad / ffill: propagate last valid observation forward to next valid.
775 * backfill / bfill: use NEXT valid observation to fill gap.
776
777 limit : int, default None
778 If method is specified, this is the maximum number of consecutive
779 NaN values to forward/backward fill. In other words, if there is
780 a gap with more than this number of consecutive NaNs, it will only
781 be partially filled. If method is not specified, this is the
782 maximum number of entries along the entire axis where NaNs will be
783 filled.
784
785 Returns
786 -------
787 ExtensionArray
788 With NA/NaN filled.
789 """
790 value, method = validate_fillna_kwargs(value, method)
791
792 mask = self.isna()
793 # error: Argument 2 to "check_value_size" has incompatible type
794 # "ExtensionArray"; expected "ndarray"
795 value = missing.check_value_size(
796 value, mask, len(self) # type: ignore[arg-type]
797 )
798
799 if mask.any():
800 if method is not None:
801 func = missing.get_fill_func(method)
802 npvalues = self.astype(object)
803 func(npvalues, limit=limit, mask=mask)
804 new_values = self._from_sequence(npvalues, dtype=self.dtype)
805 else:
806 # fill with value
807 new_values = self.copy()
808 new_values[mask] = value
809 else:
810 new_values = self.copy()
811 return new_values
812
813 def dropna(self: ExtensionArrayT) -> ExtensionArrayT:
814 """
815 Return ExtensionArray without NA values.
816
817 Returns
818 -------
819 pandas.api.extensions.ExtensionArray
820 """
821 # error: Unsupported operand type for ~ ("ExtensionArray")
822 return self[~self.isna()] # type: ignore[operator]
823
824 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
825 """
826 Shift values by desired number.
827
828 Newly introduced missing values are filled with
829 ``self.dtype.na_value``.
830
831 Parameters
832 ----------
833 periods : int, default 1
834 The number of periods to shift. Negative values are allowed
835 for shifting backwards.
836
837 fill_value : object, optional
838 The scalar value to use for newly introduced missing values.
839 The default is ``self.dtype.na_value``.
840
841 Returns
842 -------
843 ExtensionArray
844 Shifted.
845
846 Notes
847 -----
848 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
849 returned.
850
851 If ``periods > len(self)``, then an array of size
852 len(self) is returned, with all values filled with
853 ``self.dtype.na_value``.
854 """
855 # Note: this implementation assumes that `self.dtype.na_value` can be
856 # stored in an instance of your ExtensionArray with `self.dtype`.
857 if not len(self) or periods == 0:
858 return self.copy()
859
860 if isna(fill_value):
861 fill_value = self.dtype.na_value
862
863 empty = self._from_sequence(
864 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype
865 )
866 if periods > 0:
867 a = empty
868 b = self[:-periods]
869 else:
870 a = self[abs(periods) :]
871 b = empty
872 return self._concat_same_type([a, b])
873
874 def unique(self: ExtensionArrayT) -> ExtensionArrayT:
875 """
876 Compute the ExtensionArray of unique values.
877
878 Returns
879 -------
880 pandas.api.extensions.ExtensionArray
881 """
882 uniques = unique(self.astype(object))
883 return self._from_sequence(uniques, dtype=self.dtype)
884
885 def searchsorted(
886 self,
887 value: NumpyValueArrayLike | ExtensionArray,
888 side: Literal["left", "right"] = "left",
889 sorter: NumpySorter = None,
890 ) -> npt.NDArray[np.intp] | np.intp:
891 """
892 Find indices where elements should be inserted to maintain order.
893
894 Find the indices into a sorted array `self` (a) such that, if the
895 corresponding elements in `value` were inserted before the indices,
896 the order of `self` would be preserved.
897
898 Assuming that `self` is sorted:
899
900 ====== ================================
901 `side` returned index `i` satisfies
902 ====== ================================
903 left ``self[i-1] < value <= self[i]``
904 right ``self[i-1] <= value < self[i]``
905 ====== ================================
906
907 Parameters
908 ----------
909 value : array-like, list or scalar
910 Value(s) to insert into `self`.
911 side : {'left', 'right'}, optional
912 If 'left', the index of the first suitable location found is given.
913 If 'right', return the last such index. If there is no suitable
914 index, return either 0 or N (where N is the length of `self`).
915 sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of argsort.
918
919 Returns
920 -------
921 array of ints or int
922 If value is array-like, array of insertion points.
923 If value is scalar, a single integer.
924
925 See Also
926 --------
927 numpy.searchsorted : Similar method from NumPy.
928 """
929 # Note: the base tests provided by pandas only test the basics.
930 # We do not test
931 # 1. Values outside the range of the `data_for_sorting` fixture
932 # 2. Values between the values in the `data_for_sorting` fixture
933 # 3. Missing values.
934 arr = self.astype(object)
935 if isinstance(value, ExtensionArray):
936 value = value.astype(object)
937 return arr.searchsorted(value, side=side, sorter=sorter)
938
939 def equals(self, other: object) -> bool:
940 """
        Return whether another array is equivalent to this array.
942
943 Equivalent means that both arrays have the same shape and dtype, and
944 all values compare equal. Missing values in the same location are
945 considered equal (in contrast with normal equality).
946
947 Parameters
948 ----------
949 other : ExtensionArray
950 Array to compare to this Array.
951
952 Returns
953 -------
954 boolean
955 Whether the arrays are equivalent.
956 """
957 if type(self) != type(other):
958 return False
959 other = cast(ExtensionArray, other)
960 if not is_dtype_equal(self.dtype, other.dtype):
961 return False
962 elif len(self) != len(other):
963 return False
964 else:
965 equal_values = self == other
966 if isinstance(equal_values, ExtensionArray):
967 # boolean array with NA -> fill with False
968 equal_values = equal_values.fillna(False)
969 # error: Unsupported left operand type for & ("ExtensionArray")
970 equal_na = self.isna() & other.isna() # type: ignore[operator]
971 return bool((equal_values | equal_na).all())
972
973 def isin(self, values) -> npt.NDArray[np.bool_]:
974 """
975 Pointwise comparison for set containment in the given values.
976
977 Roughly equivalent to `np.array([x in values for x in self])`
978
979 Parameters
980 ----------
981 values : Sequence
982
983 Returns
984 -------
985 np.ndarray[bool]
986 """
987 return isin(np.asarray(self), values)
988
989 def _values_for_factorize(self) -> tuple[np.ndarray, Any]:
990 """
991 Return an array and missing value suitable for factorization.
992
993 Returns
994 -------
995 values : ndarray
996
997 An array suitable for factorization. This should maintain order
998 and be a supported dtype (Float64, Int64, UInt64, String, Object).
999 By default, the extension array is cast to object dtype.
1000 na_value : object
1001 The value in `values` to consider missing. This will be treated
1002 as NA in the factorization routines, so it will be coded as
1003 `-1` and not included in `uniques`. By default,
1004 ``np.nan`` is used.
1005
1006 Notes
1007 -----
1008 The values returned by this method are also used in
1009 :func:`pandas.util.hash_pandas_object`.
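
        A minimal sketch for an array backed by a NumPy ndarray stored in a
        hypothetical ``self._data`` attribute, using ``np.nan`` as the
        missing-value marker:

        .. code-block:: python

            def _values_for_factorize(self):
                # avoids the object-dtype cast done by the default
                return self._data, np.nan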
1010 """
1011 return self.astype(object), np.nan
1012
1013 def factorize(
1014 self,
1015 use_na_sentinel: bool = True,
1016 ) -> tuple[np.ndarray, ExtensionArray]:
1017 """
1018 Encode the extension array as an enumerated type.
1019
1020 Parameters
1021 ----------
1022 use_na_sentinel : bool, default True
            If True, the sentinel -1 will be used for NaN values. If False,
            NaN values will be encoded as non-negative integers and will not
            be dropped from the uniques of the values.
1026
1027 .. versionadded:: 1.5.0
1028
1029 Returns
1030 -------
1031 codes : ndarray
1032 An integer NumPy array that's an indexer into the original
1033 ExtensionArray.
1034 uniques : ExtensionArray
1035 An ExtensionArray containing the unique values of `self`.
1036
1037 .. note::
1038
1039 uniques will *not* contain an entry for the NA value of
1040 the ExtensionArray if there are any missing values present
1041 in `self`.
1042
1043 See Also
1044 --------
1045 factorize : Top-level factorize method that dispatches here.
1046
1047 Notes
1048 -----
1049 :meth:`pandas.factorize` offers a `sort` keyword as well.
1050 """
1051 # Implementer note: There are two ways to override the behavior of
1052 # pandas.factorize
1053 # 1. _values_for_factorize and _from_factorize.
1054 # Specify the values passed to pandas' internal factorization
1055 # routines, and how to convert from those values back to the
1056 # original ExtensionArray.
1057 # 2. ExtensionArray.factorize.
1058 # Complete control over factorization.
1059 arr, na_value = self._values_for_factorize()
1060
1061 codes, uniques = factorize_array(
1062 arr, use_na_sentinel=use_na_sentinel, na_value=na_value
1063 )
1064
1065 uniques_ea = self._from_factorized(uniques, self)
1066 return codes, uniques_ea
1067
1068 _extension_array_shared_docs[
1069 "repeat"
1070 ] = """
1071 Repeat elements of a %(klass)s.
1072
1073 Returns a new %(klass)s where each element of the current %(klass)s
1074 is repeated consecutively a given number of times.
1075
1076 Parameters
1077 ----------
1078 repeats : int or array of ints
1079 The number of repetitions for each element. This should be a
1080 non-negative integer. Repeating 0 times will return an empty
1081 %(klass)s.
1082 axis : None
1083 Must be ``None``. Has no effect but is accepted for compatibility
1084 with numpy.
1085
1086 Returns
1087 -------
1088 %(klass)s
1089 Newly created %(klass)s with repeated elements.
1090
1091 See Also
1092 --------
1093 Series.repeat : Equivalent function for Series.
1094 Index.repeat : Equivalent function for Index.
1095 numpy.repeat : Similar method for :class:`numpy.ndarray`.
1096 ExtensionArray.take : Take arbitrary positions.
1097
1098 Examples
1099 --------
1100 >>> cat = pd.Categorical(['a', 'b', 'c'])
1101 >>> cat
1102 ['a', 'b', 'c']
1103 Categories (3, object): ['a', 'b', 'c']
1104 >>> cat.repeat(2)
1105 ['a', 'a', 'b', 'b', 'c', 'c']
1106 Categories (3, object): ['a', 'b', 'c']
1107 >>> cat.repeat([1, 2, 3])
1108 ['a', 'b', 'b', 'c', 'c', 'c']
1109 Categories (3, object): ['a', 'b', 'c']
1110 """
1111
1112 @Substitution(klass="ExtensionArray")
1113 @Appender(_extension_array_shared_docs["repeat"])
1114 def repeat(
1115 self: ExtensionArrayT, repeats: int | Sequence[int], axis: AxisInt | None = None
1116 ) -> ExtensionArrayT:
1117 nv.validate_repeat((), {"axis": axis})
1118 ind = np.arange(len(self)).repeat(repeats)
1119 return self.take(ind)
1120
1121 # ------------------------------------------------------------------------
1122 # Indexing methods
1123 # ------------------------------------------------------------------------
1124
1125 def take(
1126 self: ExtensionArrayT,
1127 indices: TakeIndexer,
1128 *,
1129 allow_fill: bool = False,
1130 fill_value: Any = None,
1131 ) -> ExtensionArrayT:
1132 """
1133 Take elements from an array.
1134
1135 Parameters
1136 ----------
1137 indices : sequence of int or one-dimensional np.ndarray of int
1138 Indices to be taken.
1139 allow_fill : bool, default False
1140 How to handle negative values in `indices`.
1141
1142 * False: negative values in `indices` indicate positional indices
1143 from the right (the default). This is similar to
1144 :func:`numpy.take`.
1145
1146 * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              negative values raise a ``ValueError``.
1149
1150 fill_value : any, optional
1151 Fill value to use for NA-indices when `allow_fill` is True.
1152 This may be ``None``, in which case the default NA value for
1153 the type, ``self.dtype.na_value``, is used.
1154
1155 For many ExtensionArrays, there will be two representations of
1156 `fill_value`: a user-facing "boxed" scalar, and a low-level
1157 physical NA value. `fill_value` should be the user-facing version,
1158 and the implementation should handle translating that to the
1159 physical version for processing the take if necessary.
1160
1161 Returns
1162 -------
1163 ExtensionArray
1164
1165 Raises
1166 ------
1167 IndexError
1168 When the indices are out of bounds for the array.
1169 ValueError
1170 When `indices` contains negative values other than ``-1``
1171 and `allow_fill` is True.
1172
1173 See Also
1174 --------
1175 numpy.take : Take elements from an array along an axis.
1176 api.extensions.take : Take elements from an array.
1177
1178 Notes
1179 -----
1180 ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
1181 ``iloc``, when `indices` is a sequence of values. Additionally,
1182 it's called by :meth:`Series.reindex`, or any other method
1183 that causes realignment, with a `fill_value`.
1184
1185 Examples
1186 --------
1187 Here's an example implementation, which relies on casting the
1188 extension array to object dtype. This uses the helper method
1189 :func:`pandas.api.extensions.take`.
1190
1191 .. code-block:: python
1192
1193 def take(self, indices, allow_fill=False, fill_value=None):
1194 from pandas.core.algorithms import take
1195
1196 # If the ExtensionArray is backed by an ndarray, then
1197 # just pass that here instead of coercing to object.
1198 data = self.astype(object)
1199
1200 if allow_fill and fill_value is None:
1201 fill_value = self.dtype.na_value
1202
1203 # fill value should always be translated from the scalar
1204 # type for the array, to the physical storage type for
1205 # the data, before passing to take.
1206
1207 result = take(data, indices, fill_value=fill_value,
1208 allow_fill=allow_fill)
1209 return self._from_sequence(result, dtype=self.dtype)
1210 """
1211 # Implementer note: The `fill_value` parameter should be a user-facing
1212 # value, an instance of self.dtype.type. When passed `fill_value=None`,
1213 # the default of `self.dtype.na_value` should be used.
1214 # This may differ from the physical storage type your ExtensionArray
1215 # uses. In this case, your implementation is responsible for casting
1216 # the user-facing type to the storage type, before using
1217 # pandas.api.extensions.take
1218 raise AbstractMethodError(self)
1219
1220 def copy(self: ExtensionArrayT) -> ExtensionArrayT:
1221 """
1222 Return a copy of the array.
1223
1224 Returns
1225 -------
1226 ExtensionArray
1227 """
1228 raise AbstractMethodError(self)
1229
1230 def view(self, dtype: Dtype | None = None) -> ArrayLike:
1231 """
1232 Return a view on the array.
1233
1234 Parameters
1235 ----------
1236 dtype : str, np.dtype, or ExtensionDtype, optional
1237 Default None.
1238
1239 Returns
1240 -------
1241 ExtensionArray or np.ndarray
1242 A view on the :class:`ExtensionArray`'s data.
1243 """
1244 # NB:
1245 # - This must return a *new* object referencing the same data, not self.
1246 # - The only case that *must* be implemented is with dtype=None,
1247 # giving a view with the same dtype as self.
1248 if dtype is not None:
1249 raise NotImplementedError(dtype)
1250 return self[:]
1251
1252 # ------------------------------------------------------------------------
1253 # Printing
1254 # ------------------------------------------------------------------------
1255
1256 def __repr__(self) -> str:
1257 if self.ndim > 1:
1258 return self._repr_2d()
1259
1260 from pandas.io.formats.printing import format_object_summary
1261
1262 # the short repr has no trailing newline, while the truncated
1263 # repr does. So we include a newline in our template, and strip
1264 # any trailing newlines from format_object_summary
1265 data = format_object_summary(
1266 self, self._formatter(), indent_for_name=False
1267 ).rstrip(", \n")
1268 class_name = f"<{type(self).__name__}>\n"
1269 return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}"
1270
1271 def _repr_2d(self) -> str:
1272 from pandas.io.formats.printing import format_object_summary
1273
1274 # the short repr has no trailing newline, while the truncated
1275 # repr does. So we include a newline in our template, and strip
1276 # any trailing newlines from format_object_summary
1277 lines = [
1278 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip(
1279 ", \n"
1280 )
1281 for x in self
1282 ]
1283 data = ",\n".join(lines)
1284 class_name = f"<{type(self).__name__}>"
1285 return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}"
1286
1287 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]:
1288 """
1289 Formatting function for scalar values.
1290
1291 This is used in the default '__repr__'. The returned formatting
1292 function receives instances of your scalar type.
1293
1294 Parameters
1295 ----------
1296 boxed : bool, default False
            An indicator of whether your array is being printed
1298 within a Series, DataFrame, or Index (True), or just by
1299 itself (False). This may be useful if you want scalar values
1300 to appear differently within a Series versus on its own (e.g.
1301 quoted or not).
1302
1303 Returns
1304 -------
1305 Callable[[Any], str]
1306 A callable that gets instances of the scalar type and
1307 returns a string. By default, :func:`repr` is used
1308 when ``boxed=False`` and :func:`str` is used when
1309 ``boxed=True``.
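
        A minimal sketch of an override that quotes scalars only in the
        standalone repr (purely illustrative):

        .. code-block:: python

            def _formatter(self, boxed=False):
                if boxed:
                    return str
                return lambda x: f"'{x}'"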
1310 """
1311 if boxed:
1312 return str
1313 return repr
1314
1315 # ------------------------------------------------------------------------
1316 # Reshaping
1317 # ------------------------------------------------------------------------
1318
1319 def transpose(self, *axes: int) -> ExtensionArray:
1320 """
1321 Return a transposed view on this array.
1322
1323 Because ExtensionArrays are always 1D, this is a no-op. It is included
1324 for compatibility with np.ndarray.
1325 """
1326 return self[:]
1327
1328 @property
1329 def T(self) -> ExtensionArray:
1330 return self.transpose()
1331
1332 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray:
1333 """
1334 Return a flattened view on this array.
1335
1336 Parameters
1337 ----------
1338 order : {None, 'C', 'F', 'A', 'K'}, default 'C'
1339
1340 Returns
1341 -------
1342 ExtensionArray
1343
1344 Notes
1345 -----
1346 - Because ExtensionArrays are 1D-only, this is a no-op.
        - The "order" argument is ignored; it exists only for compatibility
          with NumPy.
1348 """
1349 return self
1350
1351 @classmethod
1352 def _concat_same_type(
1353 cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT]
1354 ) -> ExtensionArrayT:
1355 """
        Concatenate multiple arrays of this dtype.
1357
1358 Parameters
1359 ----------
1360 to_concat : sequence of this type
1361
1362 Returns
1363 -------
1364 ExtensionArray
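
        Examples
        --------
        A minimal sketch for an ndarray-backed subclass (hypothetical
        ``._data`` attribute; the constructor is assumed to accept an
        ndarray):

        .. code-block:: python

            @classmethod
            def _concat_same_type(cls, to_concat):
                return cls(np.concatenate([arr._data for arr in to_concat]))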
1365 """
1366 # Implementer note: this method will only be called with a sequence of
1367 # ExtensionArrays of this class and with the same dtype as self. This
1368 # should allow "easy" concatenation (no upcasting needed), and result
1369 # in a new ExtensionArray of the same dtype.
1370 # Note: this strict behaviour is only guaranteed starting with pandas 1.1
1371 raise AbstractMethodError(cls)
1372
1373 # The _can_hold_na attribute is set to True so that pandas internals
1374 # will use the ExtensionDtype.na_value as the NA value in operations
1375 # such as take(), reindex(), shift(), etc. In addition, those results
1376 # will then be of the ExtensionArray subclass rather than an array
1377 # of objects
1378 @cache_readonly
1379 def _can_hold_na(self) -> bool:
1380 return self.dtype._can_hold_na
1381
1382 def _accumulate(
1383 self, name: str, *, skipna: bool = True, **kwargs
1384 ) -> ExtensionArray:
1385 """
1386 Return an ExtensionArray performing an accumulation operation.
1387
1388 The underlying data type might change.
1389
1390 Parameters
1391 ----------
1392 name : str
1393 Name of the function, supported values are:
1394 - cummin
1395 - cummax
1396 - cumsum
1397 - cumprod
1398 skipna : bool, default True
1399 If True, skip NA values.
1400 **kwargs
1401 Additional keyword arguments passed to the accumulation function.
1402 Currently, there is no supported kwarg.
1403
1404 Returns
1405 -------
1406 array
1407
1408 Raises
1409 ------
1410 NotImplementedError : subclass does not define accumulations
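
        Examples
        --------
        A minimal sketch that dispatches to NumPy for an array backed by a
        hypothetical ``self._data`` ndarray (NA handling via ``skipna`` is
        omitted for brevity):

        .. code-block:: python

            def _accumulate(self, name, *, skipna=True, **kwargs):
                funcs = {
                    "cumsum": np.cumsum,
                    "cumprod": np.cumprod,
                    "cummin": np.minimum.accumulate,
                    "cummax": np.maximum.accumulate,
                }
                if name not in funcs:
                    raise NotImplementedError(f"{name} is not supported")
                return type(self)._from_sequence(funcs[name](self._data))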
1411 """
1412 raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")
1413
1414 def _reduce(self, name: str, *, skipna: bool = True, **kwargs):
1415 """
1416 Return a scalar result of performing the reduction operation.
1417
1418 Parameters
1419 ----------
1420 name : str
1421 Name of the function, supported values are:
1422 { any, all, min, max, sum, mean, median, prod,
1423 std, var, sem, kurt, skew }.
1424 skipna : bool, default True
1425 If True, skip NaN values.
1426 **kwargs
1427 Additional keyword arguments passed to the reduction function.
1428 Currently, `ddof` is the only supported kwarg.
1429
1430 Returns
1431 -------
1432 scalar
1433
1434 Raises
1435 ------
1436 TypeError : subclass does not define reductions
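
        Examples
        --------
        The default implementation dispatches to a method of the same name,
        so a subclass that defines e.g. ``sum`` gets that reduction for
        free. A sketch of such a method for a hypothetical ``self._data``
        ndarray:

        .. code-block:: python

            def sum(self, *, skipna=True, **kwargs):
                data = self._data
                if skipna:
                    data = data[~np.asarray(self.isna())]
                return data.sum()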
1437 """
1438 meth = getattr(self, name, None)
1439 if meth is None:
1440 raise TypeError(
1441 f"'{type(self).__name__}' with dtype {self.dtype} "
1442 f"does not support reduction '{name}'"
1443 )
1444 return meth(skipna=skipna, **kwargs)
1445
1446 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
1447 # Incompatible types in assignment (expression has type "None", base class
1448 # "object" defined the type as "Callable[[object], int]")
1449 __hash__: ClassVar[None] # type: ignore[assignment]
1450
1451 # ------------------------------------------------------------------------
1452 # Non-Optimized Default Methods; in the case of the private methods here,
1453 # these are not guaranteed to be stable across pandas versions.
1454
1455 def tolist(self) -> list:
1456 """
1457 Return a list of the values.
1458
1459 These are each a scalar type, which is a Python scalar
1460 (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period).
1462
1463 Returns
1464 -------
1465 list
1466 """
1467 if self.ndim > 1:
1468 return [x.tolist() for x in self]
1469 return list(self)
1470
1471 def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT:
1472 indexer = np.delete(np.arange(len(self)), loc)
1473 return self.take(indexer)
1474
1475 def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT:
1476 """
1477 Insert an item at the given position.
1478
1479 Parameters
1480 ----------
1481 loc : int
1482 item : scalar-like
1483
1484 Returns
1485 -------
1486 same type as self
1487
1488 Notes
1489 -----
1490 This method should be both type and dtype-preserving. If the item
1491 cannot be held in an array of this type/dtype, either ValueError or
1492 TypeError should be raised.
1493
1494 The default implementation relies on _from_sequence to raise on invalid
1495 items.
1496 """
1497 loc = validate_insert_loc(loc, len(self))
1498
1499 item_arr = type(self)._from_sequence([item], dtype=self.dtype)
1500
1501 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]])
1502
1503 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1504 """
1505 Analogue to np.putmask(self, mask, value)
1506
1507 Parameters
1508 ----------
1509 mask : np.ndarray[bool]
1510 value : scalar or listlike
1511 If listlike, must be arraylike with same length as self.
1512
1513 Returns
1514 -------
1515 None
1516
1517 Notes
1518 -----
1519 Unlike np.putmask, we do not repeat listlike values with mismatched length.
1520 'value' should either be a scalar or an arraylike with the same length
1521 as self.
1522 """
1523 if is_list_like(value):
1524 val = value[mask]
1525 else:
1526 val = value
1527
1528 self[mask] = val
1529
1530 def _where(
1531 self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value
1532 ) -> ExtensionArrayT:
1533 """
1534 Analogue to np.where(mask, self, value)
1535
1536 Parameters
1537 ----------
1538 mask : np.ndarray[bool]
1539 value : scalar or listlike
1540
1541 Returns
1542 -------
1543 same type as self
1544 """
1545 result = self.copy()
1546
1547 if is_list_like(value):
1548 val = value[~mask]
1549 else:
1550 val = value
1551
1552 result[~mask] = val
1553 return result
1554
1555 def _fill_mask_inplace(
1556 self, method: str, limit, mask: npt.NDArray[np.bool_]
1557 ) -> None:
1558 """
1559 Replace values in locations specified by 'mask' using pad or backfill.
1560
        See Also
1562 --------
1563 ExtensionArray.fillna
1564 """
1565 func = missing.get_fill_func(method)
1566 npvalues = self.astype(object)
1567 # NB: if we don't copy mask here, it may be altered inplace, which
1568 # would mess up the `self[mask] = ...` below.
1569 func(npvalues, limit=limit, mask=mask.copy())
1570 new_values = self._from_sequence(npvalues, dtype=self.dtype)
1571 self[mask] = new_values[mask]
1572
1573 def _rank(
1574 self,
1575 *,
1576 axis: AxisInt = 0,
1577 method: str = "average",
1578 na_option: str = "keep",
1579 ascending: bool = True,
1580 pct: bool = False,
1581 ):
1582 """
1583 See Series.rank.__doc__.
1584 """
1585 if axis != 0:
1586 raise NotImplementedError
1587
1588 return rank(
1589 self,
1590 axis=axis,
1591 method=method,
1592 na_option=na_option,
1593 ascending=ascending,
1594 pct=pct,
1595 )
1596
1597 @classmethod
1598 def _empty(cls, shape: Shape, dtype: ExtensionDtype):
1599 """
1600 Create an ExtensionArray with the given shape and dtype.
1601
        See Also
        --------
        ExtensionDtype.empty
            ExtensionDtype.empty is the 'official' public version of this API.
1606 """
1607 # Implementer note: while ExtensionDtype.empty is the public way to
1608 # call this method, it is still required to implement this `_empty`
1609 # method as well (it is called internally in pandas)
1610 obj = cls._from_sequence([], dtype=dtype)
1611
1612 taker = np.broadcast_to(np.intp(-1), shape)
1613 result = obj.take(taker, allow_fill=True)
1614 if not isinstance(result, cls) or dtype != result.dtype:
1615 raise NotImplementedError(
1616 f"Default 'empty' implementation is invalid for dtype='{dtype}'"
1617 )
1618 return result
1619
1620 def _quantile(
1621 self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str
1622 ) -> ExtensionArrayT:
1623 """
1624 Compute the quantiles of self for each quantile in `qs`.
1625
1626 Parameters
1627 ----------
1628 qs : np.ndarray[float64]
        interpolation : str
1630
1631 Returns
1632 -------
1633 same type as self
1634 """
1635 mask = np.asarray(self.isna())
1636 arr = np.asarray(self)
1637 fill_value = np.nan
1638
1639 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation)
1640 return type(self)._from_sequence(res_values)
1641
1642 def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT:
1643 """
1644 Returns the mode(s) of the ExtensionArray.
1645
1646 Always returns `ExtensionArray` even if only one value.
1647
1648 Parameters
1649 ----------
1650 dropna : bool, default True
1651 Don't consider counts of NA values.
1652
1653 Returns
1654 -------
1655 same type as self
1656 Sorted, if possible.
1657 """
1658 # error: Incompatible return value type (got "Union[ExtensionArray,
1659 # ndarray[Any, Any]]", expected "ExtensionArrayT")
1660 return mode(self, dropna=dropna) # type: ignore[return-value]
1661
1662 def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
1663 if any(
1664 isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
1665 ):
1666 return NotImplemented
1667
1668 result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
1669 self, ufunc, method, *inputs, **kwargs
1670 )
1671 if result is not NotImplemented:
1672 return result
1673
1674 if "out" in kwargs:
1675 return arraylike.dispatch_ufunc_with_out(
1676 self, ufunc, method, *inputs, **kwargs
1677 )
1678
1679 if method == "reduce":
1680 result = arraylike.dispatch_reduction_ufunc(
1681 self, ufunc, method, *inputs, **kwargs
1682 )
1683 if result is not NotImplemented:
1684 return result
1685
1686 return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)
1687
1688
1689class ExtensionArraySupportsAnyAll(ExtensionArray):
1690 def any(self, *, skipna: bool = True) -> bool:
1691 raise AbstractMethodError(self)
1692
1693 def all(self, *, skipna: bool = True) -> bool:
1694 raise AbstractMethodError(self)
1695
1696
1697class ExtensionOpsMixin:
1698 """
1699 A base class for linking the operators to their dunder names.
1700
1701 .. note::
1702
1703 You may want to set ``__array_priority__`` if you want your
1704 implementation to be called when involved in binary operations
1705 with NumPy arrays.
1706 """
1707
1708 @classmethod
1709 def _create_arithmetic_method(cls, op):
1710 raise AbstractMethodError(cls)
1711
1712 @classmethod
1713 def _add_arithmetic_ops(cls) -> None:
1714 setattr(cls, "__add__", cls._create_arithmetic_method(operator.add))
1715 setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd))
1716 setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub))
1717 setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub))
1718 setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul))
1719 setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul))
1720 setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow))
1721 setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow))
1722 setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod))
1723 setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod))
1724 setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv))
1725 setattr(
1726 cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv)
1727 )
1728 setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv))
1729 setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv))
1730 setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod))
1731 setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod))
1732
1733 @classmethod
1734 def _create_comparison_method(cls, op):
1735 raise AbstractMethodError(cls)
1736
1737 @classmethod
1738 def _add_comparison_ops(cls) -> None:
1739 setattr(cls, "__eq__", cls._create_comparison_method(operator.eq))
1740 setattr(cls, "__ne__", cls._create_comparison_method(operator.ne))
1741 setattr(cls, "__lt__", cls._create_comparison_method(operator.lt))
1742 setattr(cls, "__gt__", cls._create_comparison_method(operator.gt))
1743 setattr(cls, "__le__", cls._create_comparison_method(operator.le))
1744 setattr(cls, "__ge__", cls._create_comparison_method(operator.ge))
1745
1746 @classmethod
1747 def _create_logical_method(cls, op):
1748 raise AbstractMethodError(cls)
1749
1750 @classmethod
1751 def _add_logical_ops(cls) -> None:
1752 setattr(cls, "__and__", cls._create_logical_method(operator.and_))
1753 setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_))
1754 setattr(cls, "__or__", cls._create_logical_method(operator.or_))
1755 setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_))
1756 setattr(cls, "__xor__", cls._create_logical_method(operator.xor))
1757 setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor))
1758
1759
1760class ExtensionScalarOpsMixin(ExtensionOpsMixin):
1761 """
1762 A mixin for defining ops on an ExtensionArray.
1763
1764 It is assumed that the underlying scalar objects have the operators
1765 already defined.
1766
1767 Notes
1768 -----
1769 If you have defined a subclass MyExtensionArray(ExtensionArray), then
1770 use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to
1771 get the arithmetic operators. After the definition of MyExtensionArray,
1772 insert the lines
1773
1774 MyExtensionArray._add_arithmetic_ops()
1775 MyExtensionArray._add_comparison_ops()
1776
1777 to link the operators to your class.
1778
1779 .. note::
1780
1781 You may want to set ``__array_priority__`` if you want your
1782 implementation to be called when involved in binary operations
1783 with NumPy arrays.
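
    A minimal sketch of that wiring (``MyExtensionArray`` is an
    illustrative name):

    .. code-block:: python

        class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin):
            ...


        MyExtensionArray._add_arithmetic_ops()
        MyExtensionArray._add_comparison_ops()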
1784 """
1785
1786 @classmethod
1787 def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None):
1788 """
1789 A class method that returns a method that will correspond to an
1790 operator for an ExtensionArray subclass, by dispatching to the
1791 relevant operator defined on the individual elements of the
1792 ExtensionArray.
1793
1794 Parameters
1795 ----------
1796 op : function
1797 An operator that takes arguments op(a, b)
1798 coerce_to_dtype : bool, default True
            boolean indicating whether to attempt to convert
            the result to the underlying ExtensionArray dtype.
            If it's not possible to create a new ExtensionArray with the
            values, an ndarray is returned instead.
        result_dtype : numpy dtype, optional
            The dtype passed to :func:`numpy.asarray` when ``coerce_to_dtype``
            is False and the result is converted to an ndarray.
1803
1804 Returns
1805 -------
1806 Callable[[Any, Any], Union[ndarray, ExtensionArray]]
1807 A method that can be bound to a class. When used, the method
1808 receives the two arguments, one of which is the instance of
1809 this class, and should return an ExtensionArray or an ndarray.
1810
1811 Returning an ndarray may be necessary when the result of the
1812 `op` cannot be stored in the ExtensionArray. The dtype of the
1813 ndarray uses NumPy's normal inference rules.
1814
1815 Examples
1816 --------
1817 Given an ExtensionArray subclass called MyExtensionArray, use
1818
1819 __add__ = cls._create_method(operator.add)
1820
1821 in the class definition of MyExtensionArray to create the operator
        for addition, which will be based on the operator implementation
        of the underlying elements of the ExtensionArray.
1824 """
1825
1826 def _binop(self, other):
1827 def convert_values(param):
1828 if isinstance(param, ExtensionArray) or is_list_like(param):
1829 ovalues = param
                else:  # Assume it's an object
1831 ovalues = [param] * len(self)
1832 return ovalues
1833
1834 if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)):
1835 # rely on pandas to unbox and dispatch to us
1836 return NotImplemented
1837
1838 lvalues = self
1839 rvalues = convert_values(other)
1840
1841 # If the operator is not defined for the underlying objects,
1842 # a TypeError should be raised
1843 res = [op(a, b) for (a, b) in zip(lvalues, rvalues)]
1844
1845 def _maybe_convert(arr):
1846 if coerce_to_dtype:
1847 # https://github.com/pandas-dev/pandas/issues/22850
1848 # We catch all regular exceptions here, and fall back
1849 # to an ndarray.
1850 res = maybe_cast_to_extension_array(type(self), arr)
1851 if not isinstance(res, type(self)):
1852 # exception raised in _from_sequence; ensure we have ndarray
1853 res = np.asarray(arr)
1854 else:
1855 res = np.asarray(arr, dtype=result_dtype)
1856 return res
1857
1858 if op.__name__ in {"divmod", "rdivmod"}:
1859 a, b = zip(*res)
1860 return _maybe_convert(a), _maybe_convert(b)
1861
1862 return _maybe_convert(res)
1863
1864 op_name = f"__{op.__name__}__"
1865 return set_function_name(_binop, op_name, cls)
1866
1867 @classmethod
1868 def _create_arithmetic_method(cls, op):
1869 return cls._create_method(op)
1870
1871 @classmethod
1872 def _create_comparison_method(cls, op):
1873 return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)