Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/arrays/base.py: 31%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

474 statements  

1""" 

2An interface for extending pandas with custom arrays. 

3 

4.. warning:: 

5 

6 This is an experimental API and subject to breaking changes 

7 without warning. 

8""" 

9from __future__ import annotations 

10 

11import operator 

12from typing import ( 

13 TYPE_CHECKING, 

14 Any, 

15 Callable, 

16 ClassVar, 

17 Literal, 

18 cast, 

19 overload, 

20) 

21import warnings 

22 

23import numpy as np 

24 

25from pandas._libs import ( 

26 algos as libalgos, 

27 lib, 

28) 

29from pandas.compat import set_function_name 

30from pandas.compat.numpy import function as nv 

31from pandas.errors import AbstractMethodError 

32from pandas.util._decorators import ( 

33 Appender, 

34 Substitution, 

35 cache_readonly, 

36) 

37from pandas.util._exceptions import find_stack_level 

38from pandas.util._validators import ( 

39 validate_bool_kwarg, 

40 validate_fillna_kwargs, 

41 validate_insert_loc, 

42) 

43 

44from pandas.core.dtypes.cast import maybe_cast_pointwise_result 

45from pandas.core.dtypes.common import ( 

46 is_list_like, 

47 is_scalar, 

48 pandas_dtype, 

49) 

50from pandas.core.dtypes.dtypes import ExtensionDtype 

51from pandas.core.dtypes.generic import ( 

52 ABCDataFrame, 

53 ABCIndex, 

54 ABCSeries, 

55) 

56from pandas.core.dtypes.missing import isna 

57 

58from pandas.core import ( 

59 arraylike, 

60 missing, 

61 roperator, 

62) 

63from pandas.core.algorithms import ( 

64 duplicated, 

65 factorize_array, 

66 isin, 

67 map_array, 

68 mode, 

69 rank, 

70 unique, 

71) 

72from pandas.core.array_algos.quantile import quantile_with_mask 

73from pandas.core.missing import _fill_limit_area_1d 

74from pandas.core.sorting import ( 

75 nargminmax, 

76 nargsort, 

77) 

78 

79if TYPE_CHECKING: 

80 from collections.abc import ( 

81 Iterator, 

82 Sequence, 

83 ) 

84 

85 from pandas._typing import ( 

86 ArrayLike, 

87 AstypeArg, 

88 AxisInt, 

89 Dtype, 

90 DtypeObj, 

91 FillnaOptions, 

92 InterpolateOptions, 

93 NumpySorter, 

94 NumpyValueArrayLike, 

95 PositionalIndexer, 

96 ScalarIndexer, 

97 Self, 

98 SequenceIndexer, 

99 Shape, 

100 SortKind, 

101 TakeIndexer, 

102 npt, 

103 ) 

104 

105 from pandas import Index 

106 

107_extension_array_shared_docs: dict[str, str] = {} 

108 

109 

110class ExtensionArray: 

111 """ 

112 Abstract base class for custom 1-D array types. 

113 

114 pandas will recognize instances of this class as proper arrays 

115 with a custom type and will not attempt to coerce them to objects. They 

116 may be stored directly inside a :class:`DataFrame` or :class:`Series`. 

117 

118 Attributes 

119 ---------- 

120 dtype 

121 nbytes 

122 ndim 

123 shape 

124 

125 Methods 

126 ------- 

127 argsort 

128 astype 

129 copy 

130 dropna 

131 duplicated 

132 factorize 

133 fillna 

134 equals 

135 insert 

136 interpolate 

137 isin 

138 isna 

139 ravel 

140 repeat 

141 searchsorted 

142 shift 

143 take 

144 tolist 

145 unique 

146 view 

147 _accumulate 

148 _concat_same_type 

149 _explode 

150 _formatter 

151 _from_factorized 

152 _from_sequence 

153 _from_sequence_of_strings 

154 _hash_pandas_object 

155 _pad_or_backfill 

156 _reduce 

157 _values_for_argsort 

158 _values_for_factorize 

159 

160 Notes 

161 ----- 

162 The interface includes the following abstract methods that must be 

163 implemented by subclasses: 

164 

165 * _from_sequence 

166 * _from_factorized 

167 * __getitem__ 

168 * __len__ 

169 * __eq__ 

170 * dtype 

171 * nbytes 

172 * isna 

173 * take 

174 * copy 

175 * _concat_same_type 

176 * interpolate 

177 

178 A default repr displaying the type, (truncated) data, length, 

179 and dtype is provided. It can be customized or replaced by 

180 by overriding: 

181 

182 * __repr__ : A default repr for the ExtensionArray. 

183 * _formatter : Print scalars inside a Series or DataFrame. 

184 

185 Some methods require casting the ExtensionArray to an ndarray of Python 

186 objects with ``self.astype(object)``, which may be expensive. When 

187 performance is a concern, we highly recommend overriding the following 

188 methods: 

189 

190 * fillna 

191 * _pad_or_backfill 

192 * dropna 

193 * unique 

194 * factorize / _values_for_factorize 

195 * argsort, argmax, argmin / _values_for_argsort 

196 * searchsorted 

197 * map 

198 

199 The remaining methods implemented on this class should be performant, 

200 as they only compose abstract methods. Still, a more efficient 

201 implementation may be available, and these methods can be overridden. 

202 

203 One can implement methods to handle array accumulations or reductions. 

204 

205 * _accumulate 

206 * _reduce 

207 

208 One can implement methods to handle parsing from strings that will be used 

209 in methods such as ``pandas.io.parsers.read_csv``. 

210 

211 * _from_sequence_of_strings 

212 

213 This class does not inherit from 'abc.ABCMeta' for performance reasons. 

214 Methods and properties required by the interface raise 

215 ``pandas.errors.AbstractMethodError`` and no ``register`` method is 

216 provided for registering virtual subclasses. 

217 

218 ExtensionArrays are limited to 1 dimension. 

219 

220 They may be backed by none, one, or many NumPy arrays. For example, 

221 ``pandas.Categorical`` is an extension array backed by two arrays, 

222 one for codes and one for categories. An array of IPv6 address may 

223 be backed by a NumPy structured array with two fields, one for the 

224 lower 64 bits and one for the upper 64 bits. Or they may be backed 

225 by some other storage type, like Python lists. Pandas makes no 

226 assumptions on how the data are stored, just that it can be converted 

227 to a NumPy array. 

228 The ExtensionArray interface does not impose any rules on how this data 

229 is stored. However, currently, the backing data cannot be stored in 

230 attributes called ``.values`` or ``._values`` to ensure full compatibility 

231 with pandas internals. But other names as ``.data``, ``._data``, 

232 ``._items``, ... can be freely used. 

233 

234 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects 

235 that 

236 

237 1. You defer by returning ``NotImplemented`` when any Series are present 

238 in `inputs`. Pandas will extract the arrays and call the ufunc again. 

239 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. 

240 Pandas inspect this to determine whether the ufunc is valid for the 

241 types present. 

242 

243 See :ref:`extending.extension.ufunc` for more. 

244 

245 By default, ExtensionArrays are not hashable. Immutable subclasses may 

246 override this behavior. 

247 

248 Examples 

249 -------- 

250 Please see the following: 

251 

252 https://github.com/pandas-dev/pandas/blob/main/pandas/tests/extension/list/array.py 

253 """ 

254 

    # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray.
    # Don't override this.
    _typ = "extension"

    # similar to __array_priority__, positions ExtensionArray after Index,
    # Series, and DataFrame. EA subclasses may override to choose which EA
    # subclass takes priority. If overriding, the value should always be
    # strictly less than 2000 to be below Index.__pandas_priority__.
    __pandas_priority__ = 1000

264 

265 # ------------------------------------------------------------------------ 

266 # Constructors 

267 # ------------------------------------------------------------------------ 

268 

269 @classmethod 

270 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False): 

271 """ 

272 Construct a new ExtensionArray from a sequence of scalars. 

273 

274 Parameters 

275 ---------- 

276 scalars : Sequence 

277 Each element will be an instance of the scalar type for this 

278 array, ``cls.dtype.type`` or be converted into this type in this method. 

279 dtype : dtype, optional 

280 Construct for this particular dtype. This should be a Dtype 

281 compatible with the ExtensionArray. 

282 copy : bool, default False 

283 If True, copy the underlying data. 

284 

285 Returns 

286 ------- 

287 ExtensionArray 

288 

289 Examples 

290 -------- 

291 >>> pd.arrays.IntegerArray._from_sequence([4, 5]) 

292 <IntegerArray> 

293 [4, 5] 

294 Length: 2, dtype: Int64 

295 """ 

296 raise AbstractMethodError(cls) 

297 

298 @classmethod 

299 def _from_scalars(cls, scalars, *, dtype: DtypeObj) -> Self: 

300 """ 

301 Strict analogue to _from_sequence, allowing only sequences of scalars 

302 that should be specifically inferred to the given dtype. 

303 

304 Parameters 

305 ---------- 

306 scalars : sequence 

307 dtype : ExtensionDtype 

308 

309 Raises 

310 ------ 

311 TypeError or ValueError 

312 

313 Notes 

314 ----- 

315 This is called in a try/except block when casting the result of a 

316 pointwise operation. 

317 """ 

318 try: 

319 return cls._from_sequence(scalars, dtype=dtype, copy=False) 

320 except (ValueError, TypeError): 

321 raise 

322 except Exception: 

323 warnings.warn( 

324 "_from_scalars should only raise ValueError or TypeError. " 

325 "Consider overriding _from_scalars where appropriate.", 

326 stacklevel=find_stack_level(), 

327 ) 

328 raise 

329 

330 @classmethod 

331 def _from_sequence_of_strings( 

332 cls, strings, *, dtype: Dtype | None = None, copy: bool = False 

333 ): 

334 """ 

335 Construct a new ExtensionArray from a sequence of strings. 

336 

337 Parameters 

338 ---------- 

339 strings : Sequence 

340 Each element will be an instance of the scalar type for this 

341 array, ``cls.dtype.type``. 

342 dtype : dtype, optional 

343 Construct for this particular dtype. This should be a Dtype 

344 compatible with the ExtensionArray. 

345 copy : bool, default False 

346 If True, copy the underlying data. 

347 

348 Returns 

349 ------- 

350 ExtensionArray 

351 

352 Examples 

353 -------- 

354 >>> pd.arrays.IntegerArray._from_sequence_of_strings(["1", "2", "3"]) 

355 <IntegerArray> 

356 [1, 2, 3] 

357 Length: 3, dtype: Int64 

358 """ 

359 raise AbstractMethodError(cls) 

360 

361 @classmethod 

362 def _from_factorized(cls, values, original): 

363 """ 

364 Reconstruct an ExtensionArray after factorization. 

365 

366 Parameters 

367 ---------- 

368 values : ndarray 

369 An integer ndarray with the factorized values. 

370 original : ExtensionArray 

371 The original ExtensionArray that factorize was called on. 

372 

373 See Also 

374 -------- 

375 factorize : Top-level factorize method that dispatches here. 

376 ExtensionArray.factorize : Encode the extension array as an enumerated type. 

377 

378 Examples 

379 -------- 

380 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), 

381 ... pd.Interval(1, 5), pd.Interval(1, 5)]) 

382 >>> codes, uniques = pd.factorize(interv_arr) 

383 >>> pd.arrays.IntervalArray._from_factorized(uniques, interv_arr) 

384 <IntervalArray> 

385 [(0, 1], (1, 5]] 

386 Length: 2, dtype: interval[int64, right] 

387 """ 

388 raise AbstractMethodError(cls) 

389 

390 # ------------------------------------------------------------------------ 

391 # Must be a Sequence 

392 # ------------------------------------------------------------------------ 

393 @overload 

394 def __getitem__(self, item: ScalarIndexer) -> Any: 

395 ... 

396 

397 @overload 

398 def __getitem__(self, item: SequenceIndexer) -> Self: 

399 ... 

400 

401 def __getitem__(self, item: PositionalIndexer) -> Self | Any: 

402 """ 

403 Select a subset of self. 

404 

405 Parameters 

406 ---------- 

407 item : int, slice, or ndarray 

408 * int: The position in 'self' to get. 

409 

410 * slice: A slice object, where 'start', 'stop', and 'step' are 

411 integers or None 

412 

413 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' 

414 

415 * list[int]: A list of int 

416 

417 Returns 

418 ------- 

419 item : scalar or ExtensionArray 

420 

421 Notes 

422 ----- 

423 For scalar ``item``, return a scalar value suitable for the array's 

424 type. This should be an instance of ``self.dtype.type``. 

425 

426 For slice ``key``, return an instance of ``ExtensionArray``, even 

427 if the slice is length 0 or 1. 

428 

429 For a boolean mask, return an instance of ``ExtensionArray``, filtered 

430 to the values where ``item`` is True. 

431 """ 

432 raise AbstractMethodError(self) 

433 

434 def __setitem__(self, key, value) -> None: 

435 """ 

436 Set one or more values inplace. 

437 

438 This method is not required to satisfy the pandas extension array 

439 interface. 

440 

441 Parameters 

442 ---------- 

443 key : int, ndarray, or slice 

444 When called from, e.g. ``Series.__setitem__``, ``key`` will be 

445 one of 

446 

447 * scalar int 

448 * ndarray of integers. 

449 * boolean ndarray 

450 * slice object 

451 

452 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object 

453 value or values to be set of ``key``. 

454 

455 Returns 

456 ------- 

457 None 

458 """ 

459 # Some notes to the ExtensionArray implementer who may have ended up 

460 # here. While this method is not required for the interface, if you 

461 # *do* choose to implement __setitem__, then some semantics should be 

462 # observed: 

463 # 

464 # * Setting multiple values : ExtensionArrays should support setting 

465 # multiple values at once, 'key' will be a sequence of integers and 

466 # 'value' will be a same-length sequence. 

467 # 

468 # * Broadcasting : For a sequence 'key' and a scalar 'value', 

469 # each position in 'key' should be set to 'value'. 

470 # 

471 # * Coercion : Most users will expect basic coercion to work. For 

472 # example, a string like '2018-01-01' is coerced to a datetime 

473 # when setting on a datetime64ns array. In general, if the 

474 # __init__ method coerces that value, then so should __setitem__ 

475 # Note, also, that Series/DataFrame.where internally use __setitem__ 

476 # on a copy of the data. 

477 raise NotImplementedError(f"{type(self)} does not implement __setitem__.") 

478 

479 def __len__(self) -> int: 

480 """ 

481 Length of this array 

482 

483 Returns 

484 ------- 

485 length : int 

486 """ 

487 raise AbstractMethodError(self) 

488 

489 def __iter__(self) -> Iterator[Any]: 

490 """ 

491 Iterate over elements of the array. 

492 """ 

493 # This needs to be implemented so that pandas recognizes extension 

494 # arrays as list-like. The default implementation makes successive 

495 # calls to ``__getitem__``, which may be slower than necessary. 

496 for i in range(len(self)): 

497 yield self[i] 

498 

499 def __contains__(self, item: object) -> bool | np.bool_: 

500 """ 

501 Return for `item in self`. 

502 """ 

503 # GH37867 

504 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] 

505 # would raise a TypeError. The implementation below works around that. 

506 if is_scalar(item) and isna(item): 

507 if not self._can_hold_na: 

508 return False 

509 elif item is self.dtype.na_value or isinstance(item, self.dtype.type): 

510 return self._hasna 

511 else: 

512 return False 

513 else: 

514 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no 

515 # attribute "any" 

516 return (item == self).any() # type: ignore[union-attr] 

517 

518 # error: Signature of "__eq__" incompatible with supertype "object" 

519 def __eq__(self, other: object) -> ArrayLike: # type: ignore[override] 

520 """ 

521 Return for `self == other` (element-wise equality). 

522 """ 

523 # Implementer note: this should return a boolean numpy ndarray or 

524 # a boolean ExtensionArray. 

525 # When `other` is one of Series, Index, or DataFrame, this method should 

526 # return NotImplemented (to ensure that those objects are responsible for 

527 # first unpacking the arrays, and then dispatch the operation to the 

528 # underlying arrays) 

529 raise AbstractMethodError(self) 

530 

531 # error: Signature of "__ne__" incompatible with supertype "object" 

532 def __ne__(self, other: object) -> ArrayLike: # type: ignore[override] 

533 """ 

534 Return for `self != other` (element-wise in-equality). 

535 """ 

536 # error: Unsupported operand type for ~ ("ExtensionArray") 

537 return ~(self == other) # type: ignore[operator] 

538 

539 def to_numpy( 

540 self, 

541 dtype: npt.DTypeLike | None = None, 

542 copy: bool = False, 

543 na_value: object = lib.no_default, 

544 ) -> np.ndarray: 

545 """ 

546 Convert to a NumPy ndarray. 

547 

548 This is similar to :meth:`numpy.asarray`, but may provide additional control 

549 over how the conversion is done. 

550 

551 Parameters 

552 ---------- 

553 dtype : str or numpy.dtype, optional 

554 The dtype to pass to :meth:`numpy.asarray`. 

555 copy : bool, default False 

556 Whether to ensure that the returned value is a not a view on 

557 another array. Note that ``copy=False`` does not *ensure* that 

558 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensure that 

559 a copy is made, even if not strictly necessary. 

560 na_value : Any, optional 

561 The value to use for missing values. The default value depends 

562 on `dtype` and the type of the array. 

563 

564 Returns 

565 ------- 

566 numpy.ndarray 

567 """ 

568 result = np.asarray(self, dtype=dtype) 

569 if copy or na_value is not lib.no_default: 

570 result = result.copy() 

571 if na_value is not lib.no_default: 

572 result[self.isna()] = na_value 

573 return result 

574 

575 # ------------------------------------------------------------------------ 

576 # Required attributes 

577 # ------------------------------------------------------------------------ 

578 

579 @property 

580 def dtype(self) -> ExtensionDtype: 

581 """ 

582 An instance of ExtensionDtype. 

583 

584 Examples 

585 -------- 

586 >>> pd.array([1, 2, 3]).dtype 

587 Int64Dtype() 

588 """ 

589 raise AbstractMethodError(self) 

590 

591 @property 

592 def shape(self) -> Shape: 

593 """ 

594 Return a tuple of the array dimensions. 

595 

596 Examples 

597 -------- 

598 >>> arr = pd.array([1, 2, 3]) 

599 >>> arr.shape 

600 (3,) 

601 """ 

602 return (len(self),) 

603 

    @property
    def size(self) -> int:
        """
        The number of elements in the array.
        """
        # NOTE: np.prod over self.shape returns a NumPy integer, not a
        # builtin int; callers treat it interchangeably with int.
        # error: Incompatible return value type (got "signedinteger[_64Bit]",
        # expected "int") [return-value]
        return np.prod(self.shape)  # type: ignore[return-value]

612 

613 @property 

614 def ndim(self) -> int: 

615 """ 

616 Extension Arrays are only allowed to be 1-dimensional. 

617 

618 Examples 

619 -------- 

620 >>> arr = pd.array([1, 2, 3]) 

621 >>> arr.ndim 

622 1 

623 """ 

624 return 1 

625 

626 @property 

627 def nbytes(self) -> int: 

628 """ 

629 The number of bytes needed to store this object in memory. 

630 

631 Examples 

632 -------- 

633 >>> pd.array([1, 2, 3]).nbytes 

634 27 

635 """ 

636 # If this is expensive to compute, return an approximate lower bound 

637 # on the number of bytes needed. 

638 raise AbstractMethodError(self) 

639 

640 # ------------------------------------------------------------------------ 

641 # Additional Methods 

642 # ------------------------------------------------------------------------ 

643 

644 @overload 

645 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: 

646 ... 

647 

648 @overload 

649 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: 

650 ... 

651 

652 @overload 

653 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: 

654 ... 

655 

656 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: 

657 """ 

658 Cast to a NumPy array or ExtensionArray with 'dtype'. 

659 

660 Parameters 

661 ---------- 

662 dtype : str or dtype 

663 Typecode or data-type to which the array is cast. 

664 copy : bool, default True 

665 Whether to copy the data, even if not necessary. If False, 

666 a copy is made only if the old dtype does not match the 

667 new dtype. 

668 

669 Returns 

670 ------- 

671 np.ndarray or pandas.api.extensions.ExtensionArray 

672 An ``ExtensionArray`` if ``dtype`` is ``ExtensionDtype``, 

673 otherwise a Numpy ndarray with ``dtype`` for its dtype. 

674 

675 Examples 

676 -------- 

677 >>> arr = pd.array([1, 2, 3]) 

678 >>> arr 

679 <IntegerArray> 

680 [1, 2, 3] 

681 Length: 3, dtype: Int64 

682 

683 Casting to another ``ExtensionDtype`` returns an ``ExtensionArray``: 

684 

685 >>> arr1 = arr.astype('Float64') 

686 >>> arr1 

687 <FloatingArray> 

688 [1.0, 2.0, 3.0] 

689 Length: 3, dtype: Float64 

690 >>> arr1.dtype 

691 Float64Dtype() 

692 

693 Otherwise, we will get a Numpy ndarray: 

694 

695 >>> arr2 = arr.astype('float64') 

696 >>> arr2 

697 array([1., 2., 3.]) 

698 >>> arr2.dtype 

699 dtype('float64') 

700 """ 

701 dtype = pandas_dtype(dtype) 

702 if dtype == self.dtype: 

703 if not copy: 

704 return self 

705 else: 

706 return self.copy() 

707 

708 if isinstance(dtype, ExtensionDtype): 

709 cls = dtype.construct_array_type() 

710 return cls._from_sequence(self, dtype=dtype, copy=copy) 

711 

712 elif lib.is_np_dtype(dtype, "M"): 

713 from pandas.core.arrays import DatetimeArray 

714 

715 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy) 

716 

717 elif lib.is_np_dtype(dtype, "m"): 

718 from pandas.core.arrays import TimedeltaArray 

719 

720 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy) 

721 

722 if not copy: 

723 return np.asarray(self, dtype=dtype) 

724 else: 

725 return np.array(self, dtype=dtype, copy=copy) 

726 

727 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: 

728 """ 

729 A 1-D array indicating if each value is missing. 

730 

731 Returns 

732 ------- 

733 numpy.ndarray or pandas.api.extensions.ExtensionArray 

734 In most cases, this should return a NumPy ndarray. For 

735 exceptional cases like ``SparseArray``, where returning 

736 an ndarray would be expensive, an ExtensionArray may be 

737 returned. 

738 

739 Notes 

740 ----- 

741 If returning an ExtensionArray, then 

742 

743 * ``na_values._is_boolean`` should be True 

744 * `na_values` should implement :func:`ExtensionArray._reduce` 

745 * ``na_values.any`` and ``na_values.all`` should be implemented 

746 

747 Examples 

748 -------- 

749 >>> arr = pd.array([1, 2, np.nan, np.nan]) 

750 >>> arr.isna() 

751 array([False, False, True, True]) 

752 """ 

753 raise AbstractMethodError(self) 

754 

755 @property 

756 def _hasna(self) -> bool: 

757 # GH#22680 

758 """ 

759 Equivalent to `self.isna().any()`. 

760 

761 Some ExtensionArray subclasses may be able to optimize this check. 

762 """ 

763 return bool(self.isna().any()) 

764 

765 def _values_for_argsort(self) -> np.ndarray: 

766 """ 

767 Return values for sorting. 

768 

769 Returns 

770 ------- 

771 ndarray 

772 The transformed values should maintain the ordering between values 

773 within the array. 

774 

775 See Also 

776 -------- 

777 ExtensionArray.argsort : Return the indices that would sort this array. 

778 

779 Notes 

780 ----- 

781 The caller is responsible for *not* modifying these values in-place, so 

782 it is safe for implementers to give views on ``self``. 

783 

784 Functions that use this (e.g. ``ExtensionArray.argsort``) should ignore 

785 entries with missing values in the original array (according to 

786 ``self.isna()``). This means that the corresponding entries in the returned 

787 array don't need to be modified to sort correctly. 

788 

789 Examples 

790 -------- 

791 In most cases, this is the underlying Numpy array of the ``ExtensionArray``: 

792 

793 >>> arr = pd.array([1, 2, 3]) 

794 >>> arr._values_for_argsort() 

795 array([1, 2, 3]) 

796 """ 

797 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. 

798 return np.array(self) 

799 

800 def argsort( 

801 self, 

802 *, 

803 ascending: bool = True, 

804 kind: SortKind = "quicksort", 

805 na_position: str = "last", 

806 **kwargs, 

807 ) -> np.ndarray: 

808 """ 

809 Return the indices that would sort this array. 

810 

811 Parameters 

812 ---------- 

813 ascending : bool, default True 

814 Whether the indices should result in an ascending 

815 or descending sort. 

816 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional 

817 Sorting algorithm. 

818 na_position : {'first', 'last'}, default 'last' 

819 If ``'first'``, put ``NaN`` values at the beginning. 

820 If ``'last'``, put ``NaN`` values at the end. 

821 *args, **kwargs: 

822 Passed through to :func:`numpy.argsort`. 

823 

824 Returns 

825 ------- 

826 np.ndarray[np.intp] 

827 Array of indices that sort ``self``. If NaN values are contained, 

828 NaN values are placed at the end. 

829 

830 See Also 

831 -------- 

832 numpy.argsort : Sorting implementation used internally. 

833 

834 Examples 

835 -------- 

836 >>> arr = pd.array([3, 1, 2, 5, 4]) 

837 >>> arr.argsort() 

838 array([1, 2, 0, 4, 3]) 

839 """ 

840 # Implementer note: You have two places to override the behavior of 

841 # argsort. 

842 # 1. _values_for_argsort : construct the values passed to np.argsort 

843 # 2. argsort : total control over sorting. In case of overriding this, 

844 # it is recommended to also override argmax/argmin 

845 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) 

846 

847 values = self._values_for_argsort() 

848 return nargsort( 

849 values, 

850 kind=kind, 

851 ascending=ascending, 

852 na_position=na_position, 

853 mask=np.asarray(self.isna()), 

854 ) 

855 

856 def argmin(self, skipna: bool = True) -> int: 

857 """ 

858 Return the index of minimum value. 

859 

860 In case of multiple occurrences of the minimum value, the index 

861 corresponding to the first occurrence is returned. 

862 

863 Parameters 

864 ---------- 

865 skipna : bool, default True 

866 

867 Returns 

868 ------- 

869 int 

870 

871 See Also 

872 -------- 

873 ExtensionArray.argmax : Return the index of the maximum value. 

874 

875 Examples 

876 -------- 

877 >>> arr = pd.array([3, 1, 2, 5, 4]) 

878 >>> arr.argmin() 

879 1 

880 """ 

881 # Implementer note: You have two places to override the behavior of 

882 # argmin. 

883 # 1. _values_for_argsort : construct the values used in nargminmax 

884 # 2. argmin itself : total control over sorting. 

885 validate_bool_kwarg(skipna, "skipna") 

886 if not skipna and self._hasna: 

887 raise NotImplementedError 

888 return nargminmax(self, "argmin") 

889 

890 def argmax(self, skipna: bool = True) -> int: 

891 """ 

892 Return the index of maximum value. 

893 

894 In case of multiple occurrences of the maximum value, the index 

895 corresponding to the first occurrence is returned. 

896 

897 Parameters 

898 ---------- 

899 skipna : bool, default True 

900 

901 Returns 

902 ------- 

903 int 

904 

905 See Also 

906 -------- 

907 ExtensionArray.argmin : Return the index of the minimum value. 

908 

909 Examples 

910 -------- 

911 >>> arr = pd.array([3, 1, 2, 5, 4]) 

912 >>> arr.argmax() 

913 3 

914 """ 

915 # Implementer note: You have two places to override the behavior of 

916 # argmax. 

917 # 1. _values_for_argsort : construct the values used in nargminmax 

918 # 2. argmax itself : total control over sorting. 

919 validate_bool_kwarg(skipna, "skipna") 

920 if not skipna and self._hasna: 

921 raise NotImplementedError 

922 return nargminmax(self, "argmax") 

923 

924 def interpolate( 

925 self, 

926 *, 

927 method: InterpolateOptions, 

928 axis: int, 

929 index: Index, 

930 limit, 

931 limit_direction, 

932 limit_area, 

933 copy: bool, 

934 **kwargs, 

935 ) -> Self: 

936 """ 

937 See DataFrame.interpolate.__doc__. 

938 

939 Examples 

940 -------- 

941 >>> arr = pd.arrays.NumpyExtensionArray(np.array([0, 1, np.nan, 3])) 

942 >>> arr.interpolate(method="linear", 

943 ... limit=3, 

944 ... limit_direction="forward", 

945 ... index=pd.Index([1, 2, 3, 4]), 

946 ... fill_value=1, 

947 ... copy=False, 

948 ... axis=0, 

949 ... limit_area="inside" 

950 ... ) 

951 <NumpyExtensionArray> 

952 [0.0, 1.0, 2.0, 3.0] 

953 Length: 4, dtype: float64 

954 """ 

955 # NB: we return type(self) even if copy=False 

956 raise NotImplementedError( 

957 f"{type(self).__name__} does not implement interpolate" 

958 ) 

959 

960 def _pad_or_backfill( 

961 self, 

962 *, 

963 method: FillnaOptions, 

964 limit: int | None = None, 

965 limit_area: Literal["inside", "outside"] | None = None, 

966 copy: bool = True, 

967 ) -> Self: 

968 """ 

969 Pad or backfill values, used by Series/DataFrame ffill and bfill. 

970 

971 Parameters 

972 ---------- 

973 method : {'backfill', 'bfill', 'pad', 'ffill'} 

974 Method to use for filling holes in reindexed Series: 

975 

976 * pad / ffill: propagate last valid observation forward to next valid. 

977 * backfill / bfill: use NEXT valid observation to fill gap. 

978 

979 limit : int, default None 

980 This is the maximum number of consecutive 

981 NaN values to forward/backward fill. In other words, if there is 

982 a gap with more than this number of consecutive NaNs, it will only 

983 be partially filled. If method is not specified, this is the 

984 maximum number of entries along the entire axis where NaNs will be 

985 filled. 

986 

987 copy : bool, default True 

988 Whether to make a copy of the data before filling. If False, then 

989 the original should be modified and no new memory should be allocated. 

990 For ExtensionArray subclasses that cannot do this, it is at the 

991 author's discretion whether to ignore "copy=False" or to raise. 

992 The base class implementation ignores the keyword if any NAs are 

993 present. 

994 

995 Returns 

996 ------- 

997 Same type as self 

998 

999 Examples 

1000 -------- 

1001 >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan]) 

1002 >>> arr._pad_or_backfill(method="backfill", limit=1) 

1003 <IntegerArray> 

1004 [<NA>, 2, 2, 3, <NA>, <NA>] 

1005 Length: 6, dtype: Int64 

1006 """ 

1007 

1008 # If a 3rd-party EA has implemented this functionality in fillna, 

1009 # we warn that they need to implement _pad_or_backfill instead. 

1010 if ( 

1011 type(self).fillna is not ExtensionArray.fillna 

1012 and type(self)._pad_or_backfill is ExtensionArray._pad_or_backfill 

1013 ): 

1014 # Check for _pad_or_backfill here allows us to call 

1015 # super()._pad_or_backfill without getting this warning 

1016 warnings.warn( 

1017 "ExtensionArray.fillna 'method' keyword is deprecated. " 

1018 "In a future version. arr._pad_or_backfill will be called " 

1019 "instead. 3rd-party ExtensionArray authors need to implement " 

1020 "_pad_or_backfill.", 

1021 DeprecationWarning, 

1022 stacklevel=find_stack_level(), 

1023 ) 

1024 if limit_area is not None: 

1025 raise NotImplementedError( 

1026 f"{type(self).__name__} does not implement limit_area " 

1027 "(added in pandas 2.2). 3rd-party ExtnsionArray authors " 

1028 "need to add this argument to _pad_or_backfill." 

1029 ) 

1030 return self.fillna(method=method, limit=limit) 

1031 

1032 mask = self.isna() 

1033 

1034 if mask.any(): 

1035 # NB: the base class does not respect the "copy" keyword 

1036 meth = missing.clean_fill_method(method) 

1037 

1038 npmask = np.asarray(mask) 

1039 if limit_area is not None and not npmask.all(): 

1040 _fill_limit_area_1d(npmask, limit_area) 

1041 if meth == "pad": 

1042 indexer = libalgos.get_fill_indexer(npmask, limit=limit) 

1043 return self.take(indexer, allow_fill=True) 

1044 else: 

1045 # i.e. meth == "backfill" 

1046 indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1] 

1047 return self[::-1].take(indexer, allow_fill=True) 

1048 

1049 else: 

1050 if not copy: 

1051 return self 

1052 new_values = self.copy() 

1053 return new_values 

1054 

def fillna(
    self,
    value: object | ArrayLike | None = None,
    method: FillnaOptions | None = None,
    limit: int | None = None,
    copy: bool = True,
) -> Self:
    """
    Fill NA/NaN values using the specified method.

    Parameters
    ----------
    value : scalar, array-like
        If a scalar value is passed it is used to fill all missing values.
        Alternatively, an array-like "value" can be given. It's expected
        that the array-like have the same length as 'self'.
    method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
        Method to use for filling holes in reindexed Series:

        * pad / ffill: propagate last valid observation forward to next valid.
        * backfill / bfill: use NEXT valid observation to fill gap.

        .. deprecated:: 2.1.0

    limit : int, default None
        If method is specified, this is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled. If method is not specified, this is the
        maximum number of entries along the entire axis where NaNs will be
        filled.

        .. deprecated:: 2.1.0

    copy : bool, default True
        Whether to make a copy of the data before filling. If False, then
        the original should be modified and no new memory should be allocated.
        For ExtensionArray subclasses that cannot do this, it is at the
        author's discretion whether to ignore "copy=False" or to raise.
        The base class implementation ignores the keyword in pad/backfill
        cases.

    Returns
    -------
    ExtensionArray
        With NA/NaN filled.

    Examples
    --------
    >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
    >>> arr.fillna(0)
    <IntegerArray>
    [0, 0, 2, 3, 0, 0]
    Length: 6, dtype: Int64
    """
    # Warn *before* validate_fillna_kwargs, which may normalize method and
    # would otherwise hide that the caller explicitly passed it.
    if method is not None:
        warnings.warn(
            f"The 'method' keyword in {type(self).__name__}.fillna is "
            "deprecated and will be removed in a future version.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

    # Ensures exactly one of value/method is in use and normalizes aliases.
    value, method = validate_fillna_kwargs(value, method)

    mask = self.isna()
    # Broadcast/length-check an array-like `value` against the mask.
    # error: Argument 2 to "check_value_size" has incompatible type
    # "ExtensionArray"; expected "ndarray"
    value = missing.check_value_size(
        value, mask, len(self)  # type: ignore[arg-type]
    )

    if mask.any():
        if method is not None:
            # pad/backfill path: build a positional indexer from the NA mask
            # and materialize via take(); "copy" is intentionally ignored here.
            meth = missing.clean_fill_method(method)

            npmask = np.asarray(mask)
            if meth == "pad":
                indexer = libalgos.get_fill_indexer(npmask, limit=limit)
                return self.take(indexer, allow_fill=True)
            else:
                # i.e. meth == "backfill"; reverse, forward-fill, reverse back.
                indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
                return self[::-1].take(indexer, allow_fill=True)
        else:
            # fill with value; self[:] is a view-like copy the base class
            # treats as "no new allocation" for copy=False
            if not copy:
                new_values = self[:]
            else:
                new_values = self.copy()
            new_values[mask] = value
    else:
        # Nothing to fill: honor copy semantics only.
        if not copy:
            new_values = self[:]
        else:
            new_values = self.copy()
    return new_values

1152 

def dropna(self) -> Self:
    """
    Return ExtensionArray without NA values.

    Returns
    -------
    Same type as self, with all missing entries removed.

    Examples
    --------
    >>> pd.array([1, 2, np.nan]).dropna()
    <IntegerArray>
    [1, 2]
    Length: 2, dtype: Int64
    """
    # Keep only the positions where isna() is False.
    # error: Unsupported operand type for ~ ("ExtensionArray")
    keep_mask = ~self.isna()  # type: ignore[operator]
    return self[keep_mask]

1169 

def duplicated(
    self, keep: Literal["first", "last", False] = "first"
) -> npt.NDArray[np.bool_]:
    """
    Return boolean ndarray denoting duplicate values.

    Parameters
    ----------
    keep : {'first', 'last', False}, default 'first'
        - ``first`` : Mark duplicates as ``True`` except for the first occurrence.
        - ``last`` : Mark duplicates as ``True`` except for the last occurrence.
        - False : Mark all duplicates as ``True``.

    Returns
    -------
    ndarray[bool]

    Examples
    --------
    >>> pd.array([1, 1, 2, 3, 3], dtype="Int64").duplicated()
    array([False,  True, False, False,  True])
    """
    # The algorithm needs a plain bool mask of missing entries so NAs are
    # handled consistently regardless of the EA's own mask representation.
    na_mask = self.isna().astype(np.bool_, copy=False)
    return duplicated(values=self, keep=keep, mask=na_mask)

1194 

def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray:
    """
    Shift values by desired number.

    Newly introduced missing values are filled with
    ``self.dtype.na_value``.

    Parameters
    ----------
    periods : int, default 1
        The number of periods to shift. Negative values are allowed
        for shifting backwards.
    fill_value : object, optional
        The scalar value to use for newly introduced missing values.
        The default is ``self.dtype.na_value``.

    Returns
    -------
    ExtensionArray
        Shifted.

    Notes
    -----
    If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is
    returned.

    If ``periods > len(self)``, then an array of size
    len(self) is returned, with all values filled with
    ``self.dtype.na_value``.

    For 2-dimensional ExtensionArrays, we are always shifting along axis=0.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.shift(2)
    <IntegerArray>
    [<NA>, <NA>, 1]
    Length: 3, dtype: Int64
    """
    # Note: this implementation assumes that `self.dtype.na_value` can be
    # stored in an instance of your ExtensionArray with `self.dtype`.
    if not len(self) or periods == 0:
        return self.copy()

    if isna(fill_value):
        fill_value = self.dtype.na_value

    # Filler never exceeds len(self): shifting further than the length
    # simply yields an all-fill array of the same length.
    n_fill = min(abs(periods), len(self))
    filler = self._from_sequence([fill_value] * n_fill, dtype=self.dtype)

    if periods > 0:
        pieces = [filler, self[:-periods]]
    else:
        pieces = [self[abs(periods):], filler]
    return self._concat_same_type(pieces)

1254 

def unique(self) -> Self:
    """
    Compute the ExtensionArray of unique values.

    Returns
    -------
    pandas.api.extensions.ExtensionArray

    Examples
    --------
    >>> arr = pd.array([1, 2, 3, 1, 2, 3])
    >>> arr.unique()
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    # Fall back to object dtype for the dedup, then rebuild with our dtype.
    raw_uniques = unique(self.astype(object))
    return self._from_sequence(raw_uniques, dtype=self.dtype)

1273 

def searchsorted(
    self,
    value: NumpyValueArrayLike | ExtensionArray,
    side: Literal["left", "right"] = "left",
    sorter: NumpySorter | None = None,
) -> npt.NDArray[np.intp] | np.intp:
    """
    Find indices where elements should be inserted to maintain order.

    Find the indices into a sorted array `self` (a) such that, if the
    corresponding elements in `value` were inserted before the indices,
    the order of `self` would be preserved.

    Assuming that `self` is sorted:

    ======  ================================
    `side`  returned index `i` satisfies
    ======  ================================
    left    ``self[i-1] < value <= self[i]``
    right   ``self[i-1] <= value < self[i]``
    ======  ================================

    Parameters
    ----------
    value : array-like, list or scalar
        Value(s) to insert into `self`.
    side : {'left', 'right'}, optional
        If 'left', the index of the first suitable location found is given.
        If 'right', return the last such index.  If there is no suitable
        index, return either 0 or N (where N is the length of `self`).
    sorter : 1-D array-like, optional
        Optional array of integer indices that sort array a into ascending
        order. They are typically the result of argsort.

    Returns
    -------
    array of ints or int
        If value is array-like, array of insertion points.
        If value is scalar, a single integer.

    See Also
    --------
    numpy.searchsorted : Similar method from NumPy.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3, 5])
    >>> arr.searchsorted([4])
    array([3])
    """
    # Base implementation: delegate to an object-dtype ndarray.  Only the
    # basics are exercised by the pandas base tests (no out-of-range values,
    # no in-between values, no missing values).
    obj_arr = self.astype(object)
    if isinstance(value, ExtensionArray):
        value = value.astype(object)
    return obj_arr.searchsorted(value, side=side, sorter=sorter)

1333 

def equals(self, other: object) -> bool:
    """
    Return if another array is equivalent to this array.

    Equivalent means that both arrays have the same shape and dtype, and
    all values compare equal. Missing values in the same location are
    considered equal (in contrast with normal equality).

    Parameters
    ----------
    other : ExtensionArray
        Array to compare to this Array.

    Returns
    -------
    boolean
        Whether the arrays are equivalent.

    Examples
    --------
    >>> arr1 = pd.array([1, 2, np.nan])
    >>> arr2 = pd.array([1, 2, np.nan])
    >>> arr1.equals(arr2)
    True
    """
    # Exact-class check: a subclass is never "equal" to its parent class.
    if type(self) != type(other):
        return False
    other = cast(ExtensionArray, other)
    if self.dtype != other.dtype or len(self) != len(other):
        return False
    eq = self == other
    if isinstance(eq, ExtensionArray):
        # boolean array with NA -> fill with False
        eq = eq.fillna(False)
    # NAs in matching positions count as equal.
    # error: Unsupported left operand type for & ("ExtensionArray")
    both_na = self.isna() & other.isna()  # type: ignore[operator]
    return bool((eq | both_na).all())

1374 

def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
    """
    Pointwise comparison for set containment in the given values.

    Roughly equivalent to `np.array([x in values for x in self])`

    Parameters
    ----------
    values : np.ndarray or ExtensionArray

    Returns
    -------
    np.ndarray[bool]

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr.isin([1])
    <BooleanArray>
    [True, False, False]
    Length: 3, dtype: boolean
    """
    # Base class: coerce to an ndarray and defer to the shared algorithm.
    arr = np.asarray(self)
    return isin(arr, values)

1398 

1399 def _values_for_factorize(self) -> tuple[np.ndarray, Any]: 

1400 """ 

1401 Return an array and missing value suitable for factorization. 

1402 

1403 Returns 

1404 ------- 

1405 values : ndarray 

1406 An array suitable for factorization. This should maintain order 

1407 and be a supported dtype (Float64, Int64, UInt64, String, Object). 

1408 By default, the extension array is cast to object dtype. 

1409 na_value : object 

1410 The value in `values` to consider missing. This will be treated 

1411 as NA in the factorization routines, so it will be coded as 

1412 `-1` and not included in `uniques`. By default, 

1413 ``np.nan`` is used. 

1414 

1415 Notes 

1416 ----- 

1417 The values returned by this method are also used in 

1418 :func:`pandas.util.hash_pandas_object`. If needed, this can be 

1419 overridden in the ``self._hash_pandas_object()`` method. 

1420 

1421 Examples 

1422 -------- 

1423 >>> pd.array([1, 2, 3])._values_for_factorize() 

1424 (array([1, 2, 3], dtype=object), nan) 

1425 """ 

1426 return self.astype(object), np.nan 

1427 

def factorize(
    self,
    use_na_sentinel: bool = True,
) -> tuple[np.ndarray, ExtensionArray]:
    """
    Encode the extension array as an enumerated type.

    Parameters
    ----------
    use_na_sentinel : bool, default True
        If True, the sentinel -1 will be used for NaN values. If False,
        NaN values will be encoded as non-negative integers and will not drop the
        NaN from the uniques of the values.

        .. versionadded:: 1.5.0

    Returns
    -------
    codes : ndarray
        An integer NumPy array that's an indexer into the original
        ExtensionArray.
    uniques : ExtensionArray
        An ExtensionArray containing the unique values of `self`.

        .. note::

           uniques will *not* contain an entry for the NA value of
           the ExtensionArray if there are any missing values present
           in `self`.

    See Also
    --------
    factorize : Top-level factorize method that dispatches here.

    Notes
    -----
    :meth:`pandas.factorize` offers a `sort` keyword as well.

    Examples
    --------
    >>> idx1 = pd.PeriodIndex(["2014-01", "2014-01", "2014-02", "2014-02",
    ...                        "2014-03", "2014-03"], freq="M")
    >>> arr, idx = idx1.factorize()
    >>> arr
    array([0, 0, 1, 1, 2, 2])
    >>> idx
    PeriodIndex(['2014-01', '2014-02', '2014-03'], dtype='period[M]')
    """
    # Implementer note: There are two ways to override the behavior of
    # pandas.factorize
    # 1. _values_for_factorize and _from_factorize.
    #    Specify the values passed to pandas' internal factorization
    #    routines, and how to convert from those values back to the
    #    original ExtensionArray.
    # 2. ExtensionArray.factorize.
    #    Complete control over factorization.
    arr, na_value = self._values_for_factorize()

    codes, raw_uniques = factorize_array(
        arr, use_na_sentinel=use_na_sentinel, na_value=na_value
    )

    # Round-trip the uniques back into an ExtensionArray of our type.
    return codes, self._from_factorized(raw_uniques, self)

1492 

# Shared docstring for `repeat`, substituted per-class via @Substitution
# (e.g. klass="ExtensionArray" or "Categorical") and attached with @Appender.
_extension_array_shared_docs[
    "repeat"
] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    Index.repeat : Equivalent function for Index.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.
    ExtensionArray.take : Take arbitrary positions.

    Examples
    --------
    >>> cat = pd.Categorical(['a', 'b', 'c'])
    >>> cat
    ['a', 'b', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat(2)
    ['a', 'a', 'b', 'b', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    >>> cat.repeat([1, 2, 3])
    ['a', 'b', 'b', 'c', 'c', 'c']
    Categories (3, object): ['a', 'b', 'c']
    """

1536 

@Substitution(klass="ExtensionArray")
@Appender(_extension_array_shared_docs["repeat"])
def repeat(self, repeats: int | Sequence[int], axis: AxisInt | None = None) -> Self:
    # numpy-compat: axis is accepted but must be None.
    nv.validate_repeat((), {"axis": axis})
    # Repeat positions, then materialize through take().
    indices = np.arange(len(self)).repeat(repeats)
    return self.take(indices)

1543 

1544 # ------------------------------------------------------------------------ 

1545 # Indexing methods 

1546 # ------------------------------------------------------------------------ 

1547 

def take(
    self,
    indices: TakeIndexer,
    *,
    allow_fill: bool = False,
    fill_value: Any = None,
) -> Self:
    """
    Take elements from an array.

    Parameters
    ----------
    indices : sequence of int or one-dimensional np.ndarray of int
        Indices to be taken.
    allow_fill : bool, default False
        How to handle negative values in `indices`.

        * False: negative values in `indices` indicate positional indices
          from the right (the default). This is similar to
          :func:`numpy.take`.

        * True: negative values in `indices` indicate
          missing values. These values are set to `fill_value`. Any other
          other negative values raise a ``ValueError``.

    fill_value : any, optional
        Fill value to use for NA-indices when `allow_fill` is True.
        This may be ``None``, in which case the default NA value for
        the type, ``self.dtype.na_value``, is used.

        For many ExtensionArrays, there will be two representations of
        `fill_value`: a user-facing "boxed" scalar, and a low-level
        physical NA value. `fill_value` should be the user-facing version,
        and the implementation should handle translating that to the
        physical version for processing the take if necessary.

    Returns
    -------
    ExtensionArray

    Raises
    ------
    IndexError
        When the indices are out of bounds for the array.
    ValueError
        When `indices` contains negative values other than ``-1``
        and `allow_fill` is True.

    See Also
    --------
    numpy.take : Take elements from an array along an axis.
    api.extensions.take : Take elements from an array.

    Notes
    -----
    ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``,
    ``iloc``, when `indices` is a sequence of values. Additionally,
    it's called by :meth:`Series.reindex`, or any other method
    that causes realignment, with a `fill_value`.

    Examples
    --------
    Here's an example implementation, which relies on casting the
    extension array to object dtype. This uses the helper method
    :func:`pandas.api.extensions.take`.

    .. code-block:: python

       def take(self, indices, allow_fill=False, fill_value=None):
           from pandas.core.algorithms import take

           # If the ExtensionArray is backed by an ndarray, then
           # just pass that here instead of coercing to object.
           data = self.astype(object)

           if allow_fill and fill_value is None:
               fill_value = self.dtype.na_value

           # fill value should always be translated from the scalar
           # type for the array, to the physical storage type for
           # the data, before passing to take.

           result = take(data, indices, fill_value=fill_value,
                         allow_fill=allow_fill)
           return self._from_sequence(result, dtype=self.dtype)
    """
    # Abstract in the base class: subclasses must implement.
    # Implementer note: The `fill_value` parameter should be a user-facing
    # value, an instance of self.dtype.type. When passed `fill_value=None`,
    # the default of `self.dtype.na_value` should be used.
    # This may differ from the physical storage type your ExtensionArray
    # uses. In this case, your implementation is responsible for casting
    # the user-facing type to the storage type, before using
    # pandas.api.extensions.take
    raise AbstractMethodError(self)

1642 

def copy(self) -> Self:
    """
    Return a copy of the array.

    Returns
    -------
    ExtensionArray
        An independent array; mutating ``self`` afterwards must not
        affect the returned copy.

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr2 = arr.copy()
    >>> arr[0] = 2
    >>> arr2
    <IntegerArray>
    [1, 2, 3]
    Length: 3, dtype: Int64
    """
    # Abstract in the base class: subclasses must implement.
    raise AbstractMethodError(self)

1662 

1663 def view(self, dtype: Dtype | None = None) -> ArrayLike: 

1664 """ 

1665 Return a view on the array. 

1666 

1667 Parameters 

1668 ---------- 

1669 dtype : str, np.dtype, or ExtensionDtype, optional 

1670 Default None. 

1671 

1672 Returns 

1673 ------- 

1674 ExtensionArray or np.ndarray 

1675 A view on the :class:`ExtensionArray`'s data. 

1676 

1677 Examples 

1678 -------- 

1679 This gives view on the underlying data of an ``ExtensionArray`` and is not a 

1680 copy. Modifications on either the view or the original ``ExtensionArray`` 

1681 will be reflectd on the underlying data: 

1682 

1683 >>> arr = pd.array([1, 2, 3]) 

1684 >>> arr2 = arr.view() 

1685 >>> arr[0] = 2 

1686 >>> arr2 

1687 <IntegerArray> 

1688 [2, 2, 3] 

1689 Length: 3, dtype: Int64 

1690 """ 

1691 # NB: 

1692 # - This must return a *new* object referencing the same data, not self. 

1693 # - The only case that *must* be implemented is with dtype=None, 

1694 # giving a view with the same dtype as self. 

1695 if dtype is not None: 

1696 raise NotImplementedError(dtype) 

1697 return self[:] 

1698 

1699 # ------------------------------------------------------------------------ 

1700 # Printing 

1701 # ------------------------------------------------------------------------ 

1702 

def __repr__(self) -> str:
    """Default repr: class name, formatted values, then a footer line."""
    if self.ndim > 1:
        return self._repr_2d()

    from pandas.io.formats.printing import format_object_summary

    # The short repr has no trailing newline while the truncated repr does,
    # so strip trailing separators/newlines and add our own newline.
    body = format_object_summary(
        self, self._formatter(), indent_for_name=False
    ).rstrip(", \n")
    header = f"<{type(self).__name__}>\n"
    return f"{header}{body}\n{self._get_repr_footer()}"

1718 

1719 def _get_repr_footer(self) -> str: 

1720 # GH#24278 

1721 if self.ndim > 1: 

1722 return f"Shape: {self.shape}, dtype: {self.dtype}" 

1723 return f"Length: {len(self)}, dtype: {self.dtype}" 

1724 

def _repr_2d(self) -> str:
    """Repr for 2D arrays: one formatted summary line per row."""
    from pandas.io.formats.printing import format_object_summary

    # Strip trailing separators/newlines from each row summary (the
    # truncated repr has a trailing newline, the short one does not).
    fmt = self._formatter()
    rows = ",\n".join(
        format_object_summary(row, fmt, indent_for_name=False).rstrip(", \n")
        for row in self
    )
    return f"<{type(self).__name__}>\n[\n{rows}\n]\n{self._get_repr_footer()}"

1741 

1742 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: 

1743 """ 

1744 Formatting function for scalar values. 

1745 

1746 This is used in the default '__repr__'. The returned formatting 

1747 function receives instances of your scalar type. 

1748 

1749 Parameters 

1750 ---------- 

1751 boxed : bool, default False 

1752 An indicated for whether or not your array is being printed 

1753 within a Series, DataFrame, or Index (True), or just by 

1754 itself (False). This may be useful if you want scalar values 

1755 to appear differently within a Series versus on its own (e.g. 

1756 quoted or not). 

1757 

1758 Returns 

1759 ------- 

1760 Callable[[Any], str] 

1761 A callable that gets instances of the scalar type and 

1762 returns a string. By default, :func:`repr` is used 

1763 when ``boxed=False`` and :func:`str` is used when 

1764 ``boxed=True``. 

1765 

1766 Examples 

1767 -------- 

1768 >>> class MyExtensionArray(pd.arrays.NumpyExtensionArray): 

1769 ... def _formatter(self, boxed=False): 

1770 ... return lambda x: '*' + str(x) + '*' if boxed else repr(x) + '*' 

1771 >>> MyExtensionArray(np.array([1, 2, 3, 4])) 

1772 <MyExtensionArray> 

1773 [1*, 2*, 3*, 4*] 

1774 Length: 4, dtype: int64 

1775 """ 

1776 if boxed: 

1777 return str 

1778 return repr 

1779 

1780 # ------------------------------------------------------------------------ 

1781 # Reshaping 

1782 # ------------------------------------------------------------------------ 

1783 

1784 def transpose(self, *axes: int) -> ExtensionArray: 

1785 """ 

1786 Return a transposed view on this array. 

1787 

1788 Because ExtensionArrays are always 1D, this is a no-op. It is included 

1789 for compatibility with np.ndarray. 

1790 

1791 Returns 

1792 ------- 

1793 ExtensionArray 

1794 

1795 Examples 

1796 -------- 

1797 >>> pd.array([1, 2, 3]).transpose() 

1798 <IntegerArray> 

1799 [1, 2, 3] 

1800 Length: 3, dtype: Int64 

1801 """ 

1802 return self[:] 

1803 

@property
def T(self) -> ExtensionArray:
    # numpy-compat alias for transpose(); a no-op view for 1D arrays.
    return self.transpose()

1807 

1808 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray: 

1809 """ 

1810 Return a flattened view on this array. 

1811 

1812 Parameters 

1813 ---------- 

1814 order : {None, 'C', 'F', 'A', 'K'}, default 'C' 

1815 

1816 Returns 

1817 ------- 

1818 ExtensionArray 

1819 

1820 Notes 

1821 ----- 

1822 - Because ExtensionArrays are 1D-only, this is a no-op. 

1823 - The "order" argument is ignored, is for compatibility with NumPy. 

1824 

1825 Examples 

1826 -------- 

1827 >>> pd.array([1, 2, 3]).ravel() 

1828 <IntegerArray> 

1829 [1, 2, 3] 

1830 Length: 3, dtype: Int64 

1831 """ 

1832 return self 

1833 

@classmethod
def _concat_same_type(cls, to_concat: Sequence[Self]) -> Self:
    """
    Concatenate multiple array of this dtype.

    Parameters
    ----------
    to_concat : sequence of this type

    Returns
    -------
    ExtensionArray

    Examples
    --------
    >>> arr1 = pd.array([1, 2, 3])
    >>> arr2 = pd.array([4, 5, 6])
    >>> pd.arrays.IntegerArray._concat_same_type([arr1, arr2])
    <IntegerArray>
    [1, 2, 3, 4, 5, 6]
    Length: 6, dtype: Int64
    """
    # Abstract in the base class: subclasses must implement.
    # Implementer note: this method will only be called with a sequence of
    # ExtensionArrays of this class and with the same dtype as self. This
    # should allow "easy" concatenation (no upcasting needed), and result
    # in a new ExtensionArray of the same dtype.
    # Note: this strict behaviour is only guaranteed starting with pandas 1.1
    raise AbstractMethodError(cls)

1862 

# The _can_hold_na attribute is set to True so that pandas internals
# will use the ExtensionDtype.na_value as the NA value in operations
# such as take(), reindex(), shift(), etc. In addition, those results
# will then be of the ExtensionArray subclass rather than an array
# of objects
@cache_readonly
def _can_hold_na(self) -> bool:
    # Delegates to the dtype; cached since the dtype never changes.
    return self.dtype._can_hold_na

1871 

def _accumulate(
    self, name: str, *, skipna: bool = True, **kwargs
) -> ExtensionArray:
    """
    Return an ExtensionArray performing an accumulation operation.

    The underlying data type might change.

    Parameters
    ----------
    name : str
        Name of the function, supported values are:
        - cummin
        - cummax
        - cumsum
        - cumprod
    skipna : bool, default True
        If True, skip NA values.
    **kwargs
        Additional keyword arguments passed to the accumulation function.
        Currently, there is no supported kwarg.

    Returns
    -------
    array

    Raises
    ------
    NotImplementedError : subclass does not define accumulations

    Examples
    --------
    >>> arr = pd.array([1, 2, 3])
    >>> arr._accumulate(name='cumsum')
    <IntegerArray>
    [1, 3, 6]
    Length: 3, dtype: Int64
    """
    # Base class has no accumulation support; subclasses opt in by override.
    raise NotImplementedError(f"cannot perform {name} with type {self.dtype}")

1911 

1912 def _reduce( 

1913 self, name: str, *, skipna: bool = True, keepdims: bool = False, **kwargs 

1914 ): 

1915 """ 

1916 Return a scalar result of performing the reduction operation. 

1917 

1918 Parameters 

1919 ---------- 

1920 name : str 

1921 Name of the function, supported values are: 

1922 { any, all, min, max, sum, mean, median, prod, 

1923 std, var, sem, kurt, skew }. 

1924 skipna : bool, default True 

1925 If True, skip NaN values. 

1926 keepdims : bool, default False 

1927 If False, a scalar is returned. 

1928 If True, the result has dimension with size one along the reduced axis. 

1929 

1930 .. versionadded:: 2.1 

1931 

1932 This parameter is not required in the _reduce signature to keep backward 

1933 compatibility, but will become required in the future. If the parameter 

1934 is not found in the method signature, a FutureWarning will be emitted. 

1935 **kwargs 

1936 Additional keyword arguments passed to the reduction function. 

1937 Currently, `ddof` is the only supported kwarg. 

1938 

1939 Returns 

1940 ------- 

1941 scalar 

1942 

1943 Raises 

1944 ------ 

1945 TypeError : subclass does not define reductions 

1946 

1947 Examples 

1948 -------- 

1949 >>> pd.array([1, 2, 3])._reduce("min") 

1950 1 

1951 """ 

1952 meth = getattr(self, name, None) 

1953 if meth is None: 

1954 raise TypeError( 

1955 f"'{type(self).__name__}' with dtype {self.dtype} " 

1956 f"does not support reduction '{name}'" 

1957 ) 

1958 result = meth(skipna=skipna, **kwargs) 

1959 if keepdims: 

1960 result = np.array([result]) 

1961 

1962 return result 

1963 

# ExtensionArrays are mutable sequences, so they are deliberately unhashable.
# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
__hash__: ClassVar[None]  # type: ignore[assignment]

1968 

1969 # ------------------------------------------------------------------------ 

1970 # Non-Optimized Default Methods; in the case of the private methods here, 

1971 # these are not guaranteed to be stable across pandas versions. 

1972 

1973 def _values_for_json(self) -> np.ndarray: 

1974 """ 

1975 Specify how to render our entries in to_json. 

1976 

1977 Notes 

1978 ----- 

1979 The dtype on the returned ndarray is not restricted, but for non-native 

1980 types that are not specifically handled in objToJSON.c, to_json is 

1981 liable to raise. In these cases, it may be safer to return an ndarray 

1982 of strings. 

1983 """ 

1984 return np.asarray(self) 

1985 

1986 def _hash_pandas_object( 

1987 self, *, encoding: str, hash_key: str, categorize: bool 

1988 ) -> npt.NDArray[np.uint64]: 

1989 """ 

1990 Hook for hash_pandas_object. 

1991 

1992 Default is to use the values returned by _values_for_factorize. 

1993 

1994 Parameters 

1995 ---------- 

1996 encoding : str 

1997 Encoding for data & key when strings. 

1998 hash_key : str 

1999 Hash_key for string key to encode. 

2000 categorize : bool 

2001 Whether to first categorize object arrays before hashing. This is more 

2002 efficient when the array contains duplicate values. 

2003 

2004 Returns 

2005 ------- 

2006 np.ndarray[uint64] 

2007 

2008 Examples 

2009 -------- 

2010 >>> pd.array([1, 2])._hash_pandas_object(encoding='utf-8', 

2011 ... hash_key="1000000000000000", 

2012 ... categorize=False 

2013 ... ) 

2014 array([ 6238072747940578789, 15839785061582574730], dtype=uint64) 

2015 """ 

2016 from pandas.core.util.hashing import hash_array 

2017 

2018 values, _ = self._values_for_factorize() 

2019 return hash_array( 

2020 values, encoding=encoding, hash_key=hash_key, categorize=categorize 

2021 ) 

2022 

2023 def _explode(self) -> tuple[Self, npt.NDArray[np.uint64]]: 

2024 """ 

2025 Transform each element of list-like to a row. 

2026 

2027 For arrays that do not contain list-like elements the default 

2028 implementation of this method just returns a copy and an array 

2029 of ones (unchanged index). 

2030 

2031 Returns 

2032 ------- 

2033 ExtensionArray 

2034 Array with the exploded values. 

2035 np.ndarray[uint64] 

2036 The original lengths of each list-like for determining the 

2037 resulting index. 

2038 

2039 See Also 

2040 -------- 

2041 Series.explode : The method on the ``Series`` object that this 

2042 extension array method is meant to support. 

2043 

2044 Examples 

2045 -------- 

2046 >>> import pyarrow as pa 

2047 >>> a = pd.array([[1, 2, 3], [4], [5, 6]], 

2048 ... dtype=pd.ArrowDtype(pa.list_(pa.int64()))) 

2049 >>> a._explode() 

2050 (<ArrowExtensionArray> 

2051 [1, 2, 3, 4, 5, 6] 

2052 Length: 6, dtype: int64[pyarrow], array([3, 1, 2], dtype=int32)) 

2053 """ 

2054 values = self.copy() 

2055 counts = np.ones(shape=(len(self),), dtype=np.uint64) 

2056 return values, counts 

2057 

2058 def tolist(self) -> list: 

2059 """ 

2060 Return a list of the values. 

2061 

2062 These are each a scalar type, which is a Python scalar 

2063 (for str, int, float) or a pandas scalar 

2064 (for Timestamp/Timedelta/Interval/Period) 

2065 

2066 Returns 

2067 ------- 

2068 list 

2069 

2070 Examples 

2071 -------- 

2072 >>> arr = pd.array([1, 2, 3]) 

2073 >>> arr.tolist() 

2074 [1, 2, 3] 

2075 """ 

2076 if self.ndim > 1: 

2077 return [x.tolist() for x in self] 

2078 return list(self) 

2079 

2080 def delete(self, loc: PositionalIndexer) -> Self: 

2081 indexer = np.delete(np.arange(len(self)), loc) 

2082 return self.take(indexer) 

2083 

2084 def insert(self, loc: int, item) -> Self: 

2085 """ 

2086 Insert an item at the given position. 

2087 

2088 Parameters 

2089 ---------- 

2090 loc : int 

2091 item : scalar-like 

2092 

2093 Returns 

2094 ------- 

2095 same type as self 

2096 

2097 Notes 

2098 ----- 

2099 This method should be both type and dtype-preserving. If the item 

2100 cannot be held in an array of this type/dtype, either ValueError or 

2101 TypeError should be raised. 

2102 

2103 The default implementation relies on _from_sequence to raise on invalid 

2104 items. 

2105 

2106 Examples 

2107 -------- 

2108 >>> arr = pd.array([1, 2, 3]) 

2109 >>> arr.insert(2, -1) 

2110 <IntegerArray> 

2111 [1, 2, -1, 3] 

2112 Length: 4, dtype: Int64 

2113 """ 

2114 loc = validate_insert_loc(loc, len(self)) 

2115 

2116 item_arr = type(self)._from_sequence([item], dtype=self.dtype) 

2117 

2118 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]]) 

2119 

2120 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

2121 """ 

2122 Analogue to np.putmask(self, mask, value) 

2123 

2124 Parameters 

2125 ---------- 

2126 mask : np.ndarray[bool] 

2127 value : scalar or listlike 

2128 If listlike, must be arraylike with same length as self. 

2129 

2130 Returns 

2131 ------- 

2132 None 

2133 

2134 Notes 

2135 ----- 

2136 Unlike np.putmask, we do not repeat listlike values with mismatched length. 

2137 'value' should either be a scalar or an arraylike with the same length 

2138 as self. 

2139 """ 

2140 if is_list_like(value): 

2141 val = value[mask] 

2142 else: 

2143 val = value 

2144 

2145 self[mask] = val 

2146 

2147 def _where(self, mask: npt.NDArray[np.bool_], value) -> Self: 

2148 """ 

2149 Analogue to np.where(mask, self, value) 

2150 

2151 Parameters 

2152 ---------- 

2153 mask : np.ndarray[bool] 

2154 value : scalar or listlike 

2155 

2156 Returns 

2157 ------- 

2158 same type as self 

2159 """ 

2160 result = self.copy() 

2161 

2162 if is_list_like(value): 

2163 val = value[~mask] 

2164 else: 

2165 val = value 

2166 

2167 result[~mask] = val 

2168 return result 

2169 

2170 # TODO(3.0): this can be removed once GH#33302 deprecation is enforced 

2171 def _fill_mask_inplace( 

2172 self, method: str, limit: int | None, mask: npt.NDArray[np.bool_] 

2173 ) -> None: 

2174 """ 

2175 Replace values in locations specified by 'mask' using pad or backfill. 

2176 

2177 See also 

2178 -------- 

2179 ExtensionArray.fillna 

2180 """ 

2181 func = missing.get_fill_func(method) 

2182 npvalues = self.astype(object) 

2183 # NB: if we don't copy mask here, it may be altered inplace, which 

2184 # would mess up the `self[mask] = ...` below. 

2185 func(npvalues, limit=limit, mask=mask.copy()) 

2186 new_values = self._from_sequence(npvalues, dtype=self.dtype) 

2187 self[mask] = new_values[mask] 

2188 

2189 def _rank( 

2190 self, 

2191 *, 

2192 axis: AxisInt = 0, 

2193 method: str = "average", 

2194 na_option: str = "keep", 

2195 ascending: bool = True, 

2196 pct: bool = False, 

2197 ): 

2198 """ 

2199 See Series.rank.__doc__. 

2200 """ 

2201 if axis != 0: 

2202 raise NotImplementedError 

2203 

2204 return rank( 

2205 self._values_for_argsort(), 

2206 axis=axis, 

2207 method=method, 

2208 na_option=na_option, 

2209 ascending=ascending, 

2210 pct=pct, 

2211 ) 

2212 

2213 @classmethod 

2214 def _empty(cls, shape: Shape, dtype: ExtensionDtype): 

2215 """ 

2216 Create an ExtensionArray with the given shape and dtype. 

2217 

2218 See also 

2219 -------- 

2220 ExtensionDtype.empty 

2221 ExtensionDtype.empty is the 'official' public version of this API. 

2222 """ 

2223 # Implementer note: while ExtensionDtype.empty is the public way to 

2224 # call this method, it is still required to implement this `_empty` 

2225 # method as well (it is called internally in pandas) 

2226 obj = cls._from_sequence([], dtype=dtype) 

2227 

2228 taker = np.broadcast_to(np.intp(-1), shape) 

2229 result = obj.take(taker, allow_fill=True) 

2230 if not isinstance(result, cls) or dtype != result.dtype: 

2231 raise NotImplementedError( 

2232 f"Default 'empty' implementation is invalid for dtype='{dtype}'" 

2233 ) 

2234 return result 

2235 

2236 def _quantile(self, qs: npt.NDArray[np.float64], interpolation: str) -> Self: 

2237 """ 

2238 Compute the quantiles of self for each quantile in `qs`. 

2239 

2240 Parameters 

2241 ---------- 

2242 qs : np.ndarray[float64] 

2243 interpolation: str 

2244 

2245 Returns 

2246 ------- 

2247 same type as self 

2248 """ 

2249 mask = np.asarray(self.isna()) 

2250 arr = np.asarray(self) 

2251 fill_value = np.nan 

2252 

2253 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) 

2254 return type(self)._from_sequence(res_values) 

2255 

2256 def _mode(self, dropna: bool = True) -> Self: 

2257 """ 

2258 Returns the mode(s) of the ExtensionArray. 

2259 

2260 Always returns `ExtensionArray` even if only one value. 

2261 

2262 Parameters 

2263 ---------- 

2264 dropna : bool, default True 

2265 Don't consider counts of NA values. 

2266 

2267 Returns 

2268 ------- 

2269 same type as self 

2270 Sorted, if possible. 

2271 """ 

2272 # error: Incompatible return value type (got "Union[ExtensionArray, 

2273 # ndarray[Any, Any]]", expected "Self") 

2274 return mode(self, dropna=dropna) # type: ignore[return-value] 

2275 

    def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs):
        # NumPy ufunc-protocol hook (NEP 13). The dispatch order below is
        # load-bearing: dunder ops first, then `out=` handling, then
        # reductions, then the generic fallback.
        if any(
            isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs
        ):
            # Defer to Series/Index/DataFrame, which unbox and dispatch back
            # to us with array-level inputs.
            return NotImplemented

        # Prefer our own dunder implementations (e.g. __add__) when the
        # ufunc maps onto one.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # Calls like np.add(ea, 1, out=arr) are handled by the shared
            # out-argument helper.
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            # e.g. np.add.reduce; try the pandas-level reduction dispatch
            # before falling back.
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Generic fallback — presumably applies the ufunc to an ndarray
        # conversion of self; see arraylike.default_array_ufunc.
        return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs)

2301 

2302 def map(self, mapper, na_action=None): 

2303 """ 

2304 Map values using an input mapping or function. 

2305 

2306 Parameters 

2307 ---------- 

2308 mapper : function, dict, or Series 

2309 Mapping correspondence. 

2310 na_action : {None, 'ignore'}, default None 

2311 If 'ignore', propagate NA values, without passing them to the 

2312 mapping correspondence. If 'ignore' is not supported, a 

2313 ``NotImplementedError`` should be raised. 

2314 

2315 Returns 

2316 ------- 

2317 Union[ndarray, Index, ExtensionArray] 

2318 The output of the mapping function applied to the array. 

2319 If the function returns a tuple with more than one element 

2320 a MultiIndex will be returned. 

2321 """ 

2322 return map_array(self, mapper, na_action=na_action) 

2323 

2324 # ------------------------------------------------------------------------ 

2325 # GroupBy Methods 

2326 

    def _groupby_op(
        self,
        *,
        how: str,
        has_dropped_na: bool,
        min_count: int,
        ngroups: int,
        ids: npt.NDArray[np.intp],
        **kwargs,
    ) -> ArrayLike:
        """
        Dispatch GroupBy reduction or transformation operation.

        This is an *experimental* API to allow ExtensionArray authors to implement
        reductions and transformations. The API is subject to change.

        Parameters
        ----------
        how : {'any', 'all', 'sum', 'prod', 'min', 'max', 'mean', 'median',
            'median', 'var', 'std', 'sem', 'nth', 'last', 'ohlc',
            'cumprod', 'cumsum', 'cummin', 'cummax', 'rank'}
        has_dropped_na : bool
        min_count : int
        ngroups : int
        ids : np.ndarray[np.intp]
            ids[i] gives the integer label for the group that self[i] belongs to.
        **kwargs : operation-specific
            'any', 'all' -> ['skipna']
            'var', 'std', 'sem' -> ['ddof']
            'cumprod', 'cumsum', 'cummin', 'cummax' -> ['skipna']
            'rank' -> ['ties_method', 'ascending', 'na_option', 'pct']

        Returns
        -------
        np.ndarray or ExtensionArray

        Notes
        -----
        This base-class implementation only handles ``StringDtype``; any
        other dtype raises ``NotImplementedError``.
        """
        from pandas.core.arrays.string_ import StringDtype
        from pandas.core.groupby.ops import WrappedCythonOp

        kind = WrappedCythonOp.get_kind_from_how(how)
        op = WrappedCythonOp(how=how, kind=kind, has_dropped_na=has_dropped_na)

        # GH#43682
        if isinstance(self.dtype, StringDtype):
            # StringArray
            if op.how not in ["any", "all"]:
                # Fail early to avoid conversion to object
                op._get_cython_function(op.kind, op.how, np.dtype(object), False)
            # Work on an object-dtype ndarray with NA mapped to NaN.
            npvalues = self.to_numpy(object, na_value=np.nan)
        else:
            raise NotImplementedError(
                f"function is not implemented for this dtype: {self.dtype}"
            )

        # Run the cython groupby kernel; see WrappedCythonOp for the
        # dimensionality handling this wrapper provides.
        res_values = op._cython_op_ndim_compat(
            npvalues,
            min_count=min_count,
            ngroups=ngroups,
            comp_ids=ids,
            mask=None,
            **kwargs,
        )

        if op.how in op.cast_blocklist:
            # i.e. how in ["rank"], since other cast_blocklist methods don't go
            # through cython_operation
            return res_values

        if isinstance(self.dtype, StringDtype):
            # Wrap the ndarray result back into an array of our string dtype.
            dtype = self.dtype
            string_array_cls = dtype.construct_array_type()
            return string_array_cls._from_sequence(res_values, dtype=dtype)

        else:
            # Unreachable in practice: non-StringDtype already raised above.
            # Kept as a defensive guard.
            raise NotImplementedError

2402 

2403 

class ExtensionArraySupportsAnyAll(ExtensionArray):
    """
    Interface for ExtensionArrays that implement boolean reductions.

    Subclasses must provide ``any`` and ``all``; the defaults here are
    abstract and always raise.
    """

    def any(self, *, skipna: bool = True) -> bool:
        raise AbstractMethodError(self)

    def all(self, *, skipna: bool = True) -> bool:
        raise AbstractMethodError(self)

2410 

2411 

class ExtensionOpsMixin:
    """
    A base class for linking the operators to their dunder names.

    .. note::

       You may want to set ``__array_priority__`` if you want your
       implementation to be called when involved in binary operations
       with NumPy arrays.
    """

    @classmethod
    def _create_arithmetic_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_arithmetic_ops(cls) -> None:
        # Attach each arithmetic dunder (and its reflected variant) built
        # by the subclass-provided factory.
        for dunder, op in [
            ("__add__", operator.add),
            ("__radd__", roperator.radd),
            ("__sub__", operator.sub),
            ("__rsub__", roperator.rsub),
            ("__mul__", operator.mul),
            ("__rmul__", roperator.rmul),
            ("__pow__", operator.pow),
            ("__rpow__", roperator.rpow),
            ("__mod__", operator.mod),
            ("__rmod__", roperator.rmod),
            ("__floordiv__", operator.floordiv),
            ("__rfloordiv__", roperator.rfloordiv),
            ("__truediv__", operator.truediv),
            ("__rtruediv__", roperator.rtruediv),
            ("__divmod__", divmod),
            ("__rdivmod__", roperator.rdivmod),
        ]:
            setattr(cls, dunder, cls._create_arithmetic_method(op))

    @classmethod
    def _create_comparison_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_comparison_ops(cls) -> None:
        # Attach the six rich-comparison dunders.
        for dunder, op in [
            ("__eq__", operator.eq),
            ("__ne__", operator.ne),
            ("__lt__", operator.lt),
            ("__gt__", operator.gt),
            ("__le__", operator.le),
            ("__ge__", operator.ge),
        ]:
            setattr(cls, dunder, cls._create_comparison_method(op))

    @classmethod
    def _create_logical_method(cls, op):
        raise AbstractMethodError(cls)

    @classmethod
    def _add_logical_ops(cls) -> None:
        # Attach the logical dunders and their reflected variants.
        for dunder, op in [
            ("__and__", operator.and_),
            ("__rand__", roperator.rand_),
            ("__or__", operator.or_),
            ("__ror__", roperator.ror_),
            ("__xor__", operator.xor),
            ("__rxor__", roperator.rxor),
        ]:
            setattr(cls, dunder, cls._create_logical_method(op))

2473 

2474 

2475class ExtensionScalarOpsMixin(ExtensionOpsMixin): 

2476 """ 

2477 A mixin for defining ops on an ExtensionArray. 

2478 

2479 It is assumed that the underlying scalar objects have the operators 

2480 already defined. 

2481 

2482 Notes 

2483 ----- 

2484 If you have defined a subclass MyExtensionArray(ExtensionArray), then 

2485 use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to 

2486 get the arithmetic operators. After the definition of MyExtensionArray, 

2487 insert the lines 

2488 

2489 MyExtensionArray._add_arithmetic_ops() 

2490 MyExtensionArray._add_comparison_ops() 

2491 

2492 to link the operators to your class. 

2493 

2494 .. note:: 

2495 

2496 You may want to set ``__array_priority__`` if you want your 

2497 implementation to be called when involved in binary operations 

2498 with NumPy arrays. 

2499 """ 

2500 

2501 @classmethod 

2502 def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): 

2503 """ 

2504 A class method that returns a method that will correspond to an 

2505 operator for an ExtensionArray subclass, by dispatching to the 

2506 relevant operator defined on the individual elements of the 

2507 ExtensionArray. 

2508 

2509 Parameters 

2510 ---------- 

2511 op : function 

2512 An operator that takes arguments op(a, b) 

2513 coerce_to_dtype : bool, default True 

2514 boolean indicating whether to attempt to convert 

2515 the result to the underlying ExtensionArray dtype. 

2516 If it's not possible to create a new ExtensionArray with the 

2517 values, an ndarray is returned instead. 

2518 

2519 Returns 

2520 ------- 

2521 Callable[[Any, Any], Union[ndarray, ExtensionArray]] 

2522 A method that can be bound to a class. When used, the method 

2523 receives the two arguments, one of which is the instance of 

2524 this class, and should return an ExtensionArray or an ndarray. 

2525 

2526 Returning an ndarray may be necessary when the result of the 

2527 `op` cannot be stored in the ExtensionArray. The dtype of the 

2528 ndarray uses NumPy's normal inference rules. 

2529 

2530 Examples 

2531 -------- 

2532 Given an ExtensionArray subclass called MyExtensionArray, use 

2533 

2534 __add__ = cls._create_method(operator.add) 

2535 

2536 in the class definition of MyExtensionArray to create the operator 

2537 for addition, that will be based on the operator implementation 

2538 of the underlying elements of the ExtensionArray 

2539 """ 

2540 

2541 def _binop(self, other): 

2542 def convert_values(param): 

2543 if isinstance(param, ExtensionArray) or is_list_like(param): 

2544 ovalues = param 

2545 else: # Assume its an object 

2546 ovalues = [param] * len(self) 

2547 return ovalues 

2548 

2549 if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)): 

2550 # rely on pandas to unbox and dispatch to us 

2551 return NotImplemented 

2552 

2553 lvalues = self 

2554 rvalues = convert_values(other) 

2555 

2556 # If the operator is not defined for the underlying objects, 

2557 # a TypeError should be raised 

2558 res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] 

2559 

2560 def _maybe_convert(arr): 

2561 if coerce_to_dtype: 

2562 # https://github.com/pandas-dev/pandas/issues/22850 

2563 # We catch all regular exceptions here, and fall back 

2564 # to an ndarray. 

2565 res = maybe_cast_pointwise_result(arr, self.dtype, same_dtype=False) 

2566 if not isinstance(res, type(self)): 

2567 # exception raised in _from_sequence; ensure we have ndarray 

2568 res = np.asarray(arr) 

2569 else: 

2570 res = np.asarray(arr, dtype=result_dtype) 

2571 return res 

2572 

2573 if op.__name__ in {"divmod", "rdivmod"}: 

2574 a, b = zip(*res) 

2575 return _maybe_convert(a), _maybe_convert(b) 

2576 

2577 return _maybe_convert(res) 

2578 

2579 op_name = f"__{op.__name__}__" 

2580 return set_function_name(_binop, op_name, cls) 

2581 

    @classmethod
    def _create_arithmetic_method(cls, op):
        # Arithmetic ops use the default elementwise dispatch with dtype
        # coercion enabled (coerce_to_dtype=True), so results land back in
        # the EA's own dtype when possible.
        return cls._create_method(op)

2585 

    @classmethod
    def _create_comparison_method(cls, op):
        # Comparisons always produce a plain bool ndarray rather than an
        # array of this EA's dtype (coerce_to_dtype=False, result_dtype=bool).
        return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)