Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/arrays/base.py: 34%


385 statements  

1""" 

2An interface for extending pandas with custom arrays. 

3 

4.. warning:: 

5 

6 This is an experimental API and subject to breaking changes 

7 without warning. 

8""" 

9from __future__ import annotations 

10 

11import operator 

12from typing import ( 

13 TYPE_CHECKING, 

14 Any, 

15 Callable, 

16 ClassVar, 

17 Iterator, 

18 Literal, 

19 Sequence, 

20 TypeVar, 

21 cast, 

22 overload, 

23) 

24 

25import numpy as np 

26 

27from pandas._libs import lib 

28from pandas._typing import ( 

29 ArrayLike, 

30 AstypeArg, 

31 AxisInt, 

32 Dtype, 

33 FillnaOptions, 

34 PositionalIndexer, 

35 ScalarIndexer, 

36 SequenceIndexer, 

37 Shape, 

38 SortKind, 

39 TakeIndexer, 

40 npt, 

41) 

42from pandas.compat import set_function_name 

43from pandas.compat.numpy import function as nv 

44from pandas.errors import AbstractMethodError 

45from pandas.util._decorators import ( 

46 Appender, 

47 Substitution, 

48 cache_readonly, 

49) 

50from pandas.util._validators import ( 

51 validate_bool_kwarg, 

52 validate_fillna_kwargs, 

53 validate_insert_loc, 

54) 

55 

56from pandas.core.dtypes.cast import maybe_cast_to_extension_array 

57from pandas.core.dtypes.common import ( 

58 is_datetime64_dtype, 

59 is_dtype_equal, 

60 is_list_like, 

61 is_scalar, 

62 is_timedelta64_dtype, 

63 pandas_dtype, 

64) 

65from pandas.core.dtypes.dtypes import ExtensionDtype 

66from pandas.core.dtypes.generic import ( 

67 ABCDataFrame, 

68 ABCIndex, 

69 ABCSeries, 

70) 

71from pandas.core.dtypes.missing import isna 

72 

73from pandas.core import ( 

74 arraylike, 

75 missing, 

76 roperator, 

77) 

78from pandas.core.algorithms import ( 

79 factorize_array, 

80 isin, 

81 mode, 

82 rank, 

83 unique, 

84) 

85from pandas.core.array_algos.quantile import quantile_with_mask 

86from pandas.core.sorting import ( 

87 nargminmax, 

88 nargsort, 

89) 

90 

91if TYPE_CHECKING: 

92 from pandas._typing import ( 

93 NumpySorter, 

94 NumpyValueArrayLike, 

95 ) 

96 

97_extension_array_shared_docs: dict[str, str] = {} 

98 

99ExtensionArrayT = TypeVar("ExtensionArrayT", bound="ExtensionArray") 

100 

101 

102class ExtensionArray: 

103 """ 

104 Abstract base class for custom 1-D array types. 

105 

106 pandas will recognize instances of this class as proper arrays 

107 with a custom type and will not attempt to coerce them to objects. They 

108 may be stored directly inside a :class:`DataFrame` or :class:`Series`. 

109 

110 Attributes 

111 ---------- 

112 dtype 

113 nbytes 

114 ndim 

115 shape 

116 

117 Methods 

118 ------- 

119 argsort 

120 astype 

121 copy 

122 dropna 

123 factorize 

124 fillna 

125 equals 

126 insert 

127 isin 

128 isna 

129 ravel 

130 repeat 

131 searchsorted 

132 shift 

133 take 

134 tolist 

135 unique 

136 view 

137 _accumulate 

138 _concat_same_type 

139 _formatter 

140 _from_factorized 

141 _from_sequence 

142 _from_sequence_of_strings 

143 _reduce 

144 _values_for_argsort 

145 _values_for_factorize 

146 

147 Notes 

148 ----- 

149 The interface includes the following abstract methods that must be 

150 implemented by subclasses: 

151 

152 * _from_sequence 

153 * _from_factorized 

154 * __getitem__ 

155 * __len__ 

156 * __eq__ 

157 * dtype 

158 * nbytes 

159 * isna 

160 * take 

161 * copy 

162 * _concat_same_type 

163 

164 A default repr displaying the type, (truncated) data, length, 

165 and dtype is provided. It can be customized or replaced 

166 by overriding: 

167 

168 * __repr__ : A default repr for the ExtensionArray. 

169 * _formatter : Print scalars inside a Series or DataFrame. 

170 

171 Some methods require casting the ExtensionArray to an ndarray of Python 

172 objects with ``self.astype(object)``, which may be expensive. When 

173 performance is a concern, we highly recommend overriding the following 

174 methods: 

175 

176 * fillna 

177 * dropna 

178 * unique 

179 * factorize / _values_for_factorize 

180 * argsort, argmax, argmin / _values_for_argsort 

181 * searchsorted 

182 

183 The remaining methods implemented on this class should be performant, 

184 as they only compose abstract methods. Still, a more efficient 

185 implementation may be available, and these methods can be overridden. 

186 

187 One can implement methods to handle array accumulations or reductions. 

188 

189 * _accumulate 

190 * _reduce 

191 

192 One can implement methods to handle parsing from strings that will be used 

193 in methods such as ``pandas.io.parsers.read_csv``. 

194 

195 * _from_sequence_of_strings 

196 

197 This class does not inherit from 'abc.ABCMeta' for performance reasons. 

198 Methods and properties required by the interface raise 

199 ``pandas.errors.AbstractMethodError`` and no ``register`` method is 

200 provided for registering virtual subclasses. 

201 

202 ExtensionArrays are limited to 1 dimension. 

203 

204 They may be backed by none, one, or many NumPy arrays. For example, 

205 ``pandas.Categorical`` is an extension array backed by two arrays, 

206 one for codes and one for categories. An array of IPv6 addresses may 

207 be backed by a NumPy structured array with two fields, one for the 

208 lower 64 bits and one for the upper 64 bits. Or they may be backed 

209 by some other storage type, like Python lists. Pandas makes no 

210 assumptions on how the data are stored, just that it can be converted 

211 to a NumPy array. 

212 The ExtensionArray interface does not impose any rules on how this data 

213 is stored. However, currently, the backing data cannot be stored in 

214 attributes called ``.values`` or ``._values`` to ensure full compatibility 

215 with pandas internals. But other names such as ``.data``, ``._data``, 

216 ``._items``, ... can be freely used. 

217 

218 If implementing NumPy's ``__array_ufunc__`` interface, pandas expects 

219 that 

220 

221 1. You defer by returning ``NotImplemented`` when any Series are present 

222 in `inputs`. Pandas will extract the arrays and call the ufunc again. 

223 2. You define a ``_HANDLED_TYPES`` tuple as an attribute on the class. 

224 Pandas inspects this to determine whether the ufunc is valid for the 

225 types present. 

226 

227 See :ref:`extending.extension.ufunc` for more. 

228 

229 By default, ExtensionArrays are not hashable. Immutable subclasses may 

230 override this behavior. 

231 """ 

232 

233 # '_typ' is for pandas.core.dtypes.generic.ABCExtensionArray. 

234 # Don't override this. 

235 _typ = "extension" 

236 

237 # ------------------------------------------------------------------------ 

238 # Constructors 

239 # ------------------------------------------------------------------------ 

240 

241 @classmethod 

242 def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = False): 

243 """ 

244 Construct a new ExtensionArray from a sequence of scalars. 

245 

246 Parameters 

247 ---------- 

248 scalars : Sequence 

249 Each element will be an instance of the scalar type for this 

250 array, ``cls.dtype.type`` or be converted into this type in this method. 

251 dtype : dtype, optional 

252 Construct for this particular dtype. This should be a Dtype 

253 compatible with the ExtensionArray. 

254 copy : bool, default False 

255 If True, copy the underlying data. 

256 

257 Returns 

258 ------- 

259 ExtensionArray 

260 """ 

261 raise AbstractMethodError(cls) 

262 

263 @classmethod 

264 def _from_sequence_of_strings( 

265 cls, strings, *, dtype: Dtype | None = None, copy: bool = False 

266 ): 

267 """ 

268 Construct a new ExtensionArray from a sequence of strings. 

269 

270 Parameters 

271 ---------- 

272 strings : Sequence 

273 Each element will be an instance of the scalar type for this 

274 array, ``cls.dtype.type``. 

275 dtype : dtype, optional 

276 Construct for this particular dtype. This should be a Dtype 

277 compatible with the ExtensionArray. 

278 copy : bool, default False 

279 If True, copy the underlying data. 

280 

281 Returns 

282 ------- 

283 ExtensionArray 

284 """ 

285 raise AbstractMethodError(cls) 

286 

287 @classmethod 

288 def _from_factorized(cls, values, original): 

289 """ 

290 Reconstruct an ExtensionArray after factorization. 

291 

292 Parameters 

293 ---------- 

294 values : ndarray 

295 An integer ndarray with the factorized values. 

296 original : ExtensionArray 

297 The original ExtensionArray that factorize was called on. 

298 

299 See Also 

300 -------- 

301 factorize : Top-level factorize method that dispatches here. 

302 ExtensionArray.factorize : Encode the extension array as an enumerated type. 

303 """ 

304 raise AbstractMethodError(cls) 

305 

306 # ------------------------------------------------------------------------ 

307 # Must be a Sequence 

308 # ------------------------------------------------------------------------ 

309 @overload 

310 def __getitem__(self, item: ScalarIndexer) -> Any: 

311 ... 

312 

313 @overload 

314 def __getitem__(self: ExtensionArrayT, item: SequenceIndexer) -> ExtensionArrayT: 

315 ... 

316 

317 def __getitem__( 

318 self: ExtensionArrayT, item: PositionalIndexer 

319 ) -> ExtensionArrayT | Any: 

320 """ 

321 Select a subset of self. 

322 

323 Parameters 

324 ---------- 

325 item : int, slice, or ndarray 

326 * int: The position in 'self' to get. 

327 

328 * slice: A slice object, where 'start', 'stop', and 'step' are 

329 integers or None 

330 

331 * ndarray: A 1-d boolean NumPy ndarray the same length as 'self' 

332 

333 * list[int]: A list of int 

334 

335 Returns 

336 ------- 

337 item : scalar or ExtensionArray 

338 

339 Notes 

340 ----- 

341 For scalar ``item``, return a scalar value suitable for the array's 

342 type. This should be an instance of ``self.dtype.type``. 

343 

344 For slice ``key``, return an instance of ``ExtensionArray``, even 

345 if the slice is length 0 or 1. 

346 

347 For a boolean mask, return an instance of ``ExtensionArray``, filtered 

348 to the values where ``item`` is True. 

349 """ 

350 raise AbstractMethodError(self) 

351 

352 def __setitem__(self, key, value) -> None: 

353 """ 

354 Set one or more values inplace. 

355 

356 This method is not required to satisfy the pandas extension array 

357 interface. 

358 

359 Parameters 

360 ---------- 

361 key : int, ndarray, or slice 

362 When called from, e.g. ``Series.__setitem__``, ``key`` will be 

363 one of 

364 

365 * scalar int 

366 * ndarray of integers. 

367 * boolean ndarray 

368 * slice object 

369 

370 value : ExtensionDtype.type, Sequence[ExtensionDtype.type], or object 

371 value or values to be set at ``key``. 

372 

373 Returns 

374 ------- 

375 None 

376 """ 

377 # Some notes to the ExtensionArray implementor who may have ended up 

378 # here. While this method is not required for the interface, if you 

379 # *do* choose to implement __setitem__, then some semantics should be 

380 # observed: 

381 # 

382 # * Setting multiple values : ExtensionArrays should support setting 

383 # multiple values at once, 'key' will be a sequence of integers and 

384 # 'value' will be a same-length sequence. 

385 # 

386 # * Broadcasting : For a sequence 'key' and a scalar 'value', 

387 # each position in 'key' should be set to 'value'. 

388 # 

389 # * Coercion : Most users will expect basic coercion to work. For 

390 # example, a string like '2018-01-01' is coerced to a datetime 

391 # when setting on a datetime64ns array. In general, if the 

392 # __init__ method coerces that value, then so should __setitem__ 

393 # Note, also, that Series/DataFrame.where internally use __setitem__ 

394 # on a copy of the data. 

395 raise NotImplementedError(f"{type(self)} does not implement __setitem__.") 

396 

397 def __len__(self) -> int: 

398 """ 

399 Length of this array. 

400 

401 Returns 

402 ------- 

403 length : int 

404 """ 

405 raise AbstractMethodError(self) 

406 

407 def __iter__(self) -> Iterator[Any]: 

408 """ 

409 Iterate over elements of the array. 

410 """ 

411 # This needs to be implemented so that pandas recognizes extension 

412 # arrays as list-like. The default implementation makes successive 

413 # calls to ``__getitem__``, which may be slower than necessary. 

414 for i in range(len(self)): 

415 yield self[i] 

416 

417 def __contains__(self, item: object) -> bool | np.bool_: 

418 """ 

419 Return for `item in self`. 

420 """ 

421 # GH37867 

422 # comparisons of any item to pd.NA always return pd.NA, so e.g. "a" in [pd.NA] 

423 # would raise a TypeError. The implementation below works around that. 

424 if is_scalar(item) and isna(item): 

425 if not self._can_hold_na: 

426 return False 

427 elif item is self.dtype.na_value or isinstance(item, self.dtype.type): 

428 return self._hasna 

429 else: 

430 return False 

431 else: 

432 # error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no 

433 # attribute "any" 

434 return (item == self).any() # type: ignore[union-attr] 

435 

436 # error: Signature of "__eq__" incompatible with supertype "object" 

437 def __eq__(self, other: Any) -> ArrayLike: # type: ignore[override] 

438 """ 

439 Return for `self == other` (element-wise equality). 

440 """ 

441 # Implementer note: this should return a boolean numpy ndarray or 

442 # a boolean ExtensionArray. 

443 # When `other` is one of Series, Index, or DataFrame, this method should 

444 # return NotImplemented (to ensure that those objects are responsible for 

445 # first unpacking the arrays, and then dispatch the operation to the 

446 # underlying arrays) 

447 raise AbstractMethodError(self) 

448 

449 # error: Signature of "__ne__" incompatible with supertype "object" 

450 def __ne__(self, other: Any) -> ArrayLike: # type: ignore[override] 

451 """ 

452 Return for `self != other` (element-wise inequality). 

453 """ 

454 return ~(self == other) 

455 

456 def to_numpy( 

457 self, 

458 dtype: npt.DTypeLike | None = None, 

459 copy: bool = False, 

460 na_value: object = lib.no_default, 

461 ) -> np.ndarray: 

462 """ 

463 Convert to a NumPy ndarray. 

464 

465 This is similar to :meth:`numpy.asarray`, but may provide additional control 

466 over how the conversion is done. 

467 

468 Parameters 

469 ---------- 

470 dtype : str or numpy.dtype, optional 

471 The dtype to pass to :meth:`numpy.asarray`. 

472 copy : bool, default False 

473 Whether to ensure that the returned value is not a view on 

474 another array. Note that ``copy=False`` does not *ensure* that 

475 ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that 

476 a copy is made, even if not strictly necessary. 

477 na_value : Any, optional 

478 The value to use for missing values. The default value depends 

479 on `dtype` and the type of the array. 

480 

481 Returns 

482 ------- 

483 numpy.ndarray 

484 """ 

485 result = np.asarray(self, dtype=dtype) 

486 if copy or na_value is not lib.no_default: 

487 result = result.copy() 

488 if na_value is not lib.no_default: 

489 result[self.isna()] = na_value 

490 return result 

491 
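# Illustrative sketch (not part of the pandas source; assumes the masked
# "Int64" extension array as a concrete subclass following the contract above):
#
#     import pandas as pd
#     arr = pd.array([1, 2, None], dtype="Int64")
#     arr.to_numpy(dtype="float64", na_value=float("nan"))
#     # -> array([1., 2., nan])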

492 # ------------------------------------------------------------------------ 

493 # Required attributes 

494 # ------------------------------------------------------------------------ 

495 

496 @property 

497 def dtype(self) -> ExtensionDtype: 

498 """ 

499 An instance of 'ExtensionDtype'. 

500 """ 

501 raise AbstractMethodError(self) 

502 

503 @property 

504 def shape(self) -> Shape: 

505 """ 

506 Return a tuple of the array dimensions. 

507 """ 

508 return (len(self),) 

509 

510 @property 

511 def size(self) -> int: 

512 """ 

513 The number of elements in the array. 

514 """ 

515 # error: Incompatible return value type (got "signedinteger[_64Bit]", 

516 # expected "int") [return-value] 

517 return np.prod(self.shape) # type: ignore[return-value] 

518 

519 @property 

520 def ndim(self) -> int: 

521 """ 

522 Extension Arrays are only allowed to be 1-dimensional. 

523 """ 

524 return 1 

525 

526 @property 

527 def nbytes(self) -> int: 

528 """ 

529 The number of bytes needed to store this object in memory. 

530 """ 

531 # If this is expensive to compute, return an approximate lower bound 

532 # on the number of bytes needed. 

533 raise AbstractMethodError(self) 

534 

535 # ------------------------------------------------------------------------ 

536 # Additional Methods 

537 # ------------------------------------------------------------------------ 

538 

539 @overload 

540 def astype(self, dtype: npt.DTypeLike, copy: bool = ...) -> np.ndarray: 

541 ... 

542 

543 @overload 

544 def astype(self, dtype: ExtensionDtype, copy: bool = ...) -> ExtensionArray: 

545 ... 

546 

547 @overload 

548 def astype(self, dtype: AstypeArg, copy: bool = ...) -> ArrayLike: 

549 ... 

550 

551 def astype(self, dtype: AstypeArg, copy: bool = True) -> ArrayLike: 

552 """ 

553 Cast to a NumPy array or ExtensionArray with 'dtype'. 

554 

555 Parameters 

556 ---------- 

557 dtype : str or dtype 

558 Typecode or data-type to which the array is cast. 

559 copy : bool, default True 

560 Whether to copy the data, even if not necessary. If False, 

561 a copy is made only if the old dtype does not match the 

562 new dtype. 

563 

564 Returns 

565 ------- 

566 np.ndarray or pandas.api.extensions.ExtensionArray 

567 An ExtensionArray if dtype is ExtensionDtype, 

568 Otherwise a NumPy ndarray with 'dtype' for its dtype. 

569 """ 

570 

571 dtype = pandas_dtype(dtype) 

572 if is_dtype_equal(dtype, self.dtype): 

573 if not copy: 

574 return self 

575 else: 

576 return self.copy() 

577 

578 if isinstance(dtype, ExtensionDtype): 

579 cls = dtype.construct_array_type() 

580 return cls._from_sequence(self, dtype=dtype, copy=copy) 

581 

582 elif is_datetime64_dtype(dtype): 

583 from pandas.core.arrays import DatetimeArray 

584 

585 return DatetimeArray._from_sequence(self, dtype=dtype, copy=copy) 

586 

587 elif is_timedelta64_dtype(dtype): 

588 from pandas.core.arrays import TimedeltaArray 

589 

590 return TimedeltaArray._from_sequence(self, dtype=dtype, copy=copy) 

591 

592 return np.array(self, dtype=dtype, copy=copy) 

593 
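# Illustrative sketch of the dispatch above (assumes the masked "Int64"
# extension array): an ExtensionDtype target yields another ExtensionArray,
# anything else falls through to a NumPy ndarray.
#
#     import pandas as pd
#     arr = pd.array([1, 2], dtype="Int64")
#     arr.astype("Float64")   # ExtensionDtype -> FloatingArray [1.0, 2.0]
#     arr.astype("float64")   # NumPy dtype    -> array([1., 2.])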

594 def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll: 

595 """ 

596 A 1-D array indicating if each value is missing. 

597 

598 Returns 

599 ------- 

600 numpy.ndarray or pandas.api.extensions.ExtensionArray 

601 In most cases, this should return a NumPy ndarray. For 

602 exceptional cases like ``SparseArray``, where returning 

603 an ndarray would be expensive, an ExtensionArray may be 

604 returned. 

605 

606 Notes 

607 ----- 

608 If returning an ExtensionArray, then 

609 

610 * ``na_values._is_boolean`` should be True 

611 * ``na_values`` should implement :func:`ExtensionArray._reduce` 

612 * ``na_values.any`` and ``na_values.all`` should be implemented 

613 """ 

614 raise AbstractMethodError(self) 

615 

616 @property 

617 def _hasna(self) -> bool: 

618 # GH#22680 

619 """ 

620 Equivalent to `self.isna().any()`. 

621 

622 Some ExtensionArray subclasses may be able to optimize this check. 

623 """ 

624 return bool(self.isna().any()) 

625 

626 def _values_for_argsort(self) -> np.ndarray: 

627 """ 

628 Return values for sorting. 

629 

630 Returns 

631 ------- 

632 ndarray 

633 The transformed values should maintain the ordering between values 

634 within the array. 

635 

636 See Also 

637 -------- 

638 ExtensionArray.argsort : Return the indices that would sort this array. 

639 

640 Notes 

641 ----- 

642 The caller is responsible for *not* modifying these values in-place, so 

643 it is safe for implementors to give views on `self`. 

644 

645 Functions that use this (e.g. ExtensionArray.argsort) should ignore 

646 entries with missing values in the original array (according to `self.isna()`). 

647 This means that the corresponding entries in the returned array don't need to 

648 be modified to sort correctly. 

649 """ 

650 # Note: this is used in `ExtensionArray.argsort/argmin/argmax`. 

651 return np.array(self) 

652 

653 def argsort( 

654 self, 

655 *, 

656 ascending: bool = True, 

657 kind: SortKind = "quicksort", 

658 na_position: str = "last", 

659 **kwargs, 

660 ) -> np.ndarray: 

661 """ 

662 Return the indices that would sort this array. 

663 

664 Parameters 

665 ---------- 

666 ascending : bool, default True 

667 Whether the indices should result in an ascending 

668 or descending sort. 

669 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, optional 

670 Sorting algorithm. 

671 **kwargs: 

672 Passed through to :func:`numpy.argsort`. 

673 

674 Returns 

675 ------- 

676 np.ndarray[np.intp] 

677 Array of indices that sort ``self``. If NaN values are contained, 

678 NaN values are placed at the end. 

679 

680 See Also 

681 -------- 

682 numpy.argsort : Sorting implementation used internally. 

683 """ 

684 # Implementor note: You have two places to override the behavior of 

685 # argsort. 

686 # 1. _values_for_argsort : construct the values passed to np.argsort 

687 # 2. argsort : total control over sorting. In case of overriding this, 

688 # it is recommended to also override argmax/argmin 

689 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs) 

690 

691 values = self._values_for_argsort() 

692 return nargsort( 

693 values, 

694 kind=kind, 

695 ascending=ascending, 

696 na_position=na_position, 

697 mask=np.asarray(self.isna()), 

698 ) 

699 
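# Illustrative sketch (assumes the masked "Int64" extension array): missing
# values are placed at the end by default via ``na_position="last"``.
#
#     import pandas as pd
#     arr = pd.array([3, None, 1, 2], dtype="Int64")
#     arr.argsort()
#     # -> array([2, 3, 0, 1])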

700 def argmin(self, skipna: bool = True) -> int: 

701 """ 

702 Return the index of minimum value. 

703 

704 In case of multiple occurrences of the minimum value, the index 

705 corresponding to the first occurrence is returned. 

706 

707 Parameters 

708 ---------- 

709 skipna : bool, default True 

710 

711 Returns 

712 ------- 

713 int 

714 

715 See Also 

716 -------- 

717 ExtensionArray.argmax 

718 """ 

719 # Implementor note: You have two places to override the behavior of 

720 # argmin. 

721 # 1. _values_for_argsort : construct the values used in nargminmax 

722 # 2. argmin itself : total control over sorting. 

723 validate_bool_kwarg(skipna, "skipna") 

724 if not skipna and self._hasna: 

725 raise NotImplementedError 

726 return nargminmax(self, "argmin") 

727 

728 def argmax(self, skipna: bool = True) -> int: 

729 """ 

730 Return the index of maximum value. 

731 

732 In case of multiple occurrences of the maximum value, the index 

733 corresponding to the first occurrence is returned. 

734 

735 Parameters 

736 ---------- 

737 skipna : bool, default True 

738 

739 Returns 

740 ------- 

741 int 

742 

743 See Also 

744 -------- 

745 ExtensionArray.argmin 

746 """ 

747 # Implementor note: You have two places to override the behavior of 

748 # argmax. 

749 # 1. _values_for_argsort : construct the values used in nargminmax 

750 # 2. argmax itself : total control over sorting. 

751 validate_bool_kwarg(skipna, "skipna") 

752 if not skipna and self._hasna: 

753 raise NotImplementedError 

754 return nargminmax(self, "argmax") 

755 

756 def fillna( 

757 self: ExtensionArrayT, 

758 value: object | ArrayLike | None = None, 

759 method: FillnaOptions | None = None, 

760 limit: int | None = None, 

761 ) -> ExtensionArrayT: 

762 """ 

763 Fill NA/NaN values using the specified method. 

764 

765 Parameters 

766 ---------- 

767 value : scalar, array-like 

768 If a scalar value is passed it is used to fill all missing values. 

769 Alternatively, an array-like 'value' can be given. It's expected 

770 that the array-like have the same length as 'self'. 

771 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None 

772 Method to use for filling holes in reindexed Series: 

773 

774 * pad / ffill: propagate last valid observation forward to next valid. 

775 * backfill / bfill: use NEXT valid observation to fill gap. 

776 

777 limit : int, default None 

778 If method is specified, this is the maximum number of consecutive 

779 NaN values to forward/backward fill. In other words, if there is 

780 a gap with more than this number of consecutive NaNs, it will only 

781 be partially filled. If method is not specified, this is the 

782 maximum number of entries along the entire axis where NaNs will be 

783 filled. 

784 

785 Returns 

786 ------- 

787 ExtensionArray 

788 With NA/NaN filled. 

789 """ 

790 value, method = validate_fillna_kwargs(value, method) 

791 

792 mask = self.isna() 

793 # error: Argument 2 to "check_value_size" has incompatible type 

794 # "ExtensionArray"; expected "ndarray" 

795 value = missing.check_value_size( 

796 value, mask, len(self) # type: ignore[arg-type] 

797 ) 

798 

799 if mask.any(): 

800 if method is not None: 

801 func = missing.get_fill_func(method) 

802 npvalues = self.astype(object) 

803 func(npvalues, limit=limit, mask=mask) 

804 new_values = self._from_sequence(npvalues, dtype=self.dtype) 

805 else: 

806 # fill with value 

807 new_values = self.copy() 

808 new_values[mask] = value 

809 else: 

810 new_values = self.copy() 

811 return new_values 

812 
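# Illustrative sketch (assumes the masked "Int64" extension array):
#
#     import pandas as pd
#     arr = pd.array([1, None, 3], dtype="Int64")
#     arr.fillna(0)
#     # -> <IntegerArray> [1, 0, 3], dtype: Int64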

813 def dropna(self: ExtensionArrayT) -> ExtensionArrayT: 

814 """ 

815 Return ExtensionArray without NA values. 

816 

817 Returns 

818 ------- 

819 pandas.api.extensions.ExtensionArray 

820 """ 

821 # error: Unsupported operand type for ~ ("ExtensionArray") 

822 return self[~self.isna()] # type: ignore[operator] 

823 

824 def shift(self, periods: int = 1, fill_value: object = None) -> ExtensionArray: 

825 """ 

826 Shift values by desired number. 

827 

828 Newly introduced missing values are filled with 

829 ``self.dtype.na_value``. 

830 

831 Parameters 

832 ---------- 

833 periods : int, default 1 

834 The number of periods to shift. Negative values are allowed 

835 for shifting backwards. 

836 

837 fill_value : object, optional 

838 The scalar value to use for newly introduced missing values. 

839 The default is ``self.dtype.na_value``. 

840 

841 Returns 

842 ------- 

843 ExtensionArray 

844 Shifted. 

845 

846 Notes 

847 ----- 

848 If ``self`` is empty or ``periods`` is 0, a copy of ``self`` is 

849 returned. 

850 

851 If ``periods > len(self)``, then an array of size 

852 len(self) is returned, with all values filled with 

853 ``self.dtype.na_value``. 

854 """ 

855 # Note: this implementation assumes that `self.dtype.na_value` can be 

856 # stored in an instance of your ExtensionArray with `self.dtype`. 

857 if not len(self) or periods == 0: 

858 return self.copy() 

859 

860 if isna(fill_value): 

861 fill_value = self.dtype.na_value 

862 

863 empty = self._from_sequence( 

864 [fill_value] * min(abs(periods), len(self)), dtype=self.dtype 

865 ) 

866 if periods > 0: 

867 a = empty 

868 b = self[:-periods] 

869 else: 

870 a = self[abs(periods) :] 

871 b = empty 

872 return self._concat_same_type([a, b]) 

873 
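# Illustrative sketch (assumes the masked "Int64" extension array): newly
# introduced positions are filled with the dtype's NA value.
#
#     import pandas as pd
#     arr = pd.array([1, 2, 3], dtype="Int64")
#     arr.shift(1)    # -> [<NA>, 1, 2]
#     arr.shift(-2)   # -> [3, <NA>, <NA>]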

874 def unique(self: ExtensionArrayT) -> ExtensionArrayT: 

875 """ 

876 Compute the ExtensionArray of unique values. 

877 

878 Returns 

879 ------- 

880 pandas.api.extensions.ExtensionArray 

881 """ 

882 uniques = unique(self.astype(object)) 

883 return self._from_sequence(uniques, dtype=self.dtype) 

884 
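# Illustrative sketch (assumes the masked "Int64" extension array):
#
#     import pandas as pd
#     pd.array([1, 1, 2, 3, 3], dtype="Int64").unique()
#     # -> <IntegerArray> [1, 2, 3], dtype: Int64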

885 def searchsorted( 

886 self, 

887 value: NumpyValueArrayLike | ExtensionArray, 

888 side: Literal["left", "right"] = "left", 

889 sorter: NumpySorter = None, 

890 ) -> npt.NDArray[np.intp] | np.intp: 

891 """ 

892 Find indices where elements should be inserted to maintain order. 

893 

894 Find the indices into a sorted array `self` such that, if the 

895 corresponding elements in `value` were inserted before the indices, 

896 the order of `self` would be preserved. 

897 

898 Assuming that `self` is sorted: 

899 

900 ====== ================================ 

901 `side` returned index `i` satisfies 

902 ====== ================================ 

903 left ``self[i-1] < value <= self[i]`` 

904 right ``self[i-1] <= value < self[i]`` 

905 ====== ================================ 

906 

907 Parameters 

908 ---------- 

909 value : array-like, list or scalar 

910 Value(s) to insert into `self`. 

911 side : {'left', 'right'}, optional 

912 If 'left', the index of the first suitable location found is given. 

913 If 'right', return the last such index. If there is no suitable 

914 index, return either 0 or N (where N is the length of `self`). 

915 sorter : 1-D array-like, optional 

916 Optional array of integer indices that sort `self` into ascending 

917 order. They are typically the result of argsort. 

918 

919 Returns 

920 ------- 

921 array of ints or int 

922 If value is array-like, array of insertion points. 

923 If value is scalar, a single integer. 

924 

925 See Also 

926 -------- 

927 numpy.searchsorted : Similar method from NumPy. 

928 """ 

929 # Note: the base tests provided by pandas only test the basics. 

930 # We do not test 

931 # 1. Values outside the range of the `data_for_sorting` fixture 

932 # 2. Values between the values in the `data_for_sorting` fixture 

933 # 3. Missing values. 

934 arr = self.astype(object) 

935 if isinstance(value, ExtensionArray): 

936 value = value.astype(object) 

937 return arr.searchsorted(value, side=side, sorter=sorter) 

938 
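# Illustrative sketch (assumes a sorted masked "Int64" extension array):
#
#     import pandas as pd
#     arr = pd.array([1, 2, 3, 5], dtype="Int64")
#     arr.searchsorted(4)                     # -> 3
#     arr.searchsorted([0, 3], side="right")  # -> array([0, 3])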

939 def equals(self, other: object) -> bool: 

940 """ 

941 Return if another array is equivalent to this array. 

942 

943 Equivalent means that both arrays have the same shape and dtype, and 

944 all values compare equal. Missing values in the same location are 

945 considered equal (in contrast with normal equality). 

946 

947 Parameters 

948 ---------- 

949 other : ExtensionArray 

950 Array to compare to this Array. 

951 

952 Returns 

953 ------- 

954 boolean 

955 Whether the arrays are equivalent. 

956 """ 

957 if type(self) != type(other): 

958 return False 

959 other = cast(ExtensionArray, other) 

960 if not is_dtype_equal(self.dtype, other.dtype): 

961 return False 

962 elif len(self) != len(other): 

963 return False 

964 else: 

965 equal_values = self == other 

966 if isinstance(equal_values, ExtensionArray): 

967 # boolean array with NA -> fill with False 

968 equal_values = equal_values.fillna(False) 

969 # error: Unsupported left operand type for & ("ExtensionArray") 

970 equal_na = self.isna() & other.isna() # type: ignore[operator] 

971 return bool((equal_values | equal_na).all()) 

972 
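# Illustrative sketch (assumes the masked "Int64" extension array): NA values
# in matching positions compare equal here, unlike element-wise ``==``.
#
#     import pandas as pd
#     left = pd.array([1, None, 3], dtype="Int64")
#     right = pd.array([1, None, 3], dtype="Int64")
#     left.equals(right)   # -> True
#     (left == right)[1]   # -> <NA>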

973 def isin(self, values) -> npt.NDArray[np.bool_]: 

974 """ 

975 Pointwise comparison for set containment in the given values. 

976 

977 Roughly equivalent to `np.array([x in values for x in self])` 

978 

979 Parameters 

980 ---------- 

981 values : Sequence 

982 

983 Returns 

984 ------- 

985 np.ndarray[bool] 

986 """ 

987 return isin(np.asarray(self), values) 

988 
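# Illustrative sketch (assumes the masked "Int64" extension array; the exact
# return type of the mask may differ between subclasses):
#
#     import pandas as pd
#     pd.array([1, 2, 3], dtype="Int64").isin([1, 3])
#     # -> boolean mask: [True, False, True]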

989 def _values_for_factorize(self) -> tuple[np.ndarray, Any]: 

990 """ 

991 Return an array and missing value suitable for factorization. 

992 

993 Returns 

994 ------- 

995 values : ndarray 

996 

997 An array suitable for factorization. This should maintain order 

998 and be a supported dtype (Float64, Int64, UInt64, String, Object). 

999 By default, the extension array is cast to object dtype. 

1000 na_value : object 

1001 The value in `values` to consider missing. This will be treated 

1002 as NA in the factorization routines, so it will be coded as 

1003 `-1` and not included in `uniques`. By default, 

1004 ``np.nan`` is used. 

1005 

1006 Notes 

1007 ----- 

1008 The values returned by this method are also used in 

1009 :func:`pandas.util.hash_pandas_object`. 

1010 """ 

1011 return self.astype(object), np.nan 

1012 

1013 def factorize( 

1014 self, 

1015 use_na_sentinel: bool = True, 

1016 ) -> tuple[np.ndarray, ExtensionArray]: 

1017 """ 

1018 Encode the extension array as an enumerated type. 

1019 

1020 Parameters 

1021 ---------- 

1022 use_na_sentinel : bool, default True 

1023 If True, the sentinel -1 will be used for NaN values. If False, 

1024 NaN values will be encoded as non-negative integers and the NaN values 

1025 will not be dropped from the uniques of the values. 

1026 

1027 .. versionadded:: 1.5.0 

1028 

1029 Returns 

1030 ------- 

1031 codes : ndarray 

1032 An integer NumPy array that's an indexer into the original 

1033 ExtensionArray. 

1034 uniques : ExtensionArray 

1035 An ExtensionArray containing the unique values of `self`. 

1036 

1037 .. note:: 

1038 

1039 uniques will *not* contain an entry for the NA value of 

1040 the ExtensionArray if there are any missing values present 

1041 in `self`. 

1042 

1043 See Also 

1044 -------- 

1045 factorize : Top-level factorize method that dispatches here. 

1046 

1047 Notes 

1048 ----- 

1049 :meth:`pandas.factorize` offers a `sort` keyword as well. 

1050 """ 

1051 # Implementer note: There are two ways to override the behavior of 

1052 # pandas.factorize 

1053 # 1. _values_for_factorize and _from_factorize. 

1054 # Specify the values passed to pandas' internal factorization 

1055 # routines, and how to convert from those values back to the 

1056 # original ExtensionArray. 

1057 # 2. ExtensionArray.factorize. 

1058 # Complete control over factorization. 

1059 arr, na_value = self._values_for_factorize() 

1060 

1061 codes, uniques = factorize_array( 

1062 arr, use_na_sentinel=use_na_sentinel, na_value=na_value 

1063 ) 

1064 

1065 uniques_ea = self._from_factorized(uniques, self) 

1066 return codes, uniques_ea 

1067 
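# Illustrative sketch (assumes the nullable "string" extension array): missing
# values get the -1 sentinel and are left out of `uniques`.
#
#     import pandas as pd
#     arr = pd.array(["a", "b", "a", None], dtype="string")
#     codes, uniques = arr.factorize()
#     # codes   -> array([ 0,  1,  0, -1])
#     # uniques -> <StringArray> ['a', 'b']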

1068 _extension_array_shared_docs[ 

1069 "repeat" 

1070 ] = """ 

1071 Repeat elements of a %(klass)s. 

1072 

1073 Returns a new %(klass)s where each element of the current %(klass)s 

1074 is repeated consecutively a given number of times. 

1075 

1076 Parameters 

1077 ---------- 

1078 repeats : int or array of ints 

1079 The number of repetitions for each element. This should be a 

1080 non-negative integer. Repeating 0 times will return an empty 

1081 %(klass)s. 

1082 axis : None 

1083 Must be ``None``. Has no effect but is accepted for compatibility 

1084 with numpy. 

1085 

1086 Returns 

1087 ------- 

1088 %(klass)s 

1089 Newly created %(klass)s with repeated elements. 

1090 

1091 See Also 

1092 -------- 

1093 Series.repeat : Equivalent function for Series. 

1094 Index.repeat : Equivalent function for Index. 

1095 numpy.repeat : Similar method for :class:`numpy.ndarray`. 

1096 ExtensionArray.take : Take arbitrary positions. 

1097 

1098 Examples 

1099 -------- 

1100 >>> cat = pd.Categorical(['a', 'b', 'c']) 

1101 >>> cat 

1102 ['a', 'b', 'c'] 

1103 Categories (3, object): ['a', 'b', 'c'] 

1104 >>> cat.repeat(2) 

1105 ['a', 'a', 'b', 'b', 'c', 'c'] 

1106 Categories (3, object): ['a', 'b', 'c'] 

1107 >>> cat.repeat([1, 2, 3]) 

1108 ['a', 'b', 'b', 'c', 'c', 'c'] 

1109 Categories (3, object): ['a', 'b', 'c'] 

1110 """ 

1111 

1112 @Substitution(klass="ExtensionArray") 

1113 @Appender(_extension_array_shared_docs["repeat"]) 

1114 def repeat( 

1115 self: ExtensionArrayT, repeats: int | Sequence[int], axis: AxisInt | None = None 

1116 ) -> ExtensionArrayT: 

1117 nv.validate_repeat((), {"axis": axis}) 

1118 ind = np.arange(len(self)).repeat(repeats) 

1119 return self.take(ind) 

1120 

1121 # ------------------------------------------------------------------------ 

1122 # Indexing methods 

1123 # ------------------------------------------------------------------------ 

1124 

1125 def take( 

1126 self: ExtensionArrayT, 

1127 indices: TakeIndexer, 

1128 *, 

1129 allow_fill: bool = False, 

1130 fill_value: Any = None, 

1131 ) -> ExtensionArrayT: 

1132 """ 

1133 Take elements from an array. 

1134 

1135 Parameters 

1136 ---------- 

1137 indices : sequence of int or one-dimensional np.ndarray of int 

1138 Indices to be taken. 

1139 allow_fill : bool, default False 

1140 How to handle negative values in `indices`. 

1141 

1142 * False: negative values in `indices` indicate positional indices 

1143 from the right (the default). This is similar to 

1144 :func:`numpy.take`. 

1145 

1146 * True: negative values in `indices` indicate 

1147 missing values. These values are set to `fill_value`. Any other 

1148 negative values raise a ``ValueError``. 

1149 

1150 fill_value : any, optional 

1151 Fill value to use for NA-indices when `allow_fill` is True. 

1152 This may be ``None``, in which case the default NA value for 

1153 the type, ``self.dtype.na_value``, is used. 

1154 

1155 For many ExtensionArrays, there will be two representations of 

1156 `fill_value`: a user-facing "boxed" scalar, and a low-level 

1157 physical NA value. `fill_value` should be the user-facing version, 

1158 and the implementation should handle translating that to the 

1159 physical version for processing the take if necessary. 

1160 

1161 Returns 

1162 ------- 

1163 ExtensionArray 

1164 

1165 Raises 

1166 ------ 

1167 IndexError 

1168 When the indices are out of bounds for the array. 

1169 ValueError 

1170 When `indices` contains negative values other than ``-1`` 

1171 and `allow_fill` is True. 

1172 

1173 See Also 

1174 -------- 

1175 numpy.take : Take elements from an array along an axis. 

1176 api.extensions.take : Take elements from an array. 

1177 

1178 Notes 

1179 ----- 

1180 ExtensionArray.take is called by ``Series.__getitem__``, ``.loc``, 

1181 ``iloc``, when `indices` is a sequence of values. Additionally, 

1182 it's called by :meth:`Series.reindex`, or any other method 

1183 that causes realignment, with a `fill_value`. 

1184 

1185 Examples 

1186 -------- 

1187 Here's an example implementation, which relies on casting the 

1188 extension array to object dtype. This uses the helper method 

1189 :func:`pandas.api.extensions.take`. 

1190 

1191 .. code-block:: python 

1192 

1193 def take(self, indices, allow_fill=False, fill_value=None): 

1194 from pandas.core.algorithms import take 

1195 

1196 # If the ExtensionArray is backed by an ndarray, then 

1197 # just pass that here instead of coercing to object. 

1198 data = self.astype(object) 

1199 

1200 if allow_fill and fill_value is None: 

1201 fill_value = self.dtype.na_value 

1202 

1203 # fill value should always be translated from the scalar 

1204 # type for the array, to the physical storage type for 

1205 # the data, before passing to take. 

1206 

1207 result = take(data, indices, fill_value=fill_value, 

1208 allow_fill=allow_fill) 

1209 return self._from_sequence(result, dtype=self.dtype) 

1210 """ 

1211 # Implementer note: The `fill_value` parameter should be a user-facing 

1212 # value, an instance of self.dtype.type. When passed `fill_value=None`, 

1213 # the default of `self.dtype.na_value` should be used. 

1214 # This may differ from the physical storage type your ExtensionArray 

1215 # uses. In this case, your implementation is responsible for casting 

1216 # the user-facing type to the storage type, before using 

1217 # pandas.api.extensions.take 

1218 raise AbstractMethodError(self) 

1219 
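# Illustrative sketch of the two `allow_fill` modes (assumes the masked
# "Int64" extension array as a concrete implementation):
#
#     import pandas as pd
#     arr = pd.array([10, 20, 30], dtype="Int64")
#     arr.take([0, -1])                    # -> [10, 30]   (-1 counts from the end)
#     arr.take([0, -1], allow_fill=True)   # -> [10, <NA>] (-1 means missing)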

1220 def copy(self: ExtensionArrayT) -> ExtensionArrayT: 

1221 """ 

1222 Return a copy of the array. 

1223 

1224 Returns 

1225 ------- 

1226 ExtensionArray 

1227 """ 

1228 raise AbstractMethodError(self) 

1229 

1230 def view(self, dtype: Dtype | None = None) -> ArrayLike: 

1231 """ 

1232 Return a view on the array. 

1233 

1234 Parameters 

1235 ---------- 

1236 dtype : str, np.dtype, or ExtensionDtype, optional 

1237 Default None. 

1238 

1239 Returns 

1240 ------- 

1241 ExtensionArray or np.ndarray 

1242 A view on the :class:`ExtensionArray`'s data. 

1243 """ 

1244 # NB: 

1245 # - This must return a *new* object referencing the same data, not self. 

1246 # - The only case that *must* be implemented is with dtype=None, 

1247 # giving a view with the same dtype as self. 

1248 if dtype is not None: 

1249 raise NotImplementedError(dtype) 

1250 return self[:] 

1251 

1252 # ------------------------------------------------------------------------ 

1253 # Printing 

1254 # ------------------------------------------------------------------------ 

1255 

1256 def __repr__(self) -> str: 

1257 if self.ndim > 1: 

1258 return self._repr_2d() 

1259 

1260 from pandas.io.formats.printing import format_object_summary 

1261 

1262 # the short repr has no trailing newline, while the truncated 

1263 # repr does. So we include a newline in our template, and strip 

1264 # any trailing newlines from format_object_summary 

1265 data = format_object_summary( 

1266 self, self._formatter(), indent_for_name=False 

1267 ).rstrip(", \n") 

1268 class_name = f"<{type(self).__name__}>\n" 

1269 return f"{class_name}{data}\nLength: {len(self)}, dtype: {self.dtype}" 

1270 

1271 def _repr_2d(self) -> str: 

1272 from pandas.io.formats.printing import format_object_summary 

1273 

1274 # the short repr has no trailing newline, while the truncated 

1275 # repr does. So we include a newline in our template, and strip 

1276 # any trailing newlines from format_object_summary 

1277 lines = [ 

1278 format_object_summary(x, self._formatter(), indent_for_name=False).rstrip( 

1279 ", \n" 

1280 ) 

1281 for x in self 

1282 ] 

1283 data = ",\n".join(lines) 

1284 class_name = f"<{type(self).__name__}>" 

1285 return f"{class_name}\n[\n{data}\n]\nShape: {self.shape}, dtype: {self.dtype}" 

1286 

1287 def _formatter(self, boxed: bool = False) -> Callable[[Any], str | None]: 

1288 """ 

1289 Formatting function for scalar values. 

1290 

1291 This is used in the default '__repr__'. The returned formatting 

1292 function receives instances of your scalar type. 

1293 

1294 Parameters 

1295 ---------- 

1296 boxed : bool, default False 

1297 An indicator for whether or not your array is being printed 

1298 within a Series, DataFrame, or Index (True), or just by 

1299 itself (False). This may be useful if you want scalar values 

1300 to appear differently within a Series versus on its own (e.g. 

1301 quoted or not). 

1302 

1303 Returns 

1304 ------- 

1305 Callable[[Any], str] 

1306 A callable that gets instances of the scalar type and 

1307 returns a string. By default, :func:`repr` is used 

1308 when ``boxed=False`` and :func:`str` is used when 

1309 ``boxed=True``. 

1310 """ 

1311 if boxed: 

1312 return str 

1313 return repr 

1314 

1315 # ------------------------------------------------------------------------ 

1316 # Reshaping 

1317 # ------------------------------------------------------------------------ 

1318 

1319 def transpose(self, *axes: int) -> ExtensionArray: 

1320 """ 

1321 Return a transposed view on this array. 

1322 

1323 Because ExtensionArrays are always 1D, this is a no-op. It is included 

1324 for compatibility with np.ndarray. 

1325 """ 

1326 return self[:] 

1327 

1328 @property 

1329 def T(self) -> ExtensionArray: 

1330 return self.transpose() 

1331 

1332 def ravel(self, order: Literal["C", "F", "A", "K"] | None = "C") -> ExtensionArray: 

1333 """ 

1334 Return a flattened view on this array. 

1335 

1336 Parameters 

1337 ---------- 

1338 order : {None, 'C', 'F', 'A', 'K'}, default 'C' 

1339 

1340 Returns 

1341 ------- 

1342 ExtensionArray 

1343 

1344 Notes 

1345 ----- 

1346 - Because ExtensionArrays are 1D-only, this is a no-op. 

1347 - The "order" argument is ignored, is for compatibility with NumPy. 

1348 """ 

1349 return self 

1350 

1351 @classmethod 

1352 def _concat_same_type( 

1353 cls: type[ExtensionArrayT], to_concat: Sequence[ExtensionArrayT] 

1354 ) -> ExtensionArrayT: 

1355 """ 

1356 Concatenate multiple arrays of this dtype. 

1357 

1358 Parameters 

1359 ---------- 

1360 to_concat : sequence of this type 

1361 

1362 Returns 

1363 ------- 

1364 ExtensionArray 

1365 """ 

1366 # Implementer note: this method will only be called with a sequence of 

1367 # ExtensionArrays of this class and with the same dtype as self. This 

1368 # should allow "easy" concatenation (no upcasting needed), and result 

1369 # in a new ExtensionArray of the same dtype. 

1370 # Note: this strict behaviour is only guaranteed starting with pandas 1.1 

1371 raise AbstractMethodError(cls) 

1372 

1373 # The _can_hold_na attribute is set to True so that pandas internals 

1374 # will use the ExtensionDtype.na_value as the NA value in operations 

1375 # such as take(), reindex(), shift(), etc. In addition, those results 

1376 # will then be of the ExtensionArray subclass rather than an array 

1377 # of objects 

1378 @cache_readonly 

1379 def _can_hold_na(self) -> bool: 

1380 return self.dtype._can_hold_na 

1381 

1382 def _accumulate( 

1383 self, name: str, *, skipna: bool = True, **kwargs 

1384 ) -> ExtensionArray: 

1385 """ 

1386 Return an ExtensionArray performing an accumulation operation. 

1387 

1388 The underlying data type might change. 

1389 

1390 Parameters 

1391 ---------- 

1392 name : str 

1393 Name of the function, supported values are: 

1394 - cummin 

1395 - cummax 

1396 - cumsum 

1397 - cumprod 

1398 skipna : bool, default True 

1399 If True, skip NA values. 

1400 **kwargs 

1401 Additional keyword arguments passed to the accumulation function. 

1402 Currently, there is no supported kwarg. 

1403 

1404 Returns 

1405 ------- 

1406 array 

1407 

1408 Raises 

1409 ------ 

1410 NotImplementedError : subclass does not define accumulations 

1411 """ 

1412 raise NotImplementedError(f"cannot perform {name} with type {self.dtype}") 

1413 

1414 def _reduce(self, name: str, *, skipna: bool = True, **kwargs): 

1415 """ 

1416 Return a scalar result of performing the reduction operation. 

1417 

1418 Parameters 

1419 ---------- 

1420 name : str 

1421 Name of the function, supported values are: 

1422 { any, all, min, max, sum, mean, median, prod, 

1423 std, var, sem, kurt, skew }. 

1424 skipna : bool, default True 

1425 If True, skip NaN values. 

1426 **kwargs 

1427 Additional keyword arguments passed to the reduction function. 

1428 Currently, `ddof` is the only supported kwarg. 

1429 

1430 Returns 

1431 ------- 

1432 scalar 

1433 

1434 Raises 

1435 ------ 

1436 TypeError : subclass does not define reductions 

1437 """ 

1438 meth = getattr(self, name, None) 

1439 if meth is None: 

1440 raise TypeError( 

1441 f"'{type(self).__name__}' with dtype {self.dtype} " 

1442 f"does not support reduction '{name}'" 

1443 ) 

1444 return meth(skipna=skipna, **kwargs) 

1445 

1446 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

1447 # Incompatible types in assignment (expression has type "None", base class 

1448 # "object" defined the type as "Callable[[object], int]") 

1449 __hash__: ClassVar[None] # type: ignore[assignment] 

1450 

1451 # ------------------------------------------------------------------------ 

1452 # Non-Optimized Default Methods; in the case of the private methods here, 

1453 # these are not guaranteed to be stable across pandas versions. 

1454 

1455 def tolist(self) -> list: 

1456 """ 

1457 Return a list of the values. 

1458 

1459 These are each a scalar type, which is a Python scalar 

1460 (for str, int, float) or a pandas scalar 

1461 (for Timestamp/Timedelta/Interval/Period) 

1462 

1463 Returns 

1464 ------- 

1465 list 

1466 """ 

1467 if self.ndim > 1: 

1468 return [x.tolist() for x in self] 

1469 return list(self) 

1470 

1471 def delete(self: ExtensionArrayT, loc: PositionalIndexer) -> ExtensionArrayT: 

1472 indexer = np.delete(np.arange(len(self)), loc) 

1473 return self.take(indexer) 

1474 

1475 def insert(self: ExtensionArrayT, loc: int, item) -> ExtensionArrayT: 

1476 """ 

1477 Insert an item at the given position. 

1478 

1479 Parameters 

1480 ---------- 

1481 loc : int 

1482 item : scalar-like 

1483 

1484 Returns 

1485 ------- 

1486 same type as self 

1487 

1488 Notes 

1489 ----- 

1490 This method should be both type and dtype-preserving. If the item 

1491 cannot be held in an array of this type/dtype, either ValueError or 

1492 TypeError should be raised. 

1493 

1494 The default implementation relies on _from_sequence to raise on invalid 

1495 items. 

1496 """ 

1497 loc = validate_insert_loc(loc, len(self)) 

1498 

1499 item_arr = type(self)._from_sequence([item], dtype=self.dtype) 

1500 

1501 return type(self)._concat_same_type([self[:loc], item_arr, self[loc:]]) 

1502 
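# Illustrative sketch (assumes the masked "Int64" extension array):
#
#     import pandas as pd
#     pd.array([1, 3], dtype="Int64").insert(1, 2)
#     # -> <IntegerArray> [1, 2, 3], dtype: Int64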

1503 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None: 

1504 """ 

1505 Analogue to np.putmask(self, mask, value) 

1506 

1507 Parameters 

1508 ---------- 

1509 mask : np.ndarray[bool] 

1510 value : scalar or listlike 

1511 If listlike, must be arraylike with same length as self. 

1512 

1513 Returns 

1514 ------- 

1515 None 

1516 

1517 Notes 

1518 ----- 

1519 Unlike np.putmask, we do not repeat listlike values with mismatched length. 

1520 'value' should either be a scalar or an arraylike with the same length 

1521 as self. 

1522 """ 

1523 if is_list_like(value): 

1524 val = value[mask] 

1525 else: 

1526 val = value 

1527 

1528 self[mask] = val 

1529 

1530 def _where( 

1531 self: ExtensionArrayT, mask: npt.NDArray[np.bool_], value 

1532 ) -> ExtensionArrayT: 

1533 """ 

1534 Analogue to np.where(mask, self, value) 

1535 

1536 Parameters 

1537 ---------- 

1538 mask : np.ndarray[bool] 

1539 value : scalar or listlike 

1540 

1541 Returns 

1542 ------- 

1543 same type as self 

1544 """ 

1545 result = self.copy() 

1546 

1547 if is_list_like(value): 

1548 val = value[~mask] 

1549 else: 

1550 val = value 

1551 

1552 result[~mask] = val 

1553 return result 

1554 

1555 def _fill_mask_inplace( 

1556 self, method: str, limit, mask: npt.NDArray[np.bool_] 

1557 ) -> None: 

1558 """ 

1559 Replace values in locations specified by 'mask' using pad or backfill. 

1560 

1561 See Also 

1562 -------- 

1563 ExtensionArray.fillna 

1564 """ 

1565 func = missing.get_fill_func(method) 

1566 npvalues = self.astype(object) 

1567 # NB: if we don't copy mask here, it may be altered inplace, which 

1568 # would mess up the `self[mask] = ...` below. 

1569 func(npvalues, limit=limit, mask=mask.copy()) 

1570 new_values = self._from_sequence(npvalues, dtype=self.dtype) 

1571 self[mask] = new_values[mask] 

1572 

1573 def _rank( 

1574 self, 

1575 *, 

1576 axis: AxisInt = 0, 

1577 method: str = "average", 

1578 na_option: str = "keep", 

1579 ascending: bool = True, 

1580 pct: bool = False, 

1581 ): 

1582 """ 

1583 See Series.rank.__doc__. 

1584 """ 

1585 if axis != 0: 

1586 raise NotImplementedError 

1587 

1588 return rank( 

1589 self, 

1590 axis=axis, 

1591 method=method, 

1592 na_option=na_option, 

1593 ascending=ascending, 

1594 pct=pct, 

1595 ) 

1596 

1597 @classmethod 

1598 def _empty(cls, shape: Shape, dtype: ExtensionDtype): 

1599 """ 

1600 Create an ExtensionArray with the given shape and dtype. 

1601 

1602 See Also 

1603 -------- 

1604 ExtensionDtype.empty 

1605 ExtensionDtype.empty is the 'official' public version of this API. 

1606 """ 

1607 # Implementer note: while ExtensionDtype.empty is the public way to 

1608 # call this method, it is still required to implement this `_empty` 

1609 # method as well (it is called internally in pandas) 

1610 obj = cls._from_sequence([], dtype=dtype) 

1611 

1612 taker = np.broadcast_to(np.intp(-1), shape) 

1613 result = obj.take(taker, allow_fill=True) 

1614 if not isinstance(result, cls) or dtype != result.dtype: 

1615 raise NotImplementedError( 

1616 f"Default 'empty' implementation is invalid for dtype='{dtype}'" 

1617 ) 

1618 return result 

1619 

1620 def _quantile( 

1621 self: ExtensionArrayT, qs: npt.NDArray[np.float64], interpolation: str 

1622 ) -> ExtensionArrayT: 

1623 """ 

1624 Compute the quantiles of self for each quantile in `qs`. 

1625 

1626 Parameters 

1627 ---------- 

1628 qs : np.ndarray[float64] 

1629 interpolation: str 

1630 

1631 Returns 

1632 ------- 

1633 same type as self 

1634 """ 

1635 mask = np.asarray(self.isna()) 

1636 arr = np.asarray(self) 

1637 fill_value = np.nan 

1638 

1639 res_values = quantile_with_mask(arr, mask, fill_value, qs, interpolation) 

1640 return type(self)._from_sequence(res_values) 

1641 

1642 def _mode(self: ExtensionArrayT, dropna: bool = True) -> ExtensionArrayT: 

1643 """ 

1644 Returns the mode(s) of the ExtensionArray. 

1645 

1646 Always returns an `ExtensionArray`, even if it contains only one value. 

1647 

1648 Parameters 

1649 ---------- 

1650 dropna : bool, default True 

1651 Don't consider counts of NA values. 

1652 

1653 Returns 

1654 ------- 

1655 same type as self 

1656 Sorted, if possible. 

1657 """ 

1658 # error: Incompatible return value type (got "Union[ExtensionArray, 

1659 # ndarray[Any, Any]]", expected "ExtensionArrayT") 

1660 return mode(self, dropna=dropna) # type: ignore[return-value] 

1661 

1662 def __array_ufunc__(self, ufunc: np.ufunc, method: str, *inputs, **kwargs): 

1663 if any( 

1664 isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)) for other in inputs 

1665 ): 

1666 return NotImplemented 

1667 

1668 result = arraylike.maybe_dispatch_ufunc_to_dunder_op( 

1669 self, ufunc, method, *inputs, **kwargs 

1670 ) 

1671 if result is not NotImplemented: 

1672 return result 

1673 

1674 if "out" in kwargs: 

1675 return arraylike.dispatch_ufunc_with_out( 

1676 self, ufunc, method, *inputs, **kwargs 

1677 ) 

1678 

1679 if method == "reduce": 

1680 result = arraylike.dispatch_reduction_ufunc( 

1681 self, ufunc, method, *inputs, **kwargs 

1682 ) 

1683 if result is not NotImplemented: 

1684 return result 

1685 

1686 return arraylike.default_array_ufunc(self, ufunc, method, *inputs, **kwargs) 

1687 

1688 

1689class ExtensionArraySupportsAnyAll(ExtensionArray): 

1690 def any(self, *, skipna: bool = True) -> bool: 

1691 raise AbstractMethodError(self) 

1692 

1693 def all(self, *, skipna: bool = True) -> bool: 

1694 raise AbstractMethodError(self) 

1695 

1696 

1697class ExtensionOpsMixin: 

1698 """ 

1699 A base class for linking the operators to their dunder names. 

1700 

1701 .. note:: 

1702 

1703 You may want to set ``__array_priority__`` if you want your 

1704 implementation to be called when involved in binary operations 

1705 with NumPy arrays. 

1706 """ 

1707 

1708 @classmethod 

1709 def _create_arithmetic_method(cls, op): 

1710 raise AbstractMethodError(cls) 

1711 

1712 @classmethod 

1713 def _add_arithmetic_ops(cls) -> None: 

1714 setattr(cls, "__add__", cls._create_arithmetic_method(operator.add)) 

1715 setattr(cls, "__radd__", cls._create_arithmetic_method(roperator.radd)) 

1716 setattr(cls, "__sub__", cls._create_arithmetic_method(operator.sub)) 

1717 setattr(cls, "__rsub__", cls._create_arithmetic_method(roperator.rsub)) 

1718 setattr(cls, "__mul__", cls._create_arithmetic_method(operator.mul)) 

1719 setattr(cls, "__rmul__", cls._create_arithmetic_method(roperator.rmul)) 

1720 setattr(cls, "__pow__", cls._create_arithmetic_method(operator.pow)) 

1721 setattr(cls, "__rpow__", cls._create_arithmetic_method(roperator.rpow)) 

1722 setattr(cls, "__mod__", cls._create_arithmetic_method(operator.mod)) 

1723 setattr(cls, "__rmod__", cls._create_arithmetic_method(roperator.rmod)) 

1724 setattr(cls, "__floordiv__", cls._create_arithmetic_method(operator.floordiv)) 

1725 setattr( 

1726 cls, "__rfloordiv__", cls._create_arithmetic_method(roperator.rfloordiv) 

1727 ) 

1728 setattr(cls, "__truediv__", cls._create_arithmetic_method(operator.truediv)) 

1729 setattr(cls, "__rtruediv__", cls._create_arithmetic_method(roperator.rtruediv)) 

1730 setattr(cls, "__divmod__", cls._create_arithmetic_method(divmod)) 

1731 setattr(cls, "__rdivmod__", cls._create_arithmetic_method(roperator.rdivmod)) 

1732 

1733 @classmethod 

1734 def _create_comparison_method(cls, op): 

1735 raise AbstractMethodError(cls) 

1736 

1737 @classmethod 

1738 def _add_comparison_ops(cls) -> None: 

1739 setattr(cls, "__eq__", cls._create_comparison_method(operator.eq)) 

1740 setattr(cls, "__ne__", cls._create_comparison_method(operator.ne)) 

1741 setattr(cls, "__lt__", cls._create_comparison_method(operator.lt)) 

1742 setattr(cls, "__gt__", cls._create_comparison_method(operator.gt)) 

1743 setattr(cls, "__le__", cls._create_comparison_method(operator.le)) 

1744 setattr(cls, "__ge__", cls._create_comparison_method(operator.ge)) 

1745 

1746 @classmethod 

1747 def _create_logical_method(cls, op): 

1748 raise AbstractMethodError(cls) 

1749 

1750 @classmethod 

1751 def _add_logical_ops(cls) -> None: 

1752 setattr(cls, "__and__", cls._create_logical_method(operator.and_)) 

1753 setattr(cls, "__rand__", cls._create_logical_method(roperator.rand_)) 

1754 setattr(cls, "__or__", cls._create_logical_method(operator.or_)) 

1755 setattr(cls, "__ror__", cls._create_logical_method(roperator.ror_)) 

1756 setattr(cls, "__xor__", cls._create_logical_method(operator.xor)) 

1757 setattr(cls, "__rxor__", cls._create_logical_method(roperator.rxor)) 

1758 

1759 

1760class ExtensionScalarOpsMixin(ExtensionOpsMixin): 

1761 """ 

1762 A mixin for defining ops on an ExtensionArray. 

1763 

1764 It is assumed that the underlying scalar objects have the operators 

1765 already defined. 

1766 

1767 Notes 

1768 ----- 

1769 If you have defined a subclass MyExtensionArray(ExtensionArray), then 

1770 use MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin) to 

1771 get the arithmetic operators. After the definition of MyExtensionArray, 

1772 insert the lines 

1773 

1774 MyExtensionArray._add_arithmetic_ops() 

1775 MyExtensionArray._add_comparison_ops() 

1776 

1777 to link the operators to your class. 

1778 

1779 .. note:: 

1780 

1781 You may want to set ``__array_priority__`` if you want your 

1782 implementation to be called when involved in binary operations 

1783 with NumPy arrays. 

1784 """ 

1785 

1786 @classmethod 

1787 def _create_method(cls, op, coerce_to_dtype: bool = True, result_dtype=None): 

1788 """ 

1789 A class method that returns a method that will correspond to an 

1790 operator for an ExtensionArray subclass, by dispatching to the 

1791 relevant operator defined on the individual elements of the 

1792 ExtensionArray. 

1793 

1794 Parameters 

1795 ---------- 

1796 op : function 

1797 An operator that takes arguments op(a, b) 

1798 coerce_to_dtype : bool, default True 

1799 boolean indicating whether to attempt to convert 

1800 the result to the underlying ExtensionArray dtype. 

1801 If it's not possible to create a new ExtensionArray with the 

1802 values, an ndarray is returned instead. 

1803 

1804 Returns 

1805 ------- 

1806 Callable[[Any, Any], Union[ndarray, ExtensionArray]] 

1807 A method that can be bound to a class. When used, the method 

1808 receives the two arguments, one of which is the instance of 

1809 this class, and should return an ExtensionArray or an ndarray. 

1810 

1811 Returning an ndarray may be necessary when the result of the 

1812 `op` cannot be stored in the ExtensionArray. The dtype of the 

1813 ndarray uses NumPy's normal inference rules. 

1814 

1815 Examples 

1816 -------- 

1817 Given an ExtensionArray subclass called MyExtensionArray, use 

1818 

1819 __add__ = cls._create_method(operator.add) 

1820 

1821 in the class definition of MyExtensionArray to create the operator 

1822 for addition, that will be based on the operator implementation 

1823 of the underlying elements of the ExtensionArray 

1824 """ 

1825 

1826 def _binop(self, other): 

1827 def convert_values(param): 

1828 if isinstance(param, ExtensionArray) or is_list_like(param): 

1829 ovalues = param 

1830 else: # Assume it's an object 

1831 ovalues = [param] * len(self) 

1832 return ovalues 

1833 

1834 if isinstance(other, (ABCSeries, ABCIndex, ABCDataFrame)): 

1835 # rely on pandas to unbox and dispatch to us 

1836 return NotImplemented 

1837 

1838 lvalues = self 

1839 rvalues = convert_values(other) 

1840 

1841 # If the operator is not defined for the underlying objects, 

1842 # a TypeError should be raised 

1843 res = [op(a, b) for (a, b) in zip(lvalues, rvalues)] 

1844 

1845 def _maybe_convert(arr): 

1846 if coerce_to_dtype: 

1847 # https://github.com/pandas-dev/pandas/issues/22850 

1848 # We catch all regular exceptions here, and fall back 

1849 # to an ndarray. 

1850 res = maybe_cast_to_extension_array(type(self), arr) 

1851 if not isinstance(res, type(self)): 

1852 # exception raised in _from_sequence; ensure we have ndarray 

1853 res = np.asarray(arr) 

1854 else: 

1855 res = np.asarray(arr, dtype=result_dtype) 

1856 return res 

1857 

1858 if op.__name__ in {"divmod", "rdivmod"}: 

1859 a, b = zip(*res) 

1860 return _maybe_convert(a), _maybe_convert(b) 

1861 

1862 return _maybe_convert(res) 

1863 

1864 op_name = f"__{op.__name__}__" 

1865 return set_function_name(_binop, op_name, cls) 

1866 

1867 @classmethod 

1868 def _create_arithmetic_method(cls, op): 

1869 return cls._create_method(op) 

1870 

1871 @classmethod 

1872 def _create_comparison_method(cls, op): 

1873 return cls._create_method(op, coerce_to_dtype=False, result_dtype=bool)
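# Illustrative wiring sketch (hypothetical subclass name): following the
# ExtensionScalarOpsMixin docstring above, a scalar-backed ExtensionArray opts
# into these operators by mixing in the class and linking the methods once
# after the class definition.
#
#     class MyExtensionArray(ExtensionArray, ExtensionScalarOpsMixin):
#         ...  # implement the abstract ExtensionArray interface
#
#     MyExtensionArray._add_arithmetic_ops()
#     MyExtensionArray._add_comparison_ops()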