1""" 

2Base and utility classes for pandas objects. 

3""" 

4 

5from __future__ import annotations 

6 

7import textwrap 

8from typing import ( 

9 TYPE_CHECKING, 

10 Any, 

11 Generic, 

12 Literal, 

13 cast, 

14 final, 

15 overload, 

16) 

17import warnings 

18 

19import numpy as np 

20 

21from pandas._config import using_copy_on_write 

22 

23from pandas._libs import lib 

24from pandas._typing import ( 

25 AxisInt, 

26 DtypeObj, 

27 IndexLabel, 

28 NDFrameT, 

29 Self, 

30 Shape, 

31 npt, 

32) 

33from pandas.compat import PYPY 

34from pandas.compat.numpy import function as nv 

35from pandas.errors import AbstractMethodError 

36from pandas.util._decorators import ( 

37 cache_readonly, 

38 doc, 

39) 

40from pandas.util._exceptions import find_stack_level 

41 

42from pandas.core.dtypes.cast import can_hold_element 

43from pandas.core.dtypes.common import ( 

44 is_object_dtype, 

45 is_scalar, 

46) 

47from pandas.core.dtypes.dtypes import ExtensionDtype 

48from pandas.core.dtypes.generic import ( 

49 ABCDataFrame, 

50 ABCIndex, 

51 ABCSeries, 

52) 

53from pandas.core.dtypes.missing import ( 

54 isna, 

55 remove_na_arraylike, 

56) 

57 

58from pandas.core import ( 

59 algorithms, 

60 nanops, 

61 ops, 

62) 

63from pandas.core.accessor import DirNamesMixin 

64from pandas.core.arraylike import OpsMixin 

65from pandas.core.arrays import ExtensionArray 

66from pandas.core.construction import ( 

67 ensure_wrapped_if_datetimelike, 

68 extract_array, 

69) 

70 

71if TYPE_CHECKING: 

72 from collections.abc import ( 

73 Hashable, 

74 Iterator, 

75 ) 

76 

77 from pandas._typing import ( 

78 DropKeep, 

79 NumpySorter, 

80 NumpyValueArrayLike, 

81 ScalarLike_co, 

82 ) 

83 

84 from pandas import ( 

85 DataFrame, 

86 Index, 

87 Series, 

88 ) 

89 

90 

91_shared_docs: dict[str, str] = {} 

92_indexops_doc_kwargs = { 

93 "klass": "IndexOpsMixin", 

94 "inplace": "", 

95 "unique": "IndexOpsMixin", 

96 "duplicated": "IndexOpsMixin", 

97} 

98 

99 


class PandasObject(DirNamesMixin):
    """
    Base class for various pandas objects.
    """

    # results from calls to methods decorated with cache_readonly get added to _cache
    _cache: dict[str, Any]

    @property
    def _constructor(self):
        """
        Class constructor (for this class it's just `__class__`).
        """
        return type(self)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular object.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key: str | None = None) -> None:
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if not hasattr(self, "_cache"):
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self) -> int:
        """
        Generate the total memory usage for an object that returns
        either a value or a Series of values.
        """
        memory_usage = getattr(self, "memory_usage", None)
        if memory_usage:
            mem = memory_usage(deep=True)  # pylint: disable=not-callable
            return int(mem if is_scalar(mem) else mem.sum())

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super().__sizeof__()
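
# A minimal sketch (hypothetical subclass, not part of pandas) of how results
# of ``cache_readonly`` properties land in ``_cache`` and are dropped again by
# ``_reset_cache``:
#
#     class Sized(PandasObject):
#         @cache_readonly
#         def total(self) -> int:
#             print("computing")
#             return 42
#
#     obj = Sized()
#     obj.total              # prints "computing", stores 42 in obj._cache
#     obj.total              # served from obj._cache, nothing printed
#     obj._reset_cache("total")
#     obj.total              # recomputed: prints "computing" again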


class NoNewAttributesMixin:
    """
    Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self.__freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self) -> None:
        """
        Prevents setting additional attributes.
        """
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key: str, value) -> None:
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if getattr(self, "__frozen", False) and not (
            key == "_cache"
            or key in type(self).__dict__
            or getattr(self, key, None) is not None
        ):
            raise AttributeError(f"You cannot add any new attribute '{key}'")
        object.__setattr__(self, key, value)
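
# A minimal usage sketch (hypothetical accessor, not part of pandas): freeze
# at the end of __init__ so later misspelled assignments raise instead of
# silently creating new attributes:
#
#     class _Accessor(NoNewAttributesMixin):
#         def __init__(self, values) -> None:
#             self.values = values
#             self._freeze()       # no new attributes from here on
#
#     acc = _Accessor([1, 2, 3])
#     acc.values = [4, 5]          # fine: the attribute already exists
#     acc.vlaues = [4, 5]          # typo -> AttributeError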


class SelectionMixin(Generic[NDFrameT]):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions.
    """

    obj: NDFrameT
    _selection: IndexLabel | None = None
    exclusions: frozenset[Hashable]
    _internal_names = ["_cache", "__setstate__"]
    _internal_names_set = set(_internal_names)

    @final
    @property
    def _selection_list(self):
        if not isinstance(
            self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray)
        ):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    @cache_readonly
    def ndim(self) -> int:
        return self._selected_obj.ndim

    @final
    @cache_readonly
    def _obj_with_exclusions(self):
        if isinstance(self.obj, ABCSeries):
            return self.obj

        if self._selection is not None:
            return self.obj._getitem_nocopy(self._selection_list)

        if len(self.exclusions) > 0:
            # equivalent to `self.obj.drop(self.exclusions, axis=1)`
            # but this avoids consolidating and making a copy
            # TODO: following GH#45287 can we now use .drop directly without
            #  making a copy?
            return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError(f"Column(s) {self._selection} already selected")

        if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(set(key)):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
            return self._gotitem(list(key), ndim=2)

        else:
            if key not in self.obj:
                raise KeyError(f"Column not found: {key}")
            ndim = self.obj[key].ndim
            return self._gotitem(key, ndim=ndim)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        For sub-classes to define; return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    @final
    def _infer_selection(self, key, subset: Series | DataFrame):
        """
        Infer the `selection` to pass to our constructor in _gotitem.
        """
        # Shared by Rolling and Resample
        selection = None
        if subset.ndim == 2 and (
            (lib.is_scalar(key) and key in subset) or lib.is_list_like(key)
        ):
            selection = key
        elif subset.ndim == 1 and lib.is_scalar(key) and key == subset.name:
            selection = key
        return selection

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate
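
# A minimal sketch (hypothetical, not part of pandas) of the contract a
# subclass fulfils: ``__getitem__`` validates the key against ``self.obj``,
# then ``_gotitem`` returns a re-selected instance of the same group-like
# object:
#
#     class _Selector(SelectionMixin):
#         def __init__(self, obj, selection=None) -> None:
#             self.obj = obj
#             self.exclusions = frozenset()
#             self._selection = selection
#
#         def _gotitem(self, key, ndim: int, subset=None):
#             # return the same kind of object, narrowed to `key`
#             return _Selector(subset if subset is not None else self.obj, key)
#
#     s = _Selector(df)   # df: some DataFrame
#     s["a"]              # -> _Selector with _selection == "a" (ndim == 1)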


class IndexOpsMixin(OpsMixin):
    """
    Common ops mixin to support a unified interface / docs for Series / Index
    """

    # ndarray compatibility
    __array_priority__ = 1000
    _hidden_attrs: frozenset[str] = frozenset(
        ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
    )

    @property
    def dtype(self) -> DtypeObj:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @final
    def transpose(self, *args, **kwargs) -> Self:
        """
        Return the transpose, which is by definition self.

        Returns
        -------
        %(klass)s
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(
        transpose,
        doc="""
        Return the transpose, which is by definition self.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.T
        0     Ant
        1    Bear
        2     Cow
        dtype: object

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx.T
        Index([1, 2, 3], dtype='int64')
        """,
    )

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.shape
        (3,)
        """
        return self._values.shape

    def __len__(self) -> int:
        # We need this defined here for mypy
        raise AbstractMethodError(self)

    @property
    def ndim(self) -> Literal[1]:
        """
        Number of dimensions of the underlying data, by definition 1.

        Examples
        --------
        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.ndim
        1

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.ndim
        1
        """
        return 1

    @final
    def item(self):
        """
        Return the first element of the underlying data as a Python scalar.

        Returns
        -------
        scalar
            The first element of Series or Index.

        Raises
        ------
        ValueError
            If the data is not length = 1.

        Examples
        --------
        >>> s = pd.Series([1])
        >>> s.item()
        1

        For an index:

        >>> s = pd.Series([1], index=['a'])
        >>> s.index.item()
        'a'
        """
        if len(self) == 1:
            return next(iter(self))
        raise ValueError("can only convert an array of size 1 to a Python scalar")

    @property
    def nbytes(self) -> int:
        """
        Return the number of bytes in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.nbytes
        24

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.nbytes
        24
        """
        return self._values.nbytes

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.size
        3

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.size
        3
        """
        return len(self._values)

    @property
    def array(self) -> ExtensionArray:
        """
        The ExtensionArray of the data backing this Series or Index.

        Returns
        -------
        ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.

            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        string             StringArray
        boolean            BooleanArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int, and float, a NumpyExtensionArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <NumpyExtensionArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        ['a', 'b', 'a']
        Categories (2, object): ['a', 'b']
        """
        raise AbstractMethodError(self)

    @final
    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
        **kwargs,
    ) -> np.ndarray:
        """
        A NumPy ndarray representing the values in this Series or Index.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.
        **kwargs
            Additional keywords passed through to the ``to_numpy`` method
            of the underlying array (for extension arrays).

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the result
        in place will modify the data stored in the Series or Index (not that
        we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is represented.
        Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp`
        objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if isinstance(self.dtype, ExtensionDtype):
            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
        elif kwargs:
            bad_keys = next(iter(kwargs.keys()))
            raise TypeError(
                f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
            )

        fillna = (
            na_value is not lib.no_default
            # no need to fillna with np.nan if we already have a float dtype
            and not (na_value is np.nan and np.issubdtype(self.dtype, np.floating))
        )

        values = self._values
        if fillna:
            if not can_hold_element(values, na_value):
                # if we can't hold the na_value asarray either makes a copy or we
                # error before modifying values. The asarray later on thus won't make
                # another copy
                values = np.asarray(values, dtype=dtype)
            else:
                values = values.copy()

            values[np.asanyarray(isna(self))] = na_value

        result = np.asarray(values, dtype=dtype)

        if (copy and not fillna) or (not copy and using_copy_on_write()):
            if np.shares_memory(self._values[:2], result[:2]):
                # Take slices to improve performance of check
                if using_copy_on_write() and not copy:
                    result = result.view()
                    result.flags.writeable = False
                else:
                    result = result.copy()

        return result
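
    # An illustrative sketch (doctest-style; exact reprs depend on the NumPy
    # version) of how ``na_value`` fills missing entries before conversion:
    #
    #     >>> s = pd.Series([1.0, 2.0, None])
    #     >>> s.to_numpy()                  # NaN kept, float dtype
    #     array([ 1.,  2., nan])
    #     >>> s.to_numpy(na_value=0)        # missing values filled first
    #     array([1., 2., 0.])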

    @final
    @property
    def empty(self) -> bool:
        return not self.size

680 @doc(op="max", oppose="min", value="largest") 

681 def argmax( 

682 self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs 

683 ) -> int: 

684 """ 

685 Return int position of the {value} value in the Series. 

686 

687 If the {op}imum is achieved in multiple locations, 

688 the first row position is returned. 

689 

690 Parameters 

691 ---------- 

692 axis : {{None}} 

693 Unused. Parameter needed for compatibility with DataFrame. 

694 skipna : bool, default True 

695 Exclude NA/null values when showing the result. 

696 *args, **kwargs 

697 Additional arguments and keywords for compatibility with NumPy. 

698 

699 Returns 

700 ------- 

701 int 

702 Row position of the {op}imum value. 

703 

704 See Also 

705 -------- 

706 Series.arg{op} : Return position of the {op}imum value. 

707 Series.arg{oppose} : Return position of the {oppose}imum value. 

708 numpy.ndarray.arg{op} : Equivalent method for numpy arrays. 

709 Series.idxmax : Return index label of the maximum values. 

710 Series.idxmin : Return index label of the minimum values. 

711 

712 Examples 

713 -------- 

714 Consider dataset containing cereal calories 

715 

716 >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0, 

717 ... 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}}) 

718 >>> s 

719 Corn Flakes 100.0 

720 Almond Delight 110.0 

721 Cinnamon Toast Crunch 120.0 

722 Cocoa Puff 110.0 

723 dtype: float64 

724 

725 >>> s.argmax() 

726 2 

727 >>> s.argmin() 

728 0 

729 

730 The maximum cereal calories is the third element and 

731 the minimum cereal calories is the first element, 

732 since series is zero-indexed. 

733 """ 

734 delegate = self._values 

735 nv.validate_minmax_axis(axis) 

736 skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs) 

737 

738 if isinstance(delegate, ExtensionArray): 

739 if not skipna and delegate.isna().any(): 

740 warnings.warn( 

741 f"The behavior of {type(self).__name__}.argmax/argmin " 

742 "with skipna=False and NAs, or with all-NAs is deprecated. " 

743 "In a future version this will raise ValueError.", 

744 FutureWarning, 

745 stacklevel=find_stack_level(), 

746 ) 

747 return -1 

748 else: 

749 return delegate.argmax() 

750 else: 

751 result = nanops.nanargmax(delegate, skipna=skipna) 

752 if result == -1: 

753 warnings.warn( 

754 f"The behavior of {type(self).__name__}.argmax/argmin " 

755 "with skipna=False and NAs, or with all-NAs is deprecated. " 

756 "In a future version this will raise ValueError.", 

757 FutureWarning, 

758 stacklevel=find_stack_level(), 

759 ) 

760 # error: Incompatible return value type (got "Union[int, ndarray]", expected 

761 # "int") 

762 return result # type: ignore[return-value] 

763 

    @doc(argmax, op="min", oppose="max", value="smallest")
    def argmin(
        self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
    ) -> int:
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return -1
            else:
                return delegate.argmin()
        else:
            result = nanops.nanargmin(delegate, skipna=skipna)
            if result == -1:
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            # error: Incompatible return value type (got "Union[int, ndarray]", expected
            # "int")
            return result  # type: ignore[return-value]

    def tolist(self):
        """
        Return a list of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period)

        Returns
        -------
        list

        See Also
        --------
        numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
            nested list of Python scalars.

        Examples
        --------
        For Series

        >>> s = pd.Series([1, 2, 3])
        >>> s.to_list()
        [1, 2, 3]

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')

        >>> idx.to_list()
        [1, 2, 3]
        """
        return self._values.tolist()

    to_list = tolist

    def __iter__(self) -> Iterator:
        """
        Return an iterator of the values.

        These are each a scalar type, which is a Python scalar
        (for str, int, float) or a pandas scalar
        (for Timestamp/Timedelta/Interval/Period)

        Returns
        -------
        iterator

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> for x in s:
        ...     print(x)
        1
        2
        3
        """
        # We are explicitly making element iterators.
        if not isinstance(self._values, np.ndarray):
            # Check type instead of dtype to catch DTA/TDA
            return iter(self._values)
        else:
            return map(self._values.item, range(self._values.size))

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs.

        Enables various performance speedups.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, None])
        >>> s
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans
        True
        """
        # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]"
        # has no attribute "any"
        return bool(isna(self).any())  # type: ignore[union-attr]

    @final
    def _map_values(self, mapper, na_action=None, convert: bool = True):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function
        convert : bool, default True
            Try to find better dtype for elementwise function results. If
            False, leave as dtype=object. Note that the dtype is always
            preserved for some extension array dtypes, such as Categorical.

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        arr = self._values

        if isinstance(arr, ExtensionArray):
            return arr.map(mapper, na_action=na_action)

        return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
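
    # An illustrative sketch (doctest-style): ``Series.map`` is one public
    # entry point that funnels into this helper, e.g. with a dict mapper:
    #
    #     >>> s = pd.Series(['cat', 'dog'])
    #     >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
    #     0    kitten
    #     1     puppy
    #     dtype: object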

    @final
    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ) -> Series:
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : bool, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : bool, default True
            Sort by frequencies when True. Preserve the order of the data when False.
        ascending : bool, default False
            Sort in ascending order.
        bins : int, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : bool, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.
        DataFrame.value_counts: Equivalent method on DataFrames.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        Name: count, dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        1.0    0.2
        2.0    0.2
        4.0    0.2
        Name: proportion, dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        apparitions of values, divide the index in the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (0.996, 2.0]    2
        (2.0, 3.0]      2
        (3.0, 4.0]      1
        Name: count, dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        NaN    1
        Name: count, dtype: int64
        """
        return algorithms.value_counts_internal(
            self,
            sort=sort,
            ascending=ascending,
            normalize=normalize,
            bins=bins,
            dropna=dropna,
        )

    def unique(self):
        values = self._values
        if not isinstance(values, np.ndarray):
            # i.e. ExtensionArray
            result = values.unique()
        else:
            result = algorithms.unique1d(values)
        return result

    @final
    def nunique(self, dropna: bool = True) -> int:
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the count.

        Returns
        -------
        int

        See Also
        --------
        DataFrame.nunique: Method nunique for DataFrame.
        Series.count: Count non-NA/null observations in the Series.

        Examples
        --------
        >>> s = pd.Series([1, 3, 5, 7, 7])
        >>> s
        0    1
        1    3
        2    5
        3    7
        4    7
        dtype: int64

        >>> s.nunique()
        4
        """
        uniqs = self.unique()
        if dropna:
            uniqs = remove_na_arraylike(uniqs)
        return len(uniqs)

    @property
    def is_unique(self) -> bool:
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.is_unique
        True

        >>> s = pd.Series([1, 2, 3, 1])
        >>> s.is_unique
        False
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 2])
        >>> s.is_monotonic_increasing
        True

        >>> s = pd.Series([3, 2, 1])
        >>> s.is_monotonic_increasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically decreasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([3, 2, 2, 1])
        >>> s.is_monotonic_decreasing
        True

        >>> s = pd.Series([1, 2, 3])
        >>> s.is_monotonic_decreasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_decreasing

    @final
    def _memory_usage(self, deep: bool = False) -> int:
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool, default False
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes : Total bytes consumed by the elements of the
            array.

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.memory_usage()
        24
        """
        if hasattr(self.array, "memory_usage"):
            return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]
                deep=deep,
            )

        v = self.array.nbytes
        if deep and is_object_dtype(self.dtype) and not PYPY:
            values = cast(np.ndarray, self._values)
            v += lib.memory_usage_of_objects(values)
        return v

    @doc(
        algorithms.factorize,
        values="",
        order="",
        size_hint="",
        sort=textwrap.dedent(
            """\
            sort : bool, default False
                Sort `uniques` and shuffle `codes` to maintain the
                relationship.
            """
        ),
    )
    def factorize(
        self,
        sort: bool = False,
        use_na_sentinel: bool = True,
    ) -> tuple[npt.NDArray[np.intp], Index]:
        codes, uniques = algorithms.factorize(
            self._values, sort=sort, use_na_sentinel=use_na_sentinel
        )
        if uniques.dtype == np.float16:
            uniques = uniques.astype(np.float32)

        if isinstance(self, ABCIndex):
            # preserve e.g. MultiIndex
            uniques = self._constructor(uniques)
        else:
            from pandas import Index

            uniques = Index(uniques)
        return codes, uniques
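
    # An illustrative sketch (doctest-style) of the codes/uniques pair this
    # method returns:
    #
    #     >>> codes, uniques = pd.Series(['b', 'a', 'b']).factorize()
    #     >>> codes
    #     array([0, 1, 0])
    #     >>> uniques
    #     Index(['b', 'a'], dtype='object')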

    _shared_docs[
        "searchsorted"
    ] = """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted {klass} `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        .. note::

            The {klass} *must* be monotonically sorted, otherwise
            wrong locations will likely be returned. Pandas does *not*
            check this for you.

        Parameters
        ----------
        value : array-like or scalar
            Values to insert into `self`.
        side : {{'left', 'right'}}, optional
            If 'left', the index of the first suitable location found is given.
            If 'right', return the last such index. If there is no suitable
            index, return either 0 or N (where N is the length of `self`).
        sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

        See Also
        --------
        sort_values : Sort by the values along either axis.
        numpy.searchsorted : Similar method from NumPy.

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> ser
        0    1
        1    2
        2    3
        dtype: int64

        >>> ser.searchsorted(4)
        3

        >>> ser.searchsorted([0, 4])
        array([0, 3])

        >>> ser.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> ser.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
        >>> ser
        0   2000-03-11
        1   2000-03-12
        2   2000-03-13
        dtype: datetime64[ns]

        >>> ser.searchsorted('3/14/2000')
        3

        >>> ser = pd.Categorical(
        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
        ... )
        >>> ser
        ['apple', 'bread', 'bread', 'cheese', 'milk']
        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']

        >>> ser.searchsorted('bread')
        1

        >>> ser.searchsorted(['bread'], side='right')
        array([3])

        If the values are not monotonically sorted, wrong locations
        may be returned:

        >>> ser = pd.Series([2, 1, 3])
        >>> ser
        0    2
        1    1
        2    3
        dtype: int64

        >>> ser.searchsorted(1)  # doctest: +SKIP
        0  # wrong result, correct would be 1
        """

    # This overload is needed so that the call to searchsorted in
    # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result

    # error: Overloaded function signatures 1 and 2 overlap with incompatible
    # return types
    @overload
    def searchsorted(  # type: ignore[overload-overlap]
        self,
        value: ScalarLike_co,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> np.intp:
        ...

    @overload
    def searchsorted(
        self,
        value: npt.ArrayLike | ExtensionArray,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> npt.NDArray[np.intp]:
        ...

    @doc(_shared_docs["searchsorted"], klass="Index")
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter | None = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if isinstance(value, ABCDataFrame):
            msg = (
                "Value must be 1-D array-like or scalar, "
                f"{type(value).__name__} is not supported"
            )
            raise ValueError(msg)

        values = self._values
        if not isinstance(values, np.ndarray):
            # Going through EA.searchsorted directly improves performance GH#38083
            return values.searchsorted(value, side=side, sorter=sorter)

        return algorithms.searchsorted(
            values,
            value,
            side=side,
            sorter=sorter,
        )

    def drop_duplicates(self, *, keep: DropKeep = "first"):
        duplicated = self._duplicated(keep=keep)
        # error: Value of type "IndexOpsMixin" is not indexable
        return self[~duplicated]  # type: ignore[index]

    @final
    def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        arr = self._values
        if isinstance(arr, ExtensionArray):
            return arr.duplicated(keep=keep)
        return algorithms.duplicated(arr, keep=keep)
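
    # An illustrative sketch (doctest-style) of how ``keep`` picks which
    # occurrence survives:
    #
    #     >>> idx = pd.Index(['a', 'b', 'a'])
    #     >>> idx.drop_duplicates(keep='first')
    #     Index(['a', 'b'], dtype='object')
    #     >>> idx.drop_duplicates(keep='last')
    #     Index(['b', 'a'], dtype='object')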

    def _arith_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)
        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
        rvalues = ensure_wrapped_if_datetimelike(rvalues)
        if isinstance(rvalues, range):
            rvalues = np.arange(rvalues.start, rvalues.stop, rvalues.step)

        with np.errstate(all="ignore"):
            result = ops.arithmetic_op(lvalues, rvalues, op)

        return self._construct_result(result, name=res_name)

    def _construct_result(self, result, name):
        """
        Construct an appropriately-wrapped result from the ArrayLike result
        of an arithmetic-like operation.
        """
        raise AbstractMethodError(self)
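
# A rough sketch of how these hooks compose: OpsMixin routes an arithmetic
# dunder such as ``ser + other`` into ``_arith_method``, which unboxes both
# operands to arrays, computes, and hands the raw result to the subclass's
# ``_construct_result`` to re-wrap it:
#
#     >>> pd.Series([1, 2]) + 10    # dispatches through _arith_method
#     0    11
#     1    12
#     dtype: int64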