# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/base.py: 50%
1"""
2Base and utility classes for pandas objects.
3"""
5from __future__ import annotations
7import textwrap
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 Generic,
12 Literal,
13 cast,
14 final,
15 overload,
16)
17import warnings
19import numpy as np
21from pandas._config import using_copy_on_write
23from pandas._libs import lib
24from pandas._typing import (
25 AxisInt,
26 DtypeObj,
27 IndexLabel,
28 NDFrameT,
29 Self,
30 Shape,
31 npt,
32)
33from pandas.compat import PYPY
34from pandas.compat.numpy import function as nv
35from pandas.errors import AbstractMethodError
36from pandas.util._decorators import (
37 cache_readonly,
38 doc,
39)
40from pandas.util._exceptions import find_stack_level
42from pandas.core.dtypes.cast import can_hold_element
43from pandas.core.dtypes.common import (
44 is_object_dtype,
45 is_scalar,
46)
47from pandas.core.dtypes.dtypes import ExtensionDtype
48from pandas.core.dtypes.generic import (
49 ABCDataFrame,
50 ABCIndex,
51 ABCSeries,
52)
53from pandas.core.dtypes.missing import (
54 isna,
55 remove_na_arraylike,
56)
58from pandas.core import (
59 algorithms,
60 nanops,
61 ops,
62)
63from pandas.core.accessor import DirNamesMixin
64from pandas.core.arraylike import OpsMixin
65from pandas.core.arrays import ExtensionArray
66from pandas.core.construction import (
67 ensure_wrapped_if_datetimelike,
68 extract_array,
69)
71if TYPE_CHECKING:
72 from collections.abc import (
73 Hashable,
74 Iterator,
75 )
77 from pandas._typing import (
78 DropKeep,
79 NumpySorter,
80 NumpyValueArrayLike,
81 ScalarLike_co,
82 )
84 from pandas import (
85 DataFrame,
86 Index,
87 Series,
88 )
91_shared_docs: dict[str, str] = {}
92_indexops_doc_kwargs = {
93 "klass": "IndexOpsMixin",
94 "inplace": "",
95 "unique": "IndexOpsMixin",
96 "duplicated": "IndexOpsMixin",
97}


class PandasObject(DirNamesMixin):
    """
    Baseclass for various pandas objects.
    """

    # results from calls to methods decorated with cache_readonly get added to _cache
    _cache: dict[str, Any]

    @property
    def _constructor(self):
        """
        Class constructor (for this class it's just `__class__`).
        """
        return type(self)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular object.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key: str | None = None) -> None:
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if not hasattr(self, "_cache"):
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self) -> int:
        """
        Generate the total memory usage for an object whose ``memory_usage``
        returns either a scalar value or a Series of values.
        """
        memory_usage = getattr(self, "memory_usage", None)
        if memory_usage:
            mem = memory_usage(deep=True)  # pylint: disable=not-callable
            return int(mem if is_scalar(mem) else mem.sum())

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super().__sizeof__()
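
# Hedged usage sketch (illustrative comment, not part of pandas): results of
# ``cache_readonly`` properties land in ``_cache`` and can be invalidated via
# ``_reset_cache``; ``_Cached`` below is a hypothetical subclass:
#
#   >>> class _Cached(PandasObject):
#   ...     @cache_readonly
#   ...     def expensive(self):
#   ...         return 42
#   >>> obj = _Cached()
#   >>> obj.expensive                  # computed once, then served from _cache
#   42
#   >>> obj._reset_cache("expensive")  # drops just that key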


class NoNewAttributesMixin:
    """
    Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self._freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self) -> None:
        """
        Prevents setting additional attributes.
        """
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key: str, value) -> None:
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if getattr(self, "__frozen", False) and not (
            key == "_cache"
            or key in type(self).__dict__
            or getattr(self, key, None) is not None
        ):
            raise AttributeError(f"You cannot add any new attribute '{key}'")
        object.__setattr__(self, key, value)
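
# Hedged usage sketch (illustrative comment): after ``_freeze()``, assigning a
# brand-new attribute raises, while pre-existing attributes stay writable;
# ``_Accessor`` is a hypothetical subclass:
#
#   >>> class _Accessor(NoNewAttributesMixin):
#   ...     def __init__(self):
#   ...         self.allowed = 1   # set before freezing
#   ...         self._freeze()
#   >>> acc = _Accessor()
#   >>> acc.allowed = 2            # fine: attribute already exists
#   >>> acc.brand_new = 3          # doctest: +SKIP
#   AttributeError: You cannot add any new attribute 'brand_new'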


class SelectionMixin(Generic[NDFrameT]):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions.
    """

    obj: NDFrameT
    _selection: IndexLabel | None = None
    exclusions: frozenset[Hashable]
    _internal_names = ["_cache", "__setstate__"]
    _internal_names_set = set(_internal_names)

    @final
    @property
    def _selection_list(self):
        if not isinstance(
            self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray)
        ):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    @cache_readonly
    def ndim(self) -> int:
        return self._selected_obj.ndim

    @final
    @cache_readonly
    def _obj_with_exclusions(self):
        if isinstance(self.obj, ABCSeries):
            return self.obj

        if self._selection is not None:
            return self.obj._getitem_nocopy(self._selection_list)

        if len(self.exclusions) > 0:
            # equivalent to `self.obj.drop(self.exclusions, axis=1)`
            # but this avoids consolidating and making a copy
            # TODO: following GH#45287 can we now use .drop directly without
            #  making a copy?
            return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError(f"Column(s) {self._selection} already selected")

        if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(set(key)):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
            return self._gotitem(list(key), ndim=2)

        else:
            if key not in self.obj:
                raise KeyError(f"Column not found: {key}")
            ndim = self.obj[key].ndim
            return self._gotitem(key, ndim=ndim)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define; return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    @final
    def _infer_selection(self, key, subset: Series | DataFrame):
        """
        Infer the `selection` to pass to our constructor in _gotitem.
        """
        # Shared by Rolling and Resample
        selection = None
        if subset.ndim == 2 and (
            (lib.is_scalar(key) and key in subset) or lib.is_list_like(key)
        ):
            selection = key
        elif subset.ndim == 1 and lib.is_scalar(key) and key == subset.name:
            selection = key
        return selection

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate
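
# Hedged usage sketch (illustrative comment): SelectionMixin.__getitem__ is
# what makes column selection on group-like objects work, e.g. on a groupby:
#
#   >>> gb = pd.DataFrame({"a": [1, 1], "b": [2, 3]}).groupby("a")
#   >>> gb["b"]          # scalar key -> _gotitem("b", ndim=1)    # doctest: +SKIP
#   >>> gb[["a", "b"]]   # list key   -> _gotitem([...], ndim=2)  # doctest: +SKIP
#   >>> gb["missing"]    # doctest: +SKIP
#   KeyError: 'Column not found: missing'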


class IndexOpsMixin(OpsMixin):
    """
    Common ops mixin to support a unified interface / docs for Series / Index.
    """

    # ndarray compatibility
    __array_priority__ = 1000
    _hidden_attrs: frozenset[str] = frozenset(
        ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
    )

    @property
    def dtype(self) -> DtypeObj:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @final
    def transpose(self, *args, **kwargs) -> Self:
        """
        Return the transpose, which is by definition self.

        Returns
        -------
        %(klass)s
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(
        transpose,
        doc="""
        Return the transpose, which is by definition self.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.T
        0     Ant
        1    Bear
        2     Cow
        dtype: object

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx.T
        Index([1, 2, 3], dtype='int64')
        """,
    )

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.shape
        (3,)
        """
        return self._values.shape

    def __len__(self) -> int:
        # We need this defined here for mypy
        raise AbstractMethodError(self)

    @property
    def ndim(self) -> Literal[1]:
        """
        Number of dimensions of the underlying data, by definition 1.

        Examples
        --------
        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.ndim
        1

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.ndim
        1
        """
        return 1

    @final
    def item(self):
        """
        Return the first element of the underlying data as a Python scalar.

        Returns
        -------
        scalar
            The first element of Series or Index.

        Raises
        ------
        ValueError
            If the data is not length 1.

        Examples
        --------
        >>> s = pd.Series([1])
        >>> s.item()
        1

        For an index:

        >>> s = pd.Series([1], index=['a'])
        >>> s.index.item()
        'a'
        """
        if len(self) == 1:
            return next(iter(self))
        raise ValueError("can only convert an array of size 1 to a Python scalar")

    @property
    def nbytes(self) -> int:
        """
        Return the number of bytes in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.nbytes
        24

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.nbytes
        24
        """
        return self._values.nbytes

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.size
        3

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.size
        3
        """
        return len(self._values)

    @property
    def array(self) -> ExtensionArray:
        """
        The ExtensionArray of the data backing this Series or Index.

        Returns
        -------
        ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.

            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        string             StringArray
        boolean            BooleanArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int and float, a NumpyExtensionArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <NumpyExtensionArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned.

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        ['a', 'b', 'a']
        Categories (2, object): ['a', 'b']
        """
        raise AbstractMethodError(self)

    @final
    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
        **kwargs,
    ) -> np.ndarray:
        """
        A NumPy ndarray representing the values in this Series or Index.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.
        **kwargs
            Additional keywords passed through to the ``to_numpy`` method
            of the underlying array (for extension arrays).

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the result
        in place will modify the data stored in the Series or Index (not that
        we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is represented.
        Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp`
        objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if isinstance(self.dtype, ExtensionDtype):
            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
        elif kwargs:
            bad_keys = next(iter(kwargs.keys()))
            raise TypeError(
                f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
            )

        fillna = (
            na_value is not lib.no_default
            # no need to fillna with np.nan if we already have a float dtype
            and not (na_value is np.nan and np.issubdtype(self.dtype, np.floating))
        )

        values = self._values
        if fillna:
            if not can_hold_element(values, na_value):
                # if we can't hold the na_value asarray either makes a copy or we
                # error before modifying values. The asarray later on thus won't make
                # another copy
                values = np.asarray(values, dtype=dtype)
            else:
                values = values.copy()

            values[np.asanyarray(isna(self))] = na_value

        result = np.asarray(values, dtype=dtype)

        if (copy and not fillna) or (not copy and using_copy_on_write()):
            if np.shares_memory(self._values[:2], result[:2]):
                # Take slices to improve performance of check
                if using_copy_on_write() and not copy:
                    result = result.view()
                    result.flags.writeable = False
                else:
                    result = result.copy()

        return result
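
    # Hedged sketch (illustrative comment): the fillna branch above copies
    # before the in-place assignment, so the caller's data is never mutated:
    #
    #   >>> s = pd.Series([1.0, np.nan])
    #   >>> s.to_numpy(dtype=object, na_value=None)  # doctest: +SKIP
    #   array([1.0, None], dtype=object)
    #   >>> s.isna().sum()  # original Series still has its NaN
    #   1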

    @final
    @property
    def empty(self) -> bool:
        return not self.size

    @doc(op="max", oppose="min", value="largest")
    def argmax(
        self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
    ) -> int:
        """
        Return int position of the {value} value in the Series.

        If the {op}imum is achieved in multiple locations,
        the first row position is returned.

        Parameters
        ----------
        axis : {{None}}
            Unused. Parameter needed for compatibility with DataFrame.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        int
            Row position of the {op}imum value.

        See Also
        --------
        Series.arg{op} : Return position of the {op}imum value.
        Series.arg{oppose} : Return position of the {oppose}imum value.
        numpy.ndarray.arg{op} : Equivalent method for numpy arrays.
        Series.idxmax : Return index label of the maximum values.
        Series.idxmin : Return index label of the minimum values.

        Examples
        --------
        Consider a dataset containing cereal calories:

        >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0,
        ...                 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}})
        >>> s
        Corn Flakes              100.0
        Almond Delight           110.0
        Cinnamon Toast Crunch    120.0
        Cocoa Puff               110.0
        dtype: float64

        >>> s.argmax()
        2
        >>> s.argmin()
        0

        The maximum cereal calories is the third element and
        the minimum cereal calories is the first element,
        since the series is zero-indexed.
        """
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return -1
            else:
                return delegate.argmax()
        else:
            result = nanops.nanargmax(delegate, skipna=skipna)
            if result == -1:
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            # error: Incompatible return value type (got "Union[int, ndarray]",
            # expected "int")
            return result  # type: ignore[return-value]
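
    # Hedged illustration (comment only): the deprecation branch above returns
    # -1 with a FutureWarning rather than raising, for now:
    #
    #   >>> pd.Series([1.0, np.nan]).argmax(skipna=False)  # doctest: +SKIP
    #   -1  # plus a FutureWarning; a future version will raise ValueError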
764 @doc(argmax, op="min", oppose="max", value="smallest")
765 def argmin(
766 self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
767 ) -> int:
768 delegate = self._values
769 nv.validate_minmax_axis(axis)
770 skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
772 if isinstance(delegate, ExtensionArray):
773 if not skipna and delegate.isna().any():
774 warnings.warn(
775 f"The behavior of {type(self).__name__}.argmax/argmin "
776 "with skipna=False and NAs, or with all-NAs is deprecated. "
777 "In a future version this will raise ValueError.",
778 FutureWarning,
779 stacklevel=find_stack_level(),
780 )
781 return -1
782 else:
783 return delegate.argmin()
784 else:
785 result = nanops.nanargmin(delegate, skipna=skipna)
786 if result == -1:
787 warnings.warn(
788 f"The behavior of {type(self).__name__}.argmax/argmin "
789 "with skipna=False and NAs, or with all-NAs is deprecated. "
790 "In a future version this will raise ValueError.",
791 FutureWarning,
792 stacklevel=find_stack_level(),
793 )
794 # error: Incompatible return value type (got "Union[int, ndarray]", expected
795 # "int")
796 return result # type: ignore[return-value]
798 def tolist(self):
799 """
800 Return a list of the values.
802 These are each a scalar type, which is a Python scalar
803 (for str, int, float) or a pandas scalar
804 (for Timestamp/Timedelta/Interval/Period)
806 Returns
807 -------
808 list
810 See Also
811 --------
812 numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
813 nested list of Python scalars.
815 Examples
816 --------
817 For Series
819 >>> s = pd.Series([1, 2, 3])
820 >>> s.to_list()
821 [1, 2, 3]
823 For Index:
825 >>> idx = pd.Index([1, 2, 3])
826 >>> idx
827 Index([1, 2, 3], dtype='int64')
829 >>> idx.to_list()
830 [1, 2, 3]
831 """
832 return self._values.tolist()
834 to_list = tolist
836 def __iter__(self) -> Iterator:
837 """
838 Return an iterator of the values.
840 These are each a scalar type, which is a Python scalar
841 (for str, int, float) or a pandas scalar
842 (for Timestamp/Timedelta/Interval/Period)
844 Returns
845 -------
846 iterator
848 Examples
849 --------
850 >>> s = pd.Series([1, 2, 3])
851 >>> for x in s:
852 ... print(x)
853 1
854 2
855 3
856 """
857 # We are explicitly making element iterators.
858 if not isinstance(self._values, np.ndarray):
859 # Check type instead of dtype to catch DTA/TDA
860 return iter(self._values)
861 else:
862 return map(self._values.item, range(self._values.size))
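
    # Hedged illustration (comment only, not pandas API): the ndarray branch
    # above maps ndarray.item over positions, so iteration yields Python
    # scalars rather than numpy scalars:
    #
    #   >>> [type(x).__name__ for x in pd.Series([1, 2])]
    #   ['int', 'int']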

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs.

        Enables various performance speedups.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, None])
        >>> s
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans
        True
        """
        # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]"
        # has no attribute "any"
        return bool(isna(self).any())  # type: ignore[union-attr]

    @final
    def _map_values(self, mapper, na_action=None, convert: bool = True):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function.
        convert : bool, default True
            Try to find better dtype for elementwise function results. If
            False, leave as dtype=object. Note that the dtype is always
            preserved for some extension array dtypes, such as Categorical.

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element,
            a MultiIndex will be returned.
        """
        arr = self._values

        if isinstance(arr, ExtensionArray):
            return arr.map(mapper, na_action=na_action)

        return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
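
    # Hedged sketch (illustrative comment): dict mappers give missing keys NaN,
    # while na_action="ignore" keeps NA inputs from reaching a function mapper:
    #
    #   >>> pd.Series(["cat", "dog"])._map_values({"cat": "kitten"})  # doctest: +SKIP
    #   array(['kitten', nan], dtype=object)
    #   >>> pd.Series([1.0, np.nan])._map_values(str, na_action="ignore")  # doctest: +SKIP
    #   array(['1.0', nan], dtype=object)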

    @final
    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ) -> Series:
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : bool, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : bool, default True
            Sort by frequencies when True. Preserve the order of the data when False.
        ascending : bool, default False
            Sort in ascending order.
        bins : int, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : bool, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.
        DataFrame.value_counts: Equivalent method on DataFrames.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        Name: count, dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        1.0    0.2
        2.0    0.2
        4.0    0.2
        Name: proportion, dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        occurrences of values, divide the index into the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (0.996, 2.0]    2
        (2.0, 3.0]      2
        (3.0, 4.0]      1
        Name: count, dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        NaN    1
        Name: count, dtype: int64
        """
        return algorithms.value_counts_internal(
            self,
            sort=sort,
            ascending=ascending,
            normalize=normalize,
            bins=bins,
            dropna=dropna,
        )

    def unique(self):
        values = self._values
        if not isinstance(values, np.ndarray):
            # i.e. ExtensionArray
            result = values.unique()
        else:
            result = algorithms.unique1d(values)
        return result
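
    # Hedged sketch (illustrative comment): both branches above preserve order
    # of first appearance; the ExtensionArray branch also preserves the dtype:
    #
    #   >>> pd.Series([2, 1, 2]).unique()
    #   array([2, 1])
    #   >>> pd.Series(pd.Categorical(["b", "b", "a"])).unique()
    #   ['b', 'a']
    #   Categories (2, object): ['a', 'b']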

    @final
    def nunique(self, dropna: bool = True) -> int:
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the count.

        Returns
        -------
        int

        See Also
        --------
        DataFrame.nunique: Method nunique for DataFrame.
        Series.count: Count non-NA/null observations in the Series.

        Examples
        --------
        >>> s = pd.Series([1, 3, 5, 7, 7])
        >>> s
        0    1
        1    3
        2    5
        3    7
        4    7
        dtype: int64

        >>> s.nunique()
        4
        """
        uniqs = self.unique()
        if dropna:
            uniqs = remove_na_arraylike(uniqs)
        return len(uniqs)

    @property
    def is_unique(self) -> bool:
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.is_unique
        True

        >>> s = pd.Series([1, 2, 3, 1])
        >>> s.is_unique
        False
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 2])
        >>> s.is_monotonic_increasing
        True

        >>> s = pd.Series([3, 2, 1])
        >>> s.is_monotonic_increasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically decreasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([3, 2, 2, 1])
        >>> s.is_monotonic_decreasing
        True

        >>> s = pd.Series([1, 2, 3])
        >>> s.is_monotonic_decreasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_decreasing

    @final
    def _memory_usage(self, deep: bool = False) -> int:
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool, default False
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes : Total bytes consumed by the elements of the
            array.

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.memory_usage()
        24
        """
        if hasattr(self.array, "memory_usage"):
            return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]
                deep=deep,
            )

        v = self.array.nbytes
        if deep and is_object_dtype(self.dtype) and not PYPY:
            values = cast(np.ndarray, self._values)
            v += lib.memory_usage_of_objects(values)
        return v

    @doc(
        algorithms.factorize,
        values="",
        order="",
        size_hint="",
        sort=textwrap.dedent(
            """\
            sort : bool, default False
                Sort `uniques` and shuffle `codes` to maintain the
                relationship.
            """
        ),
    )
    def factorize(
        self,
        sort: bool = False,
        use_na_sentinel: bool = True,
    ) -> tuple[npt.NDArray[np.intp], Index]:
        codes, uniques = algorithms.factorize(
            self._values, sort=sort, use_na_sentinel=use_na_sentinel
        )
        if uniques.dtype == np.float16:
            uniques = uniques.astype(np.float32)

        if isinstance(self, ABCIndex):
            # preserve e.g. MultiIndex
            uniques = self._constructor(uniques)
        else:
            from pandas import Index

            uniques = Index(uniques)
        return codes, uniques
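
    # Hedged sketch (illustrative comment): with sort=True the codes are
    # remapped so that `uniques` comes back ordered:
    #
    #   >>> codes, uniques = pd.Series(["b", "a", "b"]).factorize(sort=True)
    #   >>> codes
    #   array([1, 0, 1])
    #   >>> uniques
    #   Index(['a', 'b'], dtype='object')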

    _shared_docs[
        "searchsorted"
    ] = """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted {klass} `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        .. note::

            The {klass} *must* be monotonically sorted, otherwise
            wrong locations will likely be returned. Pandas does *not*
            check this for you.

        Parameters
        ----------
        value : array-like or scalar
            Values to insert into `self`.
        side : {{'left', 'right'}}, optional
            If 'left', the index of the first suitable location found is given.
            If 'right', return the last such index. If there is no suitable
            index, return either 0 or N (where N is the length of `self`).
        sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

        See Also
        --------
        sort_values : Sort by the values along either axis.
        numpy.searchsorted : Similar method from NumPy.

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> ser
        0    1
        1    2
        2    3
        dtype: int64

        >>> ser.searchsorted(4)
        3

        >>> ser.searchsorted([0, 4])
        array([0, 3])

        >>> ser.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> ser.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
        >>> ser
        0   2000-03-11
        1   2000-03-12
        2   2000-03-13
        dtype: datetime64[ns]

        >>> ser.searchsorted('3/14/2000')
        3

        >>> ser = pd.Categorical(
        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
        ... )
        >>> ser
        ['apple', 'bread', 'bread', 'cheese', 'milk']
        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']

        >>> ser.searchsorted('bread')
        1

        >>> ser.searchsorted(['bread'], side='right')
        array([3])

        If the values are not monotonically sorted, wrong locations
        may be returned:

        >>> ser = pd.Series([2, 1, 3])
        >>> ser
        0    2
        1    1
        2    3
        dtype: int64

        >>> ser.searchsorted(1)  # doctest: +SKIP
        0  # wrong result, correct would be 1
        """

    # This overload is needed so that the call to searchsorted in
    # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result

    # error: Overloaded function signatures 1 and 2 overlap with incompatible
    # return types
    @overload
    def searchsorted(  # type: ignore[overload-overlap]
        self,
        value: ScalarLike_co,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> np.intp:
        ...

    @overload
    def searchsorted(
        self,
        value: npt.ArrayLike | ExtensionArray,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> npt.NDArray[np.intp]:
        ...

    @doc(_shared_docs["searchsorted"], klass="Index")
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter | None = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if isinstance(value, ABCDataFrame):
            msg = (
                "Value must be 1-D array-like or scalar, "
                f"{type(value).__name__} is not supported"
            )
            raise ValueError(msg)

        values = self._values
        if not isinstance(values, np.ndarray):
            # Going through EA.searchsorted directly improves performance GH#38083
            return values.searchsorted(value, side=side, sorter=sorter)

        return algorithms.searchsorted(
            values,
            value,
            side=side,
            sorter=sorter,
        )
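
    # Hedged sketch (illustrative comment): the `sorter` argument lets an
    # unsorted object be searched through an argsort permutation; the result
    # indexes into the *sorted* view:
    #
    #   >>> ser = pd.Series([3, 1, 2])
    #   >>> order = np.argsort(ser.to_numpy())  # array([1, 2, 0])
    #   >>> ser.searchsorted(2, sorter=order)
    #   1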

    def drop_duplicates(self, *, keep: DropKeep = "first"):
        duplicated = self._duplicated(keep=keep)
        # error: Value of type "IndexOpsMixin" is not indexable
        return self[~duplicated]  # type: ignore[index]

    @final
    def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        arr = self._values
        if isinstance(arr, ExtensionArray):
            return arr.duplicated(keep=keep)
        return algorithms.duplicated(arr, keep=keep)
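
    # Hedged sketch (illustrative comment): `keep` controls which occurrence
    # survives the boolean mask built by _duplicated:
    #
    #   >>> s = pd.Series([1, 2, 1])
    #   >>> s._duplicated(keep="first")
    #   array([False, False,  True])
    #   >>> s.drop_duplicates(keep="last")
    #   1    2
    #   2    1
    #   dtype: int64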

    def _arith_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)
        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
        rvalues = ensure_wrapped_if_datetimelike(rvalues)
        if isinstance(rvalues, range):
            rvalues = np.arange(rvalues.start, rvalues.stop, rvalues.step)

        with np.errstate(all="ignore"):
            result = ops.arithmetic_op(lvalues, rvalues, op)

        return self._construct_result(result, name=res_name)
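
    # Hedged sketch (illustrative comment): this is the path taken by e.g.
    # ``pd.Series([1, 2]) + other``; get_op_result_name keeps the name only
    # when both operands agree:
    #
    #   >>> left = pd.Series([1, 2], name="x")
    #   >>> (left + pd.Series([10, 20], name="x")).name
    #   'x'
    #   >>> print((left + pd.Series([10, 20], name="y")).name)
    #   None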

    def _construct_result(self, result, name):
        """
        Construct an appropriately-wrapped result from the ArrayLike result
        of an arithmetic-like operation.
        """
        raise AbstractMethodError(self)