# Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/base.py: 50%
1"""
2Base and utility classes for pandas objects.
3"""
5from __future__ import annotations
7import textwrap
8from typing import (
9 TYPE_CHECKING,
10 Any,
11 Generic,
12 Literal,
13 cast,
14 final,
15 overload,
16)
17import warnings
19import numpy as np
21from pandas._config import using_copy_on_write
23from pandas._libs import lib
24from pandas._typing import (
25 AxisInt,
26 DtypeObj,
27 IndexLabel,
28 NDFrameT,
29 Self,
30 Shape,
31 npt,
32)
33from pandas.compat import PYPY
34from pandas.compat.numpy import function as nv
35from pandas.errors import AbstractMethodError
36from pandas.util._decorators import (
37 cache_readonly,
38 doc,
39)
40from pandas.util._exceptions import find_stack_level
42from pandas.core.dtypes.cast import can_hold_element
43from pandas.core.dtypes.common import (
44 is_object_dtype,
45 is_scalar,
46)
47from pandas.core.dtypes.dtypes import ExtensionDtype
48from pandas.core.dtypes.generic import (
49 ABCDataFrame,
50 ABCIndex,
51 ABCSeries,
52)
53from pandas.core.dtypes.missing import (
54 isna,
55 remove_na_arraylike,
56)
58from pandas.core import (
59 algorithms,
60 nanops,
61 ops,
62)
63from pandas.core.accessor import DirNamesMixin
64from pandas.core.arraylike import OpsMixin
65from pandas.core.arrays import ExtensionArray
66from pandas.core.construction import (
67 ensure_wrapped_if_datetimelike,
68 extract_array,
69)
71if TYPE_CHECKING:
72 from collections.abc import (
73 Hashable,
74 Iterator,
75 )
77 from pandas._typing import (
78 DropKeep,
79 NumpySorter,
80 NumpyValueArrayLike,
81 ScalarLike_co,
82 )
84 from pandas import (
85 DataFrame,
86 Index,
87 Series,
88 )
91_shared_docs: dict[str, str] = {}
92_indexops_doc_kwargs = {
93 "klass": "IndexOpsMixin",
94 "inplace": "",
95 "unique": "IndexOpsMixin",
96 "duplicated": "IndexOpsMixin",
97}


class PandasObject(DirNamesMixin):
    """
    Baseclass for various pandas objects.
    """

    # results from calls to methods decorated with cache_readonly get added to _cache
    _cache: dict[str, Any]

    @property
    def _constructor(self):
        """
        Class constructor (for this class it's just `__class__`).
        """
        return type(self)

    def __repr__(self) -> str:
        """
        Return a string representation for a particular object.
        """
        # Should be overwritten by base classes
        return object.__repr__(self)

    def _reset_cache(self, key: str | None = None) -> None:
        """
        Reset cached properties. If ``key`` is passed, only clears that key.
        """
        if not hasattr(self, "_cache"):
            return
        if key is None:
            self._cache.clear()
        else:
            self._cache.pop(key, None)

    def __sizeof__(self) -> int:
        """
        Generate the total memory usage for an object whose ``memory_usage``
        returns either a scalar value or a Series of values.
        """
        memory_usage = getattr(self, "memory_usage", None)
        if memory_usage:
            mem = memory_usage(deep=True)  # pylint: disable=not-callable
            return int(mem if is_scalar(mem) else mem.sum())

        # no memory_usage attribute, so fall back to object's 'sizeof'
        return super().__sizeof__()
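
# Hedged usage sketch (illustrative comment, not part of pandas): results of
# ``cache_readonly`` properties land in ``_cache`` and can be invalidated via
# ``_reset_cache``; ``_Cached`` below is a hypothetical subclass:
#
#   >>> class _Cached(PandasObject):
#   ...     @cache_readonly
#   ...     def expensive(self):
#   ...         return 42
#   >>> obj = _Cached()
#   >>> obj.expensive                  # computed once, then served from _cache
#   42
#   >>> obj._reset_cache("expensive")  # drops just that key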


class NoNewAttributesMixin:
    """
    Mixin which prevents adding new attributes.

    Prevents additional attributes via xxx.attribute = "something" after a
    call to `self._freeze()`. Mainly used to prevent the user from using
    wrong attributes on an accessor (`Series.cat/.str/.dt`).

    If you really want to add a new attribute at a later time, you need to use
    `object.__setattr__(self, key, value)`.
    """

    def _freeze(self) -> None:
        """
        Prevents setting additional attributes.
        """
        object.__setattr__(self, "__frozen", True)

    # prevent adding any attribute via s.xxx.new_attribute = ...
    def __setattr__(self, key: str, value) -> None:
        # _cache is used by a decorator
        # We need to check both 1.) cls.__dict__ and 2.) getattr(self, key)
        # because
        # 1.) getattr is false for attributes that raise errors
        # 2.) cls.__dict__ doesn't traverse into base classes
        if getattr(self, "__frozen", False) and not (
            key == "_cache"
            or key in type(self).__dict__
            or getattr(self, key, None) is not None
        ):
            raise AttributeError(f"You cannot add any new attribute '{key}'")
        object.__setattr__(self, key, value)
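
# Hedged usage sketch (illustrative comment): after ``_freeze()``, assigning a
# brand-new attribute raises, while pre-existing attributes stay writable;
# ``_Accessor`` is a hypothetical subclass:
#
#   >>> class _Accessor(NoNewAttributesMixin):
#   ...     def __init__(self):
#   ...         self.allowed = 1   # set before freezing
#   ...         self._freeze()
#   >>> acc = _Accessor()
#   >>> acc.allowed = 2            # fine: attribute already exists
#   >>> acc.brand_new = 3          # doctest: +SKIP
#   AttributeError: You cannot add any new attribute 'brand_new'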


class SelectionMixin(Generic[NDFrameT]):
    """
    Mixin implementing the selection & aggregation interface on a group-like
    object; sub-classes need to define: obj, exclusions.
    """

    obj: NDFrameT
    _selection: IndexLabel | None = None
    exclusions: frozenset[Hashable]
    _internal_names = ["_cache", "__setstate__"]
    _internal_names_set = set(_internal_names)

    @final
    @property
    def _selection_list(self):
        if not isinstance(
            self._selection, (list, tuple, ABCSeries, ABCIndex, np.ndarray)
        ):
            return [self._selection]
        return self._selection

    @cache_readonly
    def _selected_obj(self):
        if self._selection is None or isinstance(self.obj, ABCSeries):
            return self.obj
        else:
            return self.obj[self._selection]

    @final
    @cache_readonly
    def ndim(self) -> int:
        return self._selected_obj.ndim

    @final
    @cache_readonly
    def _obj_with_exclusions(self):
        if isinstance(self.obj, ABCSeries):
            return self.obj

        if self._selection is not None:
            return self.obj._getitem_nocopy(self._selection_list)

        if len(self.exclusions) > 0:
            # equivalent to `self.obj.drop(self.exclusions, axis=1)`
            # but this avoids consolidating and making a copy
            # TODO: following GH#45287 can we now use .drop directly without
            #  making a copy?
            return self.obj._drop_axis(self.exclusions, axis=1, only_slice=True)
        else:
            return self.obj

    def __getitem__(self, key):
        if self._selection is not None:
            raise IndexError(f"Column(s) {self._selection} already selected")

        if isinstance(key, (list, tuple, ABCSeries, ABCIndex, np.ndarray)):
            if len(self.obj.columns.intersection(key)) != len(set(key)):
                bad_keys = list(set(key).difference(self.obj.columns))
                raise KeyError(f"Columns not found: {str(bad_keys)[1:-1]}")
            return self._gotitem(list(key), ndim=2)

        else:
            if key not in self.obj:
                raise KeyError(f"Column not found: {key}")
            ndim = self.obj[key].ndim
            return self._gotitem(key, ndim=ndim)

    def _gotitem(self, key, ndim: int, subset=None):
        """
        Sub-classes to define; return a sliced object.

        Parameters
        ----------
        key : str / list of selections
        ndim : {1, 2}
            requested ndim of result
        subset : object, default None
            subset to act on
        """
        raise AbstractMethodError(self)

    @final
    def _infer_selection(self, key, subset: Series | DataFrame):
        """
        Infer the `selection` to pass to our constructor in _gotitem.
        """
        # Shared by Rolling and Resample
        selection = None
        if subset.ndim == 2 and (
            (lib.is_scalar(key) and key in subset) or lib.is_list_like(key)
        ):
            selection = key
        elif subset.ndim == 1 and lib.is_scalar(key) and key == subset.name:
            selection = key
        return selection

    def aggregate(self, func, *args, **kwargs):
        raise AbstractMethodError(self)

    agg = aggregate
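
# Hedged usage sketch (illustrative comment): SelectionMixin.__getitem__ is
# what makes column selection on group-like objects work, e.g. on a groupby:
#
#   >>> gb = pd.DataFrame({"a": [1, 1], "b": [2, 3]}).groupby("a")
#   >>> gb["b"]          # scalar key -> _gotitem("b", ndim=1)    # doctest: +SKIP
#   >>> gb[["a", "b"]]   # list key   -> _gotitem([...], ndim=2)  # doctest: +SKIP
#   >>> gb["missing"]    # doctest: +SKIP
#   KeyError: 'Column not found: missing'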


class IndexOpsMixin(OpsMixin):
    """
    Common ops mixin to support a unified interface / docs for Series / Index.
    """

    # ndarray compatibility
    __array_priority__ = 1000
    _hidden_attrs: frozenset[str] = frozenset(
        ["tolist"]  # tolist is not deprecated, just suppressed in the __dir__
    )

    @property
    def dtype(self) -> DtypeObj:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        # must be defined here as a property for mypy
        raise AbstractMethodError(self)

    @final
    def transpose(self, *args, **kwargs) -> Self:
        """
        Return the transpose, which is by definition self.

        Returns
        -------
        %(klass)s
        """
        nv.validate_transpose(args, kwargs)
        return self

    T = property(
        transpose,
        doc="""
        Return the transpose, which is by definition self.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.T
        0     Ant
        1    Bear
        2     Cow
        dtype: object

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx.T
        Index([1, 2, 3], dtype='int64')
        """,
    )

    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.shape
        (3,)
        """
        return self._values.shape

    def __len__(self) -> int:
        # We need this defined here for mypy
        raise AbstractMethodError(self)

    @property
    def ndim(self) -> Literal[1]:
        """
        Number of dimensions of the underlying data, by definition 1.

        Examples
        --------
        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.ndim
        1

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.ndim
        1
        """
        return 1

    @final
    def item(self):
        """
        Return the first element of the underlying data as a Python scalar.

        Returns
        -------
        scalar
            The first element of Series or Index.

        Raises
        ------
        ValueError
            If the data is not length 1.

        Examples
        --------
        >>> s = pd.Series([1])
        >>> s.item()
        1

        For an index:

        >>> s = pd.Series([1], index=['a'])
        >>> s.index.item()
        'a'
        """
        if len(self) == 1:
            return next(iter(self))
        raise ValueError("can only convert an array of size 1 to a Python scalar")

    @property
    def nbytes(self) -> int:
        """
        Return the number of bytes in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.nbytes
        24

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.nbytes
        24
        """
        return self._values.nbytes

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.

        Examples
        --------
        For Series:

        >>> s = pd.Series(['Ant', 'Bear', 'Cow'])
        >>> s
        0     Ant
        1    Bear
        2     Cow
        dtype: object
        >>> s.size
        3

        For Index:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.size
        3
        """
        return len(self._values)

    @property
    def array(self) -> ExtensionArray:
        """
        The ExtensionArray of the data backing this Series or Index.

        Returns
        -------
        ExtensionArray
            An ExtensionArray of the values stored within. For extension
            types, this is the actual array. For NumPy native types, this
            is a thin (no copy) wrapper around :class:`numpy.ndarray`.

            ``.array`` differs from ``.values``, which may require converting
            the data to a different form.

        See Also
        --------
        Index.to_numpy : Similar method that always returns a NumPy array.
        Series.to_numpy : Similar method that always returns a NumPy array.

        Notes
        -----
        This table lays out the different array types for each extension
        dtype within pandas.

        ================== =============================
        dtype              array type
        ================== =============================
        category           Categorical
        period             PeriodArray
        interval           IntervalArray
        IntegerNA          IntegerArray
        string             StringArray
        boolean            BooleanArray
        datetime64[ns, tz] DatetimeArray
        ================== =============================

        For any 3rd-party extension types, the array type will be an
        ExtensionArray.

        For all remaining dtypes ``.array`` will be a
        :class:`arrays.NumpyExtensionArray` wrapping the actual ndarray
        stored within. If you absolutely need a NumPy array (possibly with
        copying / coercing data), then use :meth:`Series.to_numpy` instead.

        Examples
        --------
        For regular NumPy types like int and float, a NumpyExtensionArray
        is returned.

        >>> pd.Series([1, 2, 3]).array
        <NumpyExtensionArray>
        [1, 2, 3]
        Length: 3, dtype: int64

        For extension types, like Categorical, the actual ExtensionArray
        is returned.

        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.array
        ['a', 'b', 'a']
        Categories (2, object): ['a', 'b']
        """
        raise AbstractMethodError(self)

    @final
    def to_numpy(
        self,
        dtype: npt.DTypeLike | None = None,
        copy: bool = False,
        na_value: object = lib.no_default,
        **kwargs,
    ) -> np.ndarray:
        """
        A NumPy ndarray representing the values in this Series or Index.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to pass to :meth:`numpy.asarray`.
        copy : bool, default False
            Whether to ensure that the returned value is not a view on
            another array. Note that ``copy=False`` does not *ensure* that
            ``to_numpy()`` is no-copy. Rather, ``copy=True`` ensures that
            a copy is made, even if not strictly necessary.
        na_value : Any, optional
            The value to use for missing values. The default value depends
            on `dtype` and the type of the array.
        **kwargs
            Additional keywords passed through to the ``to_numpy`` method
            of the underlying array (for extension arrays).

        Returns
        -------
        numpy.ndarray

        See Also
        --------
        Series.array : Get the actual data stored within.
        Index.array : Get the actual data stored within.
        DataFrame.to_numpy : Similar method for DataFrame.

        Notes
        -----
        The returned array will be the same up to equality (values equal
        in `self` will be equal in the returned array; likewise for values
        that are not equal). When `self` contains an ExtensionArray, the
        dtype may be different. For example, for a category-dtype Series,
        ``to_numpy()`` will return a NumPy array and the categorical dtype
        will be lost.

        For NumPy dtypes, this will be a reference to the actual data stored
        in this Series or Index (assuming ``copy=False``). Modifying the result
        in place will modify the data stored in the Series or Index (not that
        we recommend doing that).

        For extension types, ``to_numpy()`` *may* require copying data and
        coercing the result to a NumPy type (possibly object), which may be
        expensive. When you need a no-copy reference to the underlying data,
        :attr:`Series.array` should be used instead.

        This table lays out the different dtypes and default return types of
        ``to_numpy()`` for various dtypes within pandas.

        ================== ================================
        dtype              array type
        ================== ================================
        category[T]        ndarray[T] (same dtype as input)
        period             ndarray[object] (Periods)
        interval           ndarray[object] (Intervals)
        IntegerNA          ndarray[object]
        datetime64[ns]     datetime64[ns]
        datetime64[ns, tz] ndarray[object] (Timestamps)
        ================== ================================

        Examples
        --------
        >>> ser = pd.Series(pd.Categorical(['a', 'b', 'a']))
        >>> ser.to_numpy()
        array(['a', 'b', 'a'], dtype=object)

        Specify the `dtype` to control how datetime-aware data is represented.
        Use ``dtype=object`` to return an ndarray of pandas :class:`Timestamp`
        objects, each with the correct ``tz``.

        >>> ser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> ser.to_numpy(dtype=object)
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or ``dtype='datetime64[ns]'`` to return an ndarray of native
        datetime64 values. The values are converted to UTC and the timezone
        info is dropped.

        >>> ser.to_numpy(dtype="datetime64[ns]")
        ... # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', '2000-01-01T23:00:00...'],
              dtype='datetime64[ns]')
        """
        if isinstance(self.dtype, ExtensionDtype):
            return self.array.to_numpy(dtype, copy=copy, na_value=na_value, **kwargs)
        elif kwargs:
            bad_keys = next(iter(kwargs.keys()))
            raise TypeError(
                f"to_numpy() got an unexpected keyword argument '{bad_keys}'"
            )

        fillna = (
            na_value is not lib.no_default
            # no need to fillna with np.nan if we already have a float dtype
            and not (na_value is np.nan and np.issubdtype(self.dtype, np.floating))
        )

        values = self._values
        if fillna:
            if not can_hold_element(values, na_value):
                # if we can't hold the na_value asarray either makes a copy or we
                # error before modifying values. The asarray later on thus won't make
                # another copy
                values = np.asarray(values, dtype=dtype)
            else:
                values = values.copy()

            values[np.asanyarray(isna(self))] = na_value

        result = np.asarray(values, dtype=dtype)

        if (copy and not fillna) or (not copy and using_copy_on_write()):
            if np.shares_memory(self._values[:2], result[:2]):
                # Take slices to improve performance of check
                if using_copy_on_write() and not copy:
                    result = result.view()
                    result.flags.writeable = False
                else:
                    result = result.copy()

        return result
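
    # Hedged sketch (illustrative comment): the fillna branch above copies
    # before the in-place assignment, so the caller's data is never mutated:
    #
    #   >>> s = pd.Series([1.0, np.nan])
    #   >>> s.to_numpy(dtype=object, na_value=None)  # doctest: +SKIP
    #   array([1.0, None], dtype=object)
    #   >>> s.isna().sum()  # original Series still has its NaN
    #   1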

    @final
    @property
    def empty(self) -> bool:
        return not self.size

    @doc(op="max", oppose="min", value="largest")
    def argmax(
        self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
    ) -> int:
        """
        Return int position of the {value} value in the Series.

        If the {op}imum is achieved in multiple locations,
        the first row position is returned.

        Parameters
        ----------
        axis : {{None}}
            Unused. Parameter needed for compatibility with DataFrame.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        int
            Row position of the {op}imum value.

        See Also
        --------
        Series.arg{op} : Return position of the {op}imum value.
        Series.arg{oppose} : Return position of the {oppose}imum value.
        numpy.ndarray.arg{op} : Equivalent method for numpy arrays.
        Series.idxmax : Return index label of the maximum values.
        Series.idxmin : Return index label of the minimum values.

        Examples
        --------
        Consider a dataset containing cereal calories:

        >>> s = pd.Series({{'Corn Flakes': 100.0, 'Almond Delight': 110.0,
        ...                 'Cinnamon Toast Crunch': 120.0, 'Cocoa Puff': 110.0}})
        >>> s
        Corn Flakes              100.0
        Almond Delight           110.0
        Cinnamon Toast Crunch    120.0
        Cocoa Puff               110.0
        dtype: float64

        >>> s.argmax()
        2
        >>> s.argmin()
        0

        The maximum cereal calories is the third element and
        the minimum cereal calories is the first element,
        since the series is zero-indexed.
        """
        delegate = self._values
        nv.validate_minmax_axis(axis)
        skipna = nv.validate_argmax_with_skipna(skipna, args, kwargs)

        if isinstance(delegate, ExtensionArray):
            if not skipna and delegate.isna().any():
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return -1
            else:
                return delegate.argmax()
        else:
            result = nanops.nanargmax(delegate, skipna=skipna)
            if result == -1:
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            # error: Incompatible return value type (got "Union[int, ndarray]",
            # expected "int")
            return result  # type: ignore[return-value]
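
    # Hedged illustration (comment only): the deprecation branch above returns
    # -1 with a FutureWarning rather than raising, for now:
    #
    #   >>> pd.Series([1.0, np.nan]).argmax(skipna=False)  # doctest: +SKIP
    #   -1  # plus a FutureWarning; a future version will raise ValueError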
764 @doc(argmax, op="min", oppose="max", value="smallest")
765 def argmin(
766 self, axis: AxisInt | None = None, skipna: bool = True, *args, **kwargs
767 ) -> int:
768 delegate = self._values
769 nv.validate_minmax_axis(axis)
770 skipna = nv.validate_argmin_with_skipna(skipna, args, kwargs)
772 if isinstance(delegate, ExtensionArray):
773 if not skipna and delegate.isna().any():
774 warnings.warn(
775 f"The behavior of {type(self).__name__}.argmax/argmin "
776 "with skipna=False and NAs, or with all-NAs is deprecated. "
777 "In a future version this will raise ValueError.",
778 FutureWarning,
779 stacklevel=find_stack_level(),
780 )
781 return -1
782 else:
783 return delegate.argmin()
784 else:
785 result = nanops.nanargmin(delegate, skipna=skipna)
786 if result == -1:
787 warnings.warn(
788 f"The behavior of {type(self).__name__}.argmax/argmin "
789 "with skipna=False and NAs, or with all-NAs is deprecated. "
790 "In a future version this will raise ValueError.",
791 FutureWarning,
792 stacklevel=find_stack_level(),
793 )
794 # error: Incompatible return value type (got "Union[int, ndarray]", expected
795 # "int")
796 return result # type: ignore[return-value]
798 def tolist(self):
799 """
800 Return a list of the values.
802 These are each a scalar type, which is a Python scalar
803 (for str, int, float) or a pandas scalar
804 (for Timestamp/Timedelta/Interval/Period)
806 Returns
807 -------
808 list
810 See Also
811 --------
812 numpy.ndarray.tolist : Return the array as an a.ndim-levels deep
813 nested list of Python scalars.
815 Examples
816 --------
817 For Series
819 >>> s = pd.Series([1, 2, 3])
820 >>> s.to_list()
821 [1, 2, 3]
823 For Index:
825 >>> idx = pd.Index([1, 2, 3])
826 >>> idx
827 Index([1, 2, 3], dtype='int64')
829 >>> idx.to_list()
830 [1, 2, 3]
831 """
832 return self._values.tolist()
834 to_list = tolist
836 def __iter__(self) -> Iterator:
837 """
838 Return an iterator of the values.
840 These are each a scalar type, which is a Python scalar
841 (for str, int, float) or a pandas scalar
842 (for Timestamp/Timedelta/Interval/Period)
844 Returns
845 -------
846 iterator
848 Examples
849 --------
850 >>> s = pd.Series([1, 2, 3])
851 >>> for x in s:
852 ... print(x)
853 1
854 2
855 3
856 """
857 # We are explicitly making element iterators.
858 if not isinstance(self._values, np.ndarray):
859 # Check type instead of dtype to catch DTA/TDA
860 return iter(self._values)
861 else:
862 return map(self._values.item, range(self._values.size))
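
    # Hedged illustration (comment only, not pandas API): the ndarray branch
    # above maps ndarray.item over positions, so iteration yields Python
    # scalars rather than numpy scalars:
    #
    #   >>> [type(x).__name__ for x in pd.Series([1, 2])]
    #   ['int', 'int']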

    @cache_readonly
    def hasnans(self) -> bool:
        """
        Return True if there are any NaNs.

        Enables various performance speedups.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, None])
        >>> s
        0    1.0
        1    2.0
        2    3.0
        3    NaN
        dtype: float64
        >>> s.hasnans
        True
        """
        # error: Item "bool" of "Union[bool, ndarray[Any, dtype[bool_]], NDFrame]"
        # has no attribute "any"
        return bool(isna(self).any())  # type: ignore[union-attr]

    @final
    def _map_values(self, mapper, na_action=None, convert: bool = True):
        """
        An internal function that maps values using the input
        correspondence (which can be a dict, Series, or function).

        Parameters
        ----------
        mapper : function, dict, or Series
            The input correspondence object.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping function.
        convert : bool, default True
            Try to find better dtype for elementwise function results. If
            False, leave as dtype=object. Note that the dtype is always
            preserved for some extension array dtypes, such as Categorical.

        Returns
        -------
        Union[Index, MultiIndex], inferred
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element,
            a MultiIndex will be returned.
        """
        arr = self._values

        if isinstance(arr, ExtensionArray):
            return arr.map(mapper, na_action=na_action)

        return algorithms.map_array(arr, mapper, na_action=na_action, convert=convert)
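
    # Hedged sketch (illustrative comment): dict mappers give missing keys NaN,
    # while na_action="ignore" keeps NA inputs from reaching a function mapper:
    #
    #   >>> pd.Series(["cat", "dog"])._map_values({"cat": "kitten"})  # doctest: +SKIP
    #   array(['kitten', nan], dtype=object)
    #   >>> pd.Series([1.0, np.nan])._map_values(str, na_action="ignore")  # doctest: +SKIP
    #   array(['1.0', nan], dtype=object)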

    @final
    def value_counts(
        self,
        normalize: bool = False,
        sort: bool = True,
        ascending: bool = False,
        bins=None,
        dropna: bool = True,
    ) -> Series:
        """
        Return a Series containing counts of unique values.

        The resulting object will be in descending order so that the
        first element is the most frequently-occurring element.
        Excludes NA values by default.

        Parameters
        ----------
        normalize : bool, default False
            If True then the object returned will contain the relative
            frequencies of the unique values.
        sort : bool, default True
            Sort by frequencies when True. Preserve the order of the data when False.
        ascending : bool, default False
            Sort in ascending order.
        bins : int, optional
            Rather than count values, group them into half-open bins,
            a convenience for ``pd.cut``, only works with numeric data.
        dropna : bool, default True
            Don't include counts of NaN.

        Returns
        -------
        Series

        See Also
        --------
        Series.count: Number of non-NA elements in a Series.
        DataFrame.count: Number of non-NA elements in a DataFrame.
        DataFrame.value_counts: Equivalent method on DataFrames.

        Examples
        --------
        >>> index = pd.Index([3, 1, 2, 3, 4, np.nan])
        >>> index.value_counts()
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        Name: count, dtype: int64

        With `normalize` set to `True`, returns the relative frequency by
        dividing all values by the sum of values.

        >>> s = pd.Series([3, 1, 2, 3, 4, np.nan])
        >>> s.value_counts(normalize=True)
        3.0    0.4
        1.0    0.2
        2.0    0.2
        4.0    0.2
        Name: proportion, dtype: float64

        **bins**

        Bins can be useful for going from a continuous variable to a
        categorical variable; instead of counting unique
        occurrences of values, divide the index into the specified
        number of half-open bins.

        >>> s.value_counts(bins=3)
        (0.996, 2.0]    2
        (2.0, 3.0]      2
        (3.0, 4.0]      1
        Name: count, dtype: int64

        **dropna**

        With `dropna` set to `False` we can also see NaN index values.

        >>> s.value_counts(dropna=False)
        3.0    2
        1.0    1
        2.0    1
        4.0    1
        NaN    1
        Name: count, dtype: int64
        """
        return algorithms.value_counts_internal(
            self,
            sort=sort,
            ascending=ascending,
            normalize=normalize,
            bins=bins,
            dropna=dropna,
        )

    def unique(self):
        values = self._values
        if not isinstance(values, np.ndarray):
            # i.e. ExtensionArray
            result = values.unique()
        else:
            result = algorithms.unique1d(values)
        return result
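
    # Hedged sketch (illustrative comment): both branches above preserve order
    # of first appearance; the ExtensionArray branch also preserves the dtype:
    #
    #   >>> pd.Series([2, 1, 2]).unique()
    #   array([2, 1])
    #   >>> pd.Series(pd.Categorical(["b", "b", "a"])).unique()
    #   ['b', 'a']
    #   Categories (2, object): ['a', 'b']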

    @final
    def nunique(self, dropna: bool = True) -> int:
        """
        Return number of unique elements in the object.

        Excludes NA values by default.

        Parameters
        ----------
        dropna : bool, default True
            Don't include NaN in the count.

        Returns
        -------
        int

        See Also
        --------
        DataFrame.nunique: Method nunique for DataFrame.
        Series.count: Count non-NA/null observations in the Series.

        Examples
        --------
        >>> s = pd.Series([1, 3, 5, 7, 7])
        >>> s
        0    1
        1    3
        2    5
        3    7
        4    7
        dtype: int64

        >>> s.nunique()
        4
        """
        uniqs = self.unique()
        if dropna:
            uniqs = remove_na_arraylike(uniqs)
        return len(uniqs)

    @property
    def is_unique(self) -> bool:
        """
        Return boolean if values in the object are unique.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.is_unique
        True

        >>> s = pd.Series([1, 2, 3, 1])
        >>> s.is_unique
        False
        """
        return self.nunique(dropna=False) == len(self)

    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically increasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([1, 2, 2])
        >>> s.is_monotonic_increasing
        True

        >>> s = pd.Series([3, 2, 1])
        >>> s.is_monotonic_increasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_increasing

    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return boolean if values in the object are monotonically decreasing.

        Returns
        -------
        bool

        Examples
        --------
        >>> s = pd.Series([3, 2, 2, 1])
        >>> s.is_monotonic_decreasing
        True

        >>> s = pd.Series([1, 2, 3])
        >>> s.is_monotonic_decreasing
        False
        """
        from pandas import Index

        return Index(self).is_monotonic_decreasing

    @final
    def _memory_usage(self, deep: bool = False) -> int:
        """
        Memory usage of the values.

        Parameters
        ----------
        deep : bool, default False
            Introspect the data deeply, interrogate
            `object` dtypes for system-level memory consumption.

        Returns
        -------
        bytes used

        See Also
        --------
        numpy.ndarray.nbytes : Total bytes consumed by the elements of the
            array.

        Notes
        -----
        Memory usage does not include memory consumed by elements that
        are not components of the array if deep=False or if used on PyPy.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.memory_usage()
        24
        """
        if hasattr(self.array, "memory_usage"):
            return self.array.memory_usage(  # pyright: ignore[reportGeneralTypeIssues]
                deep=deep,
            )

        v = self.array.nbytes
        if deep and is_object_dtype(self.dtype) and not PYPY:
            values = cast(np.ndarray, self._values)
            v += lib.memory_usage_of_objects(values)
        return v

    @doc(
        algorithms.factorize,
        values="",
        order="",
        size_hint="",
        sort=textwrap.dedent(
            """\
            sort : bool, default False
                Sort `uniques` and shuffle `codes` to maintain the
                relationship.
            """
        ),
    )
    def factorize(
        self,
        sort: bool = False,
        use_na_sentinel: bool = True,
    ) -> tuple[npt.NDArray[np.intp], Index]:
        codes, uniques = algorithms.factorize(
            self._values, sort=sort, use_na_sentinel=use_na_sentinel
        )
        if uniques.dtype == np.float16:
            uniques = uniques.astype(np.float32)

        if isinstance(self, ABCIndex):
            # preserve e.g. MultiIndex
            uniques = self._constructor(uniques)
        else:
            from pandas import Index

            uniques = Index(uniques)
        return codes, uniques
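
    # Hedged sketch (illustrative comment): with sort=True the codes are
    # remapped so that `uniques` comes back ordered:
    #
    #   >>> codes, uniques = pd.Series(["b", "a", "b"]).factorize(sort=True)
    #   >>> codes
    #   array([1, 0, 1])
    #   >>> uniques
    #   Index(['a', 'b'], dtype='object')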

    _shared_docs[
        "searchsorted"
    ] = """
        Find indices where elements should be inserted to maintain order.

        Find the indices into a sorted {klass} `self` such that, if the
        corresponding elements in `value` were inserted before the indices,
        the order of `self` would be preserved.

        .. note::

            The {klass} *must* be monotonically sorted, otherwise
            wrong locations will likely be returned. Pandas does *not*
            check this for you.

        Parameters
        ----------
        value : array-like or scalar
            Values to insert into `self`.
        side : {{'left', 'right'}}, optional
            If 'left', the index of the first suitable location found is given.
            If 'right', return the last such index. If there is no suitable
            index, return either 0 or N (where N is the length of `self`).
        sorter : 1-D array-like, optional
            Optional array of integer indices that sort `self` into ascending
            order. They are typically the result of ``np.argsort``.

        Returns
        -------
        int or array of int
            A scalar or array of insertion points with the
            same shape as `value`.

        See Also
        --------
        sort_values : Sort by the values along either axis.
        numpy.searchsorted : Similar method from NumPy.

        Notes
        -----
        Binary search is used to find the required insertion points.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> ser
        0    1
        1    2
        2    3
        dtype: int64

        >>> ser.searchsorted(4)
        3

        >>> ser.searchsorted([0, 4])
        array([0, 3])

        >>> ser.searchsorted([1, 3], side='left')
        array([0, 2])

        >>> ser.searchsorted([1, 3], side='right')
        array([1, 3])

        >>> ser = pd.Series(pd.to_datetime(['3/11/2000', '3/12/2000', '3/13/2000']))
        >>> ser
        0   2000-03-11
        1   2000-03-12
        2   2000-03-13
        dtype: datetime64[ns]

        >>> ser.searchsorted('3/14/2000')
        3

        >>> ser = pd.Categorical(
        ...     ['apple', 'bread', 'bread', 'cheese', 'milk'], ordered=True
        ... )
        >>> ser
        ['apple', 'bread', 'bread', 'cheese', 'milk']
        Categories (4, object): ['apple' < 'bread' < 'cheese' < 'milk']

        >>> ser.searchsorted('bread')
        1

        >>> ser.searchsorted(['bread'], side='right')
        array([3])

        If the values are not monotonically sorted, wrong locations
        may be returned:

        >>> ser = pd.Series([2, 1, 3])
        >>> ser
        0    2
        1    1
        2    3
        dtype: int64

        >>> ser.searchsorted(1)  # doctest: +SKIP
        0  # wrong result, correct would be 1
        """

    # This overload is needed so that the call to searchsorted in
    # pandas.core.resample.TimeGrouper._get_period_bins picks the correct result

    # error: Overloaded function signatures 1 and 2 overlap with incompatible
    # return types
    @overload
    def searchsorted(  # type: ignore[overload-overlap]
        self,
        value: ScalarLike_co,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> np.intp:
        ...

    @overload
    def searchsorted(
        self,
        value: npt.ArrayLike | ExtensionArray,
        side: Literal["left", "right"] = ...,
        sorter: NumpySorter = ...,
    ) -> npt.NDArray[np.intp]:
        ...

    @doc(_shared_docs["searchsorted"], klass="Index")
    def searchsorted(
        self,
        value: NumpyValueArrayLike | ExtensionArray,
        side: Literal["left", "right"] = "left",
        sorter: NumpySorter | None = None,
    ) -> npt.NDArray[np.intp] | np.intp:
        if isinstance(value, ABCDataFrame):
            msg = (
                "Value must be 1-D array-like or scalar, "
                f"{type(value).__name__} is not supported"
            )
            raise ValueError(msg)

        values = self._values
        if not isinstance(values, np.ndarray):
            # Going through EA.searchsorted directly improves performance GH#38083
            return values.searchsorted(value, side=side, sorter=sorter)

        return algorithms.searchsorted(
            values,
            value,
            side=side,
            sorter=sorter,
        )
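
    # Hedged sketch (illustrative comment): the `sorter` argument lets an
    # unsorted object be searched through an argsort permutation; the result
    # indexes into the *sorted* view:
    #
    #   >>> ser = pd.Series([3, 1, 2])
    #   >>> order = np.argsort(ser.to_numpy())  # array([1, 2, 0])
    #   >>> ser.searchsorted(2, sorter=order)
    #   1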

    def drop_duplicates(self, *, keep: DropKeep = "first"):
        duplicated = self._duplicated(keep=keep)
        # error: Value of type "IndexOpsMixin" is not indexable
        return self[~duplicated]  # type: ignore[index]

    @final
    def _duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        arr = self._values
        if isinstance(arr, ExtensionArray):
            return arr.duplicated(keep=keep)
        return algorithms.duplicated(arr, keep=keep)
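
    # Hedged sketch (illustrative comment): `keep` controls which occurrence
    # survives the boolean mask built by _duplicated:
    #
    #   >>> s = pd.Series([1, 2, 1])
    #   >>> s._duplicated(keep="first")
    #   array([False, False,  True])
    #   >>> s.drop_duplicates(keep="last")
    #   1    2
    #   2    1
    #   dtype: int64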

    def _arith_method(self, other, op):
        res_name = ops.get_op_result_name(self, other)

        lvalues = self._values
        rvalues = extract_array(other, extract_numpy=True, extract_range=True)
        rvalues = ops.maybe_prepare_scalar_for_op(rvalues, lvalues.shape)
        rvalues = ensure_wrapped_if_datetimelike(rvalues)
        if isinstance(rvalues, range):
            rvalues = np.arange(rvalues.start, rvalues.stop, rvalues.step)

        with np.errstate(all="ignore"):
            result = ops.arithmetic_op(lvalues, rvalues, op)

        return self._construct_result(result, name=res_name)
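
    # Hedged sketch (illustrative comment): this is the path taken by e.g.
    # ``pd.Series([1, 2]) + other``; get_op_result_name keeps the name only
    # when both operands agree:
    #
    #   >>> left = pd.Series([1, 2], name="x")
    #   >>> (left + pd.Series([10, 20], name="x")).name
    #   'x'
    #   >>> print((left + pd.Series([10, 20], name="y")).name)
    #   None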

    def _construct_result(self, result, name):
        """
        Construct an appropriately-wrapped result from the ArrayLike result
        of an arithmetic-like operation.
        """
        raise AbstractMethodError(self)