1"""
2Data structure for 1-dimensional cross-sectional and time series data
3"""
4from __future__ import annotations
5
6import sys
7from textwrap import dedent
8from typing import (
9 IO,
10 TYPE_CHECKING,
11 Any,
12 Callable,
13 Hashable,
14 Iterable,
15 Literal,
16 Mapping,
17 Sequence,
18 Union,
19 cast,
20 overload,
21)
22import warnings
23import weakref
24
25import numpy as np
26
27from pandas._config import (
28 get_option,
29 using_copy_on_write,
30)
31
32from pandas._libs import (
33 lib,
34 properties,
35 reshape,
36)
37from pandas._libs.internals import BlockValuesRefs
38from pandas._libs.lib import is_range_indexer
39from pandas._typing import (
40 AggFuncType,
41 AlignJoin,
42 AnyAll,
43 AnyArrayLike,
44 ArrayLike,
45 Axis,
46 AxisInt,
47 CorrelationMethod,
48 DropKeep,
49 Dtype,
50 DtypeBackend,
51 DtypeObj,
52 FilePath,
53 FillnaOptions,
54 Frequency,
55 IgnoreRaise,
56 IndexKeyFunc,
57 IndexLabel,
58 Level,
59 NaPosition,
60 QuantileInterpolation,
61 Renamer,
62 Scalar,
63 SingleManager,
64 SortKind,
65 StorageOptions,
66 TimedeltaConvertibleTypes,
67 TimestampConvertibleTypes,
68 ValueKeyFunc,
69 WriteBuffer,
70 npt,
71)
72from pandas.compat import PYPY
73from pandas.compat.numpy import function as nv
74from pandas.errors import (
75 ChainedAssignmentError,
76 InvalidIndexError,
77 _chained_assignment_msg,
78)
79from pandas.util._decorators import (
80 Appender,
81 Substitution,
82 doc,
83)
84from pandas.util._exceptions import find_stack_level
85from pandas.util._validators import (
86 validate_ascending,
87 validate_bool_kwarg,
88 validate_percentile,
89)
90
91from pandas.core.dtypes.astype import astype_is_view
92from pandas.core.dtypes.cast import (
93 LossySetitemError,
94 convert_dtypes,
95 maybe_box_native,
96 maybe_cast_pointwise_result,
97)
98from pandas.core.dtypes.common import (
99 ensure_platform_int,
100 is_dict_like,
101 is_extension_array_dtype,
102 is_integer,
103 is_iterator,
104 is_list_like,
105 is_numeric_dtype,
106 is_object_dtype,
107 is_scalar,
108 pandas_dtype,
109 validate_all_hashable,
110)
111from pandas.core.dtypes.generic import ABCDataFrame
112from pandas.core.dtypes.inference import is_hashable
113from pandas.core.dtypes.missing import (
114 isna,
115 na_value_for_dtype,
116 notna,
117 remove_na_arraylike,
118)
119
120from pandas.core import (
121 algorithms,
122 base,
123 common as com,
124 missing,
125 nanops,
126 ops,
127)
128from pandas.core.accessor import CachedAccessor
129from pandas.core.apply import SeriesApply
130from pandas.core.arrays import ExtensionArray
131from pandas.core.arrays.categorical import CategoricalAccessor
132from pandas.core.arrays.sparse import SparseAccessor
133from pandas.core.construction import (
134 extract_array,
135 sanitize_array,
136)
137from pandas.core.generic import NDFrame
138from pandas.core.indexers import (
139 disallow_ndim_indexing,
140 unpack_1tuple,
141)
142from pandas.core.indexes.accessors import CombinedDatetimelikeProperties
143from pandas.core.indexes.api import (
144 DatetimeIndex,
145 Index,
146 MultiIndex,
147 PeriodIndex,
148 default_index,
149 ensure_index,
150)
151import pandas.core.indexes.base as ibase
152from pandas.core.indexes.multi import maybe_droplevels
153from pandas.core.indexing import (
154 check_bool_indexer,
155 check_dict_or_set_indexers,
156)
157from pandas.core.internals import (
158 SingleArrayManager,
159 SingleBlockManager,
160)
161from pandas.core.methods import selectn
162from pandas.core.shared_docs import _shared_docs
163from pandas.core.sorting import (
164 ensure_key_mapped,
165 nargsort,
166)
167from pandas.core.strings.accessor import StringMethods
168from pandas.core.tools.datetimes import to_datetime
169
170import pandas.io.formats.format as fmt
171from pandas.io.formats.info import (
172 INFO_DOCSTRING,
173 SeriesInfo,
174 series_sub_kwargs,
175)
176import pandas.plotting
177
178if TYPE_CHECKING:
179 from pandas._typing import (
180 NumpySorter,
181 NumpyValueArrayLike,
182 Suffixes,
183 )
184
185 from pandas.core.frame import DataFrame
186 from pandas.core.groupby.generic import SeriesGroupBy
187 from pandas.core.resample import Resampler
188
189__all__ = ["Series"]
190
191_shared_doc_kwargs = {
192 "axes": "index",
193 "klass": "Series",
194 "axes_single_arg": "{0 or 'index'}",
195 "axis": """axis : {0 or 'index'}
196 Unused. Parameter needed for compatibility with DataFrame.""",
197 "inplace": """inplace : bool, default False
198 If True, performs operation inplace and returns None.""",
199 "unique": "np.ndarray",
200 "duplicated": "Series",
201 "optional_by": "",
202 "optional_mapper": "",
203 "optional_reindex": """
204index : array-like, optional
205 New labels for the index. Preferably an Index object to avoid
206 duplicating data.
207axis : int or str, optional
208 Unused.""",
209 "replace_iloc": """
210 This differs from updating with ``.loc`` or ``.iloc``, which require
211 you to specify a location to update with some value.""",
212}
213
214
215def _coerce_method(converter):
216 """
217 Install the scalar coercion methods.
218 """
219
220 def wrapper(self):
221 if len(self) == 1:
222 warnings.warn(
223 f"Calling {converter.__name__} on a single element Series is "
224 "deprecated and will raise a TypeError in the future. "
225 f"Use {converter.__name__}(ser.iloc[0]) instead",
226 FutureWarning,
227 stacklevel=find_stack_level(),
228 )
229 return converter(self.iloc[0])
230 raise TypeError(f"cannot convert the series to {converter}")
231
232 wrapper.__name__ = f"__{converter.__name__}__"
233 return wrapper


# ----------------------------------------------------------------------
# Series class


# error: Definition of "max" in base class "IndexOpsMixin" is incompatible with
# definition in base class "NDFrame"
# error: Definition of "min" in base class "IndexOpsMixin" is incompatible with
# definition in base class "NDFrame"
class Series(base.IndexOpsMixin, NDFrame):  # type: ignore[misc]
    """
    One-dimensional ndarray with axis labels (including time series).

    Labels need not be unique but must be a hashable type. The object
    supports both integer- and label-based indexing and provides a host of
    methods for performing operations involving the index. Statistical
    methods from ndarray have been overridden to automatically exclude
    missing data (currently represented as NaN).

    Operations between Series (+, -, /, \\*, \\*\\*) align values based on their
    associated index values -- they need not be the same length. The result
    index will be the sorted union of the two indexes.

    Parameters
    ----------
    data : array-like, Iterable, dict, or scalar value
        Contains data stored in Series. If data is a dict, argument order is
        maintained.
    index : array-like or Index (1d)
        Values must be hashable and have the same length as `data`.
        Non-unique index values are allowed. Will default to
        RangeIndex (0, 1, 2, ..., n) if not provided. If data is dict-like
        and index is None, then the keys in the data are used as the index. If the
        index is not None, the resulting Series is reindexed with the index values.
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Series. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    name : Hashable, default None
        The name to give to the Series.
    copy : bool, default False
        Copy input data. Only affects Series or 1d ndarray input. See examples.

    Notes
    -----
    Please reference the :ref:`User Guide <basics.series>` for more information.

    Examples
    --------
    Constructing Series from a dictionary with an Index specified

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['a', 'b', 'c'])
    >>> ser
    a    1
    b    2
    c    3
    dtype: int64

    The keys of the dictionary match with the Index values, hence the Index
    values have no effect.

    >>> d = {'a': 1, 'b': 2, 'c': 3}
    >>> ser = pd.Series(data=d, index=['x', 'y', 'z'])
    >>> ser
    x   NaN
    y   NaN
    z   NaN
    dtype: float64

    Note that the Index is first built with the keys from the dictionary.
    After this the Series is reindexed with the given Index values, hence we
    get all NaN as a result.

    Constructing Series from a list with `copy=False`.

    >>> r = [1, 2]
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    [1, 2]
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to the input data type, the Series has a `copy` of
    the original data even though `copy=False`, so
    the data is unchanged.

    Constructing Series from a 1d ndarray with `copy=False`.

    >>> r = np.array([1, 2])
    >>> ser = pd.Series(r, copy=False)
    >>> ser.iloc[0] = 999
    >>> r
    array([999,   2])
    >>> ser
    0    999
    1      2
    dtype: int64

    Due to the input data type, the Series has a `view` on
    the original data, so
    the data is changed as well.
    """

    _typ = "series"
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    _name: Hashable
    _metadata: list[str] = ["name"]
    _internal_names_set = {"index"} | NDFrame._internal_names_set
    _accessors = {"dt", "cat", "str", "sparse"}
    _hidden_attrs = (
        base.IndexOpsMixin._hidden_attrs | NDFrame._hidden_attrs | frozenset([])
    )

    # Override cache_readonly bc Series is mutable
    # error: Incompatible types in assignment (expression has type "property",
    # base class "IndexOpsMixin" defined the type as "Callable[[IndexOpsMixin], bool]")
    hasnans = property(  # type: ignore[assignment]
        # error: "Callable[[IndexOpsMixin], bool]" has no attribute "fget"
        base.IndexOpsMixin.hasnans.fget,  # type: ignore[attr-defined]
        doc=base.IndexOpsMixin.hasnans.__doc__,
    )
    _mgr: SingleManager
    div: Callable[[Series, Any], Series]
    rdiv: Callable[[Series, Any], Series]

    # ----------------------------------------------------------------------
    # Constructors

    def __init__(
        self,
        data=None,
        index=None,
        dtype: Dtype | None = None,
        name=None,
        copy: bool | None = None,
        fastpath: bool = False,
    ) -> None:
        if (
            isinstance(data, (SingleBlockManager, SingleArrayManager))
            and index is None
            and dtype is None
            and (copy is False or copy is None)
        ):
            if using_copy_on_write():
                data = data.copy(deep=False)
            # GH#33357 called with just the SingleBlockManager
            NDFrame.__init__(self, data)
            if fastpath:
                # e.g. from _box_col_values, skip validation of name
                object.__setattr__(self, "_name", name)
            else:
                self.name = name
            return

        if isinstance(data, (ExtensionArray, np.ndarray)):
            if copy is not False and using_copy_on_write():
                if dtype is None or astype_is_view(data.dtype, pandas_dtype(dtype)):
                    data = data.copy()
        if copy is None:
            copy = False

        # we are called internally, so short-circuit
        if fastpath:
            # data is a ndarray, index is defined
            if not isinstance(data, (SingleBlockManager, SingleArrayManager)):
                manager = get_option("mode.data_manager")
                if manager == "block":
                    data = SingleBlockManager.from_array(data, index)
                elif manager == "array":
                    data = SingleArrayManager.from_array(data, index)
            elif using_copy_on_write() and not copy:
                data = data.copy(deep=False)
            if copy:
                data = data.copy()
            # skips validation of the name
            object.__setattr__(self, "_name", name)
            NDFrame.__init__(self, data)
            return

        if isinstance(data, SingleBlockManager) and using_copy_on_write() and not copy:
            data = data.copy(deep=False)

        name = ibase.maybe_extract_name(name, data, type(self))

        if index is not None:
            index = ensure_index(index)

        if dtype is not None:
            dtype = self._validate_dtype(dtype)

        if data is None:
            index = index if index is not None else default_index(0)
            if len(index) or dtype is not None:
                data = na_value_for_dtype(pandas_dtype(dtype), compat=False)
            else:
                data = []

        if isinstance(data, MultiIndex):
            raise NotImplementedError(
                "initializing a Series from a MultiIndex is not supported"
            )

        refs = None
        if isinstance(data, Index):
            if dtype is not None:
                data = data.astype(dtype, copy=False)

            if using_copy_on_write():
                refs = data._references
                data = data._values
            else:
                # GH#24096 we need to ensure the index remains immutable
                data = data._values.copy()
            copy = False

        elif isinstance(data, np.ndarray):
            if len(data.dtype):
                # GH#13296 we are dealing with a compound dtype, which
                # should be treated as 2D
                raise ValueError(
                    "Cannot construct a Series from an ndarray with "
                    "compound dtype. Use DataFrame instead."
                )
        elif isinstance(data, Series):
            if index is None:
                index = data.index
                data = data._mgr.copy(deep=False)
            else:
                data = data.reindex(index, copy=copy)
                copy = False
                data = data._mgr
        elif is_dict_like(data):
            data, index = self._init_dict(data, index, dtype)
            dtype = None
            copy = False
        elif isinstance(data, (SingleBlockManager, SingleArrayManager)):
            if index is None:
                index = data.index
            elif not data.index.equals(index) or copy:
                # GH#19275 SingleBlockManager input should only be called
                # internally
                raise AssertionError(
                    "Cannot pass both SingleBlockManager "
                    "`data` argument and a different "
                    "`index` argument. `copy` must be False."
                )

        elif isinstance(data, ExtensionArray):
            pass
        else:
            data = com.maybe_iterable_to_list(data)
            if is_list_like(data) and not len(data) and dtype is None:
                # GH 29405: Pre-2.0, this defaulted to float.
                dtype = np.dtype(object)

        if index is None:
            if not is_list_like(data):
                data = [data]
            index = default_index(len(data))
        elif is_list_like(data):
            com.require_length_match(data, index)

        # create/copy the manager
        if isinstance(data, (SingleBlockManager, SingleArrayManager)):
            if dtype is not None:
                data = data.astype(dtype=dtype, errors="ignore", copy=copy)
            elif copy:
                data = data.copy()
        else:
            data = sanitize_array(data, index, dtype, copy)

            manager = get_option("mode.data_manager")
            if manager == "block":
                data = SingleBlockManager.from_array(data, index, refs=refs)
            elif manager == "array":
                data = SingleArrayManager.from_array(data, index)

        NDFrame.__init__(self, data)
        self.name = name
        self._set_axis(0, index)

    def _init_dict(
        self, data, index: Index | None = None, dtype: DtypeObj | None = None
    ):
        """
        Derive the "_mgr" and "index" attributes of a new Series from a
        dictionary input.

        Parameters
        ----------
        data : dict or dict-like
            Data used to populate the new Series.
        index : Index or None, default None
            Index for the new Series: if None, use dict keys.
        dtype : np.dtype, ExtensionDtype, or None, default None
            The dtype for the new Series: if None, infer from data.

        Returns
        -------
        _data : BlockManager for the new Series
        index : index for the new Series
        """
        keys: Index | tuple

        # Looking for NaN in dict doesn't work ({np.nan : 1}[float('nan')]
        # raises KeyError), so we iterate the entire dict, and align
        if data:
            # GH#34717: the issue was using zip to extract keys and values
            # from data, since using generators hurts performance.
            # Below is the new way of extracting the keys and values.

            keys = tuple(data.keys())
            values = list(data.values())  # generating a list of values is faster
        elif index is not None:
            # fastpath for Series(data=None). Just broadcast a scalar
            # instead of reindexing.
            if len(index) or dtype is not None:
                values = na_value_for_dtype(pandas_dtype(dtype), compat=False)
            else:
                values = []
            keys = index
        else:
            keys, values = default_index(0), []

        # Input is now list-like, so rely on "standard" construction:
        s = Series(values, index=keys, dtype=dtype)

        # Now we just make sure the order is respected, if any
        if data and index is not None:
            s = s.reindex(index, copy=False)
        return s._mgr, s.index

    # ----------------------------------------------------------------------

    @property
    def _constructor(self) -> Callable[..., Series]:
        return Series

    @property
    def _constructor_expanddim(self) -> Callable[..., DataFrame]:
        """
        Used when a manipulation result has one higher dimension than the
        original, such as Series.to_frame().
        """
        from pandas.core.frame import DataFrame

        return DataFrame

    # types
    @property
    def _can_hold_na(self) -> bool:
        return self._mgr._can_hold_na

    # ndarray compatibility
    @property
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.dtype
        dtype('int64')
        """
        return self._mgr.dtype

    @property
    def dtypes(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.dtypes
        dtype('int64')
        """
        # DataFrame compatibility
        return self.dtype

    @property
    def name(self) -> Hashable:
        """
        Return the name of the Series.

        The name of a Series becomes its index or column name if it is used
        to form a DataFrame. It is also used whenever displaying the Series
        using the interpreter.

        Returns
        -------
        label (hashable object)
            The name of the Series, also the column name if part of a DataFrame.

        See Also
        --------
        Series.rename : Sets the Series name when given a scalar input.
        Index.name : Corresponding Index property.

        Examples
        --------
        The Series name can be set initially when calling the constructor.

        >>> s = pd.Series([1, 2, 3], dtype=np.int64, name='Numbers')
        >>> s
        0    1
        1    2
        2    3
        Name: Numbers, dtype: int64
        >>> s.name = "Integers"
        >>> s
        0    1
        1    2
        2    3
        Name: Integers, dtype: int64

        The name of a Series within a DataFrame is its column name.

        >>> df = pd.DataFrame([[1, 2], [3, 4], [5, 6]],
        ...                   columns=["Odd Numbers", "Even Numbers"])
        >>> df
           Odd Numbers  Even Numbers
        0            1             2
        1            3             4
        2            5             6
        >>> df["Even Numbers"].name
        'Even Numbers'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        validate_all_hashable(value, error_name=f"{type(self).__name__}.name")
        object.__setattr__(self, "_name", value)

    @property
    def values(self):
        """
        Return Series as ndarray or ndarray-like depending on the dtype.

        .. warning::

           We recommend using :attr:`Series.array` or
           :meth:`Series.to_numpy`, depending on whether you need
           a reference to the underlying data or a NumPy array.

        Returns
        -------
        numpy.ndarray or ndarray-like

        See Also
        --------
        Series.array : Reference to the underlying data.
        Series.to_numpy : A NumPy array representing the underlying data.

        Examples
        --------
        >>> pd.Series([1, 2, 3]).values
        array([1, 2, 3])

        >>> pd.Series(list('aabc')).values
        array(['a', 'a', 'b', 'c'], dtype=object)

        >>> pd.Series(list('aabc')).astype('category').values
        ['a', 'a', 'b', 'c']
        Categories (3, object): ['a', 'b', 'c']

        Timezone aware datetime data is converted to UTC:

        >>> pd.Series(pd.date_range('20130101', periods=3,
        ...                         tz='US/Eastern')).values
        array(['2013-01-01T05:00:00.000000000',
               '2013-01-02T05:00:00.000000000',
               '2013-01-03T05:00:00.000000000'], dtype='datetime64[ns]')
        """
        return self._mgr.external_values()

    @property
    def _values(self):
        """
        Return the internal repr of this data (defined by Block.internal_values).
        These are the values as stored in the Block (ndarray or ExtensionArray
        depending on the Block class), with datetime64[ns] and timedelta64[ns]
        wrapped in ExtensionArrays to match Index._values behavior.

        Differs from the public ``.values`` for certain data types, because of
        historical backwards compatibility of the public attribute (e.g. period
        returns object ndarray and datetimetz a datetime64[ns] ndarray for
        ``.values`` while it returns an ExtensionArray for ``._values`` in those
        cases).

        Differs from ``.array`` in that this still returns the numpy array if
        the Block is backed by a numpy array (except for datetime64 and
        timedelta64 dtypes), while ``.array`` ensures to always return an
        ExtensionArray.

        Overview:

        dtype       | values        | _values       | array         |
        ----------- | ------------- | ------------- | ------------- |
        Numeric     | ndarray       | ndarray       | PandasArray   |
        Category    | Categorical   | Categorical   | Categorical   |
        dt64[ns]    | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        dt64[ns tz] | ndarray[M8ns] | DatetimeArray | DatetimeArray |
        td64[ns]    | ndarray[m8ns] | TimedeltaArray| ndarray[m8ns] |
        Period      | ndarray[obj]  | PeriodArray   | PeriodArray   |
        Nullable    | EA            | EA            | EA            |

        """
        return self._mgr.internal_values()

    @property
    def _references(self) -> BlockValuesRefs | None:
        if isinstance(self._mgr, SingleArrayManager):
            return None
        return self._mgr._block.refs

    # error: Decorated property not supported
    @Appender(base.IndexOpsMixin.array.__doc__)  # type: ignore[misc]
    @property
    def array(self) -> ExtensionArray:
        return self._mgr.array_values()

    # ops
    def ravel(self, order: str = "C") -> ArrayLike:
        """
        Return the flattened underlying data as an ndarray or ExtensionArray.

        Returns
        -------
        numpy.ndarray or ExtensionArray
            Flattened data of the Series.

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
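
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.ravel()
        array([1, 2, 3])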
777 """
778 arr = self._values.ravel(order=order)
779 if isinstance(arr, np.ndarray) and using_copy_on_write():
780 arr.flags.writeable = False
781 return arr
782
783 def __len__(self) -> int:
784 """
785 Return the length of the Series.
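
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> len(s)
        3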
786 """
787 return len(self._mgr)
788
789 def view(self, dtype: Dtype | None = None) -> Series:
790 """
791 Create a new view of the Series.
792
        This function will return a new Series with a view of the same
        underlying values in memory, optionally reinterpreted with a new data
        type. The new data type must preserve the same size in bytes so as
        not to cause index misalignment.

        Parameters
        ----------
        dtype : data type
            Data type object or one of their string representations.

        Returns
        -------
        Series
            A new Series object as a view of the same data in memory.

        See Also
        --------
        numpy.ndarray.view : Equivalent numpy function to create a new view of
            the same data in memory.

        Notes
        -----
        Series are instantiated with ``dtype=float64`` by default. While
        ``numpy.ndarray.view()`` will return a view with the same data type as
        the original array, ``Series.view()`` (without specified dtype)
        will try using ``float64`` and may fail if the original data type size
        in bytes is not the same.

        Examples
        --------
        >>> s = pd.Series([-2, -1, 0, 1, 2], dtype='int8')
        >>> s
        0   -2
        1   -1
        2    0
        3    1
        4    2
        dtype: int8

        The 8 bit signed integer representation of `-1` is `0b11111111`, but
        the same bytes represent 255 if read as an 8 bit unsigned integer:

        >>> us = s.view('uint8')
        >>> us
        0    254
        1    255
        2      0
        3      1
        4      2
        dtype: uint8

        The views share the same underlying values:

        >>> us[0] = 128
        >>> s
        0   -128
        1     -1
        2      0
        3      1
        4      2
        dtype: int8
        """
        # self.array instead of self._values so we piggyback on PandasArray
        # implementation
        res_values = self.array.view(dtype)
        res_ser = self._constructor(res_values, index=self.index, copy=False)
        if isinstance(res_ser._mgr, SingleBlockManager) and using_copy_on_write():
            blk = res_ser._mgr._block
            blk.refs = cast("BlockValuesRefs", self._references)
            blk.refs.add_reference(blk)  # type: ignore[arg-type]
        return res_ser.__finalize__(self, method="view")

    # ----------------------------------------------------------------------
    # NDArray Compat
    _HANDLED_TYPES = (Index, ExtensionArray, np.ndarray)

    def __array__(self, dtype: npt.DTypeLike | None = None) -> np.ndarray:
        """
        Return the values as a NumPy array.

        Users should not call this directly. Rather, it is invoked by
        :func:`numpy.array` and :func:`numpy.asarray`.

        Parameters
        ----------
        dtype : str or numpy.dtype, optional
            The dtype to use for the resulting NumPy array. By default,
            the dtype is inferred from the data.

        Returns
        -------
        numpy.ndarray
            The values in the series converted to a :class:`numpy.ndarray`
            with the specified `dtype`.

        See Also
        --------
        array : Create a new array from data.
        Series.array : Zero-copy view to the array backing the Series.
        Series.to_numpy : Series method for similar behavior.

        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> np.asarray(ser)
        array([1, 2, 3])

        For timezone-aware data, the timezones may be retained with
        ``dtype='object'``

        >>> tzser = pd.Series(pd.date_range('2000', periods=2, tz="CET"))
        >>> np.asarray(tzser, dtype="object")
        array([Timestamp('2000-01-01 00:00:00+0100', tz='CET'),
               Timestamp('2000-01-02 00:00:00+0100', tz='CET')],
              dtype=object)

        Or the values may be localized to UTC and the tzinfo discarded with
        ``dtype='datetime64[ns]'``

        >>> np.asarray(tzser, dtype="datetime64[ns]")  # doctest: +ELLIPSIS
        array(['1999-12-31T23:00:00.000000000', ...],
              dtype='datetime64[ns]')
        """
        values = self._values
        arr = np.asarray(values, dtype=dtype)
        if using_copy_on_write() and astype_is_view(values.dtype, arr.dtype):
            arr = arr.view()
            arr.flags.writeable = False
        return arr

    # ----------------------------------------------------------------------
    # Unary Methods

    # coercion
    __float__ = _coerce_method(float)
    __int__ = _coerce_method(int)
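
    # A sketch of the generated coercions (illustrative only; the
    # single-element path currently warns and will raise in the future):
    #
    #   >>> float(pd.Series([1.5]))   # FutureWarning; returns 1.5
    #   >>> int(pd.Series([1, 2]))    # raises TypeError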

    # ----------------------------------------------------------------------

    # indexers
    @property
    def axes(self) -> list[Index]:
        """
        Return a list of the row axis labels.
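
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.axes
        [RangeIndex(start=0, stop=3, step=1)]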
937 """
938 return [self.index]
939
940 # ----------------------------------------------------------------------
941 # Indexing Methods
942
943 @Appender(NDFrame.take.__doc__)
944 def take(self, indices, axis: Axis = 0, **kwargs) -> Series:
945 nv.validate_take((), kwargs)
946
947 indices = ensure_platform_int(indices)
948
949 if (
950 indices.ndim == 1
951 and using_copy_on_write()
952 and is_range_indexer(indices, len(self))
953 ):
954 return self.copy(deep=None)
955
956 new_index = self.index.take(indices)
957 new_values = self._values.take(indices)
958
959 result = self._constructor(new_values, index=new_index, fastpath=True)
960 return result.__finalize__(self, method="take")
961
    def _take_with_is_copy(self, indices, axis: Axis = 0) -> Series:
        """
        Internal version of the `take` method that sets the `_is_copy`
        attribute to keep track of the parent dataframe (used in indexing
        for the SettingWithCopyWarning). For Series this does the same
        as the public take (it never sets `_is_copy`).

        See the docstring of `take` for full explanation of the parameters.
        """
        return self.take(indices=indices, axis=axis)

    def _ixs(self, i: int, axis: AxisInt = 0) -> Any:
        """
        Return the i-th value or values in the Series by location.

        Parameters
        ----------
        i : int

        Returns
        -------
        scalar (int) or Series (slice, sequence)
        """
        return self._values[i]

    def _slice(self, slobj: slice | np.ndarray, axis: Axis = 0) -> Series:
        # axis kwarg is retained for compat with NDFrame method
        # _slice is *always* positional
        return self._get_values(slobj)

    def __getitem__(self, key):
        check_dict_or_set_indexers(key)
        key = com.apply_if_callable(key, self)

        if key is Ellipsis:
            return self

        key_is_scalar = is_scalar(key)
        if isinstance(key, (list, tuple)):
            key = unpack_1tuple(key)

        if is_integer(key) and self.index._should_fallback_to_positional:
            return self._values[key]

        elif key_is_scalar:
            return self._get_value(key)

        if is_hashable(key):
            # Otherwise index.get_value will raise InvalidIndexError
            try:
                # For labels that don't resolve as scalars like tuples and frozensets
                result = self._get_value(key)

                return result

            except (KeyError, TypeError, InvalidIndexError):
                # InvalidIndexError for e.g. generator
                # see test_series_getitem_corner_generator
                if isinstance(key, tuple) and isinstance(self.index, MultiIndex):
                    # We still have the corner case where a tuple is a key
                    # in the first level of our MultiIndex
                    return self._get_values_tuple(key)

        if is_iterator(key):
            key = list(key)

        if com.is_bool_indexer(key):
            key = check_bool_indexer(self.index, key)
            key = np.asarray(key, dtype=bool)
            return self._get_values(key)

        return self._get_with(key)

    def _get_with(self, key):
        # other: fancy integer or otherwise
        if isinstance(key, slice):
            # _convert_slice_indexer to determine if this slice is positional
            # or label based, and if the latter, convert to positional
            slobj = self.index._convert_slice_indexer(key, kind="getitem")
            return self._slice(slobj)
        elif isinstance(key, ABCDataFrame):
            raise TypeError(
                "Indexing a Series with DataFrame is not "
                "supported, use the appropriate DataFrame column"
            )
        elif isinstance(key, tuple):
            return self._get_values_tuple(key)

        elif not is_list_like(key):
            # e.g. scalars that aren't recognized by lib.is_scalar, GH#32684
            return self.loc[key]

        if not isinstance(key, (list, np.ndarray, ExtensionArray, Series, Index)):
            key = list(key)

        if isinstance(key, Index):
            key_type = key.inferred_type
        else:
            key_type = lib.infer_dtype(key, skipna=False)

        # Note: The key_type == "boolean" case should be caught by the
        # com.is_bool_indexer check in __getitem__
        if key_type == "integer":
            # We need to decide whether to treat this as a positional indexer
            # (i.e. self.iloc) or label-based (i.e. self.loc)
            if not self.index._should_fallback_to_positional:
                return self.loc[key]
            else:
                return self.iloc[key]

        # handle the dup indexing case GH#4246
        return self.loc[key]

    def _get_values_tuple(self, key: tuple):
        # mpl hackaround
        if com.any_none(*key):
            # mpl compat if we look up e.g. ser[:, np.newaxis];
            # see tests.series.timeseries.test_mpl_compat_hack
            # the asarray is needed to avoid returning a 2D DatetimeArray
            result = np.asarray(self._values[key])
            disallow_ndim_indexing(result)
            return result

        if not isinstance(self.index, MultiIndex):
            raise KeyError("key of type tuple not found and not a MultiIndex")

        # If key is contained, would have returned by now
        indexer, new_index = self.index.get_loc_level(key)
        new_ser = self._constructor(self._values[indexer], index=new_index, copy=False)
        if using_copy_on_write() and isinstance(indexer, slice):
            new_ser._mgr.add_references(self._mgr)  # type: ignore[arg-type]
        return new_ser.__finalize__(self)

    def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series:
        new_mgr = self._mgr.getitem_mgr(indexer)
        return self._constructor(new_mgr).__finalize__(self)

    def _get_value(self, label, takeable: bool = False):
        """
        Quickly retrieve single value at passed index label.

        Parameters
        ----------
        label : object
        takeable : interpret the index as indexers, default False

        Returns
        -------
        scalar value
        """
        if takeable:
            return self._values[label]

        # Similar to Index.get_value, but we do not fall back to positional
        loc = self.index.get_loc(label)

        if is_integer(loc):
            return self._values[loc]

        if isinstance(self.index, MultiIndex):
            mi = self.index
            new_values = self._values[loc]
            if len(new_values) == 1 and mi.nlevels == 1:
                # If more than one level left, we can not return a scalar
                return new_values[0]

            new_index = mi[loc]
            new_index = maybe_droplevels(new_index, label)
            new_ser = self._constructor(
                new_values, index=new_index, name=self.name, copy=False
            )
            if using_copy_on_write() and isinstance(loc, slice):
                new_ser._mgr.add_references(self._mgr)  # type: ignore[arg-type]
            return new_ser.__finalize__(self)

        else:
            return self.iloc[loc]

    def __setitem__(self, key, value) -> None:
        if not PYPY and using_copy_on_write():
            if sys.getrefcount(self) <= 3:
                warnings.warn(
                    _chained_assignment_msg, ChainedAssignmentError, stacklevel=2
                )

        check_dict_or_set_indexers(key)
        key = com.apply_if_callable(key, self)
        cacher_needs_updating = self._check_is_chained_assignment_possible()

        if key is Ellipsis:
            key = slice(None)

        if isinstance(key, slice):
            indexer = self.index._convert_slice_indexer(key, kind="getitem")
            return self._set_values(indexer, value)

        try:
            self._set_with_engine(key, value)
        except KeyError:
            # We have a scalar (or for MultiIndex or object-dtype, scalar-like)
            # key that is not present in self.index.
            if is_integer(key):
                if not self.index._should_fallback_to_positional:
                    # GH#33469
                    self.loc[key] = value
                else:
                    # positional setter
                    # can't use _mgr.setitem_inplace yet bc could have *both*
                    # KeyError and then ValueError, xref GH#45070
                    self._set_values(key, value)
            else:
                # GH#12862 adding a new key to the Series
                self.loc[key] = value

        except (TypeError, ValueError, LossySetitemError):
            # The key was OK, but we cannot set the value losslessly
            indexer = self.index.get_loc(key)
            self._set_values(indexer, value)

        except InvalidIndexError as err:
            if isinstance(key, tuple) and not isinstance(self.index, MultiIndex):
                # cases with MultiIndex don't get here bc they raise KeyError
                # e.g. test_basic_getitem_setitem_corner
                raise KeyError(
                    "key of type tuple not found and not a MultiIndex"
                ) from err

            if com.is_bool_indexer(key):
                key = check_bool_indexer(self.index, key)
                key = np.asarray(key, dtype=bool)

                if (
                    is_list_like(value)
                    and len(value) != len(self)
                    and not isinstance(value, Series)
                    and not is_object_dtype(self.dtype)
                ):
                    # Series will be reindexed to have matching length inside
                    # _where call below
                    # GH#44265
                    indexer = key.nonzero()[0]
                    self._set_values(indexer, value)
                    return

                # otherwise with listlike other we interpret series[mask] = other
                # as series[mask] = other[mask]
                try:
                    self._where(~key, value, inplace=True)
                except InvalidIndexError:
                    # test_where_dups
                    self.iloc[key] = value
                return

            else:
                self._set_with(key, value)

        if cacher_needs_updating:
            self._maybe_update_cacher(inplace=True)

    def _set_with_engine(self, key, value) -> None:
        loc = self.index.get_loc(key)

        # this is equivalent to self._values[key] = value
        self._mgr.setitem_inplace(loc, value)

    def _set_with(self, key, value) -> None:
        # We got here via exception-handling off of InvalidIndexError, so
        # key should always be listlike at this point.
        assert not isinstance(key, tuple)

        if is_iterator(key):
            # Without this, the call to infer_dtype will consume the generator
            key = list(key)

        if not self.index._should_fallback_to_positional:
            # Regardless of the key type, we're treating it as labels
            self._set_labels(key, value)

        else:
            # Note: key_type == "boolean" should not occur because that
            # should be caught by the is_bool_indexer check in __setitem__
            key_type = lib.infer_dtype(key, skipna=False)

            if key_type == "integer":
                self._set_values(key, value)
            else:
                self._set_labels(key, value)

    def _set_labels(self, key, value) -> None:
        key = com.asarray_tuplesafe(key)
        indexer: np.ndarray = self.index.get_indexer(key)
        mask = indexer == -1
        if mask.any():
            raise KeyError(f"{key[mask]} not in index")
        self._set_values(indexer, value)

    def _set_values(self, key, value) -> None:
        if isinstance(key, (Index, Series)):
            key = key._values

        self._mgr = self._mgr.setitem(indexer=key, value=value)
        self._maybe_update_cacher()

    def _set_value(self, label, value, takeable: bool = False) -> None:
        """
        Quickly set single value at passed label.

        If label is not contained, a new object is created with the label
        placed at the end of the result index.

        Parameters
        ----------
        label : object
            Partial indexing with MultiIndex not allowed.
        value : object
            Scalar value.
        takeable : interpret the index as indexers, default False
        """
        if not takeable:
            try:
                loc = self.index.get_loc(label)
            except KeyError:
                # set using a non-recursive method
                self.loc[label] = value
                return
        else:
            loc = label

        self._set_values(loc, value)

    # ----------------------------------------------------------------------
    # Lookup Caching

    @property
    def _is_cached(self) -> bool:
        """Return boolean indicating if self is cached or not."""
        return getattr(self, "_cacher", None) is not None

    def _get_cacher(self):
        """return my cacher or None"""
        cacher = getattr(self, "_cacher", None)
        if cacher is not None:
            cacher = cacher[1]()
        return cacher

    def _reset_cacher(self) -> None:
        """
        Reset the cacher.
        """
        if hasattr(self, "_cacher"):
            del self._cacher

    def _set_as_cached(self, item, cacher) -> None:
        """
        Set the _cacher attribute on the calling object with a weakref to
        cacher.
        """
        if using_copy_on_write():
            return
        self._cacher = (item, weakref.ref(cacher))

    def _clear_item_cache(self) -> None:
        # no-op for Series
        pass

    def _check_is_chained_assignment_possible(self) -> bool:
        """
        See NDFrame._check_is_chained_assignment_possible.__doc__
        """
        if self._is_view and self._is_cached:
            ref = self._get_cacher()
            if ref is not None and ref._is_mixed_type:
                self._check_setitem_copy(t="referent", force=True)
            return True
        return super()._check_is_chained_assignment_possible()

    def _maybe_update_cacher(
        self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False
    ) -> None:
        """
        See NDFrame._maybe_update_cacher.__doc__
        """
        # for CoW, we never want to update the parent DataFrame cache
        # if the Series changed, but don't keep track of any cacher
        if using_copy_on_write():
            return
        cacher = getattr(self, "_cacher", None)
        if cacher is not None:
            assert self.ndim == 1
            ref: DataFrame = cacher[1]()

            # we are trying to reference a dead referent, hence
            # a copy
            if ref is None:
                del self._cacher
            elif len(self) == len(ref) and self.name in ref.columns:
                # GH#42530 self.name must be in ref.columns
                # to ensure column still in dataframe
                # otherwise, either self or ref has swapped in new arrays
                ref._maybe_cache_changed(cacher[0], self, inplace=inplace)
            else:
                # GH#33675 we have swapped in a new array, so parent
                # reference to self is now invalid
                ref._item_cache.pop(cacher[0], None)

        super()._maybe_update_cacher(
            clear=clear, verify_is_copy=verify_is_copy, inplace=inplace
        )

    # ----------------------------------------------------------------------
    # Unsorted

    @property
    def _is_mixed_type(self) -> bool:
        return False

    def repeat(self, repeats: int | Sequence[int], axis: None = None) -> Series:
        """
        Repeat elements of a Series.

        Returns a new Series where each element of the current Series
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            Series.
        axis : None
            Unused. Parameter needed for compatibility with DataFrame.

        Returns
        -------
        Series
            Newly created Series with repeated elements.

        See Also
        --------
        Index.repeat : Equivalent function for Index.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> s = pd.Series(['a', 'b', 'c'])
        >>> s
        0    a
        1    b
        2    c
        dtype: object
        >>> s.repeat(2)
        0    a
        0    a
        1    b
        1    b
        2    c
        2    c
        dtype: object
        >>> s.repeat([1, 2, 3])
        0    a
        1    b
        1    b
        2    c
        2    c
        2    c
        dtype: object
        """
        nv.validate_repeat((), {"axis": axis})
        new_index = self.index.repeat(repeats)
        new_values = self._values.repeat(repeats)
        return self._constructor(new_values, index=new_index, copy=False).__finalize__(
            self, method="repeat"
        )

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: Literal[False] = ...,
        name: Level = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: bool = ...,
    ) -> DataFrame:
        ...

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: Literal[True],
        name: Level = ...,
        inplace: Literal[False] = ...,
        allow_duplicates: bool = ...,
    ) -> Series:
        ...

    @overload
    def reset_index(
        self,
        level: IndexLabel = ...,
        *,
        drop: bool = ...,
        name: Level = ...,
        inplace: Literal[True],
        allow_duplicates: bool = ...,
    ) -> None:
        ...

    def reset_index(
        self,
        level: IndexLabel = None,
        *,
        drop: bool = False,
        name: Level = lib.no_default,
        inplace: bool = False,
        allow_duplicates: bool = False,
    ) -> DataFrame | Series | None:
        """
        Generate a new DataFrame or Series with the index reset.

        This is useful when the index needs to be treated as a column, or
        when the index is meaningless and needs to be reset to the default
        before another operation.

        Parameters
        ----------
        level : int, str, tuple, or list, optional
            For a Series with a MultiIndex, only remove the specified levels
            from the index. Removes all levels by default.
        drop : bool, default False
            Just reset the index, without inserting it as a column in
            the new DataFrame.
        name : object, optional
            The name to use for the column containing the original Series
            values. Uses ``self.name`` by default. This argument is ignored
            when `drop` is True.
        inplace : bool, default False
            Modify the Series in place (do not create a new object).
        allow_duplicates : bool, default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        Series or DataFrame or None
            When `drop` is False (the default), a DataFrame is returned.
            The newly created columns will come first in the DataFrame,
            followed by the original Series values.
            When `drop` is True, a `Series` is returned.
            In either case, if ``inplace=True``, no value is returned.

        See Also
        --------
        DataFrame.reset_index: Analogous function for DataFrame.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3, 4], name='foo',
        ...               index=pd.Index(['a', 'b', 'c', 'd'], name='idx'))

        Generate a DataFrame with default index.

        >>> s.reset_index()
          idx  foo
        0   a    1
        1   b    2
        2   c    3
        3   d    4

        To specify the name of the new column use `name`.

        >>> s.reset_index(name='values')
          idx  values
        0   a       1
        1   b       2
        2   c       3
        3   d       4

        To generate a new Series with the default index, set `drop` to True.

        >>> s.reset_index(drop=True)
        0    1
        1    2
        2    3
        3    4
        Name: foo, dtype: int64

        The `level` parameter is interesting for Series with a multi-level
        index.

        >>> arrays = [np.array(['bar', 'bar', 'baz', 'baz']),
        ...           np.array(['one', 'two', 'one', 'two'])]
        >>> s2 = pd.Series(
        ...     range(4), name='foo',
        ...     index=pd.MultiIndex.from_arrays(arrays,
        ...                                     names=['a', 'b']))

        To remove a specific level from the Index, use `level`.

        >>> s2.reset_index(level='a')
               a  foo
        b
        one  bar    0
        two  bar    1
        one  baz    2
        two  baz    3

        If `level` is not set, all levels are removed from the Index.

        >>> s2.reset_index()
             a    b  foo
        0  bar  one    0
        1  bar  two    1
        2  baz  one    2
        3  baz  two    3
        """
        inplace = validate_bool_kwarg(inplace, "inplace")
        if drop:
            new_index = default_index(len(self))
            if level is not None:
                level_list: Sequence[Hashable]
                if not isinstance(level, (tuple, list)):
                    level_list = [level]
                else:
                    level_list = level
                level_list = [self.index._get_level_number(lev) for lev in level_list]
                if len(level_list) < self.index.nlevels:
                    new_index = self.index.droplevel(level_list)

            if inplace:
                self.index = new_index
            elif using_copy_on_write():
                new_ser = self.copy(deep=False)
                new_ser.index = new_index
                return new_ser.__finalize__(self, method="reset_index")
            else:
                return self._constructor(
                    self._values.copy(), index=new_index, copy=False
                ).__finalize__(self, method="reset_index")
        elif inplace:
            raise TypeError(
                "Cannot reset_index inplace on a Series to create a DataFrame"
            )
        else:
            if name is lib.no_default:
                # For backwards compatibility, keep columns as [0] instead of
                # [None] when self.name is None
                if self.name is None:
                    name = 0
                else:
                    name = self.name

            df = self.to_frame(name)
            return df.reset_index(
                level=level, drop=drop, allow_duplicates=allow_duplicates
            )
        return None

    # ----------------------------------------------------------------------
    # Rendering Methods

    def __repr__(self) -> str:
        """
        Return a string representation for a particular Series.
        """
        # pylint: disable=invalid-repr-returned
        repr_params = fmt.get_series_repr_params()
        return self.to_string(**repr_params)

    @overload
    def to_string(
        self,
        buf: None = ...,
        na_rep: str = ...,
        float_format: str | None = ...,
        header: bool = ...,
        index: bool = ...,
        length=...,
        dtype=...,
        name=...,
        max_rows: int | None = ...,
        min_rows: int | None = ...,
    ) -> str:
        ...

    @overload
    def to_string(
        self,
        buf: FilePath | WriteBuffer[str],
        na_rep: str = ...,
        float_format: str | None = ...,
        header: bool = ...,
        index: bool = ...,
        length=...,
        dtype=...,
        name=...,
        max_rows: int | None = ...,
        min_rows: int | None = ...,
    ) -> None:
        ...

    def to_string(
        self,
        buf: FilePath | WriteBuffer[str] | None = None,
        na_rep: str = "NaN",
        float_format: str | None = None,
        header: bool = True,
        index: bool = True,
        length: bool = False,
        dtype: bool = False,
        name: bool = False,
        max_rows: int | None = None,
        min_rows: int | None = None,
    ) -> str | None:
        """
        Render a string representation of the Series.

        Parameters
        ----------
        buf : StringIO-like, optional
            Buffer to write to.
        na_rep : str, optional
            String representation of NaN to use, default 'NaN'.
        float_format : one-parameter function, optional
            Formatter function to apply to columns' elements if they are
            floats, default None.
        header : bool, default True
            Add the Series header (index name).
        index : bool, optional
            Add index (row) labels, default True.
        length : bool, default False
            Add the Series length.
        dtype : bool, default False
            Add the Series dtype.
        name : bool, default False
            Add the Series name if not None.
        max_rows : int, optional
            Maximum number of rows to show before truncating. If None, show
            all.
        min_rows : int, optional
            The number of rows to display in a truncated repr (when number
            of rows is above `max_rows`).

        Returns
        -------
        str or None
            String representation of Series if ``buf=None``, otherwise None.
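
        Examples
        --------
        >>> ser = pd.Series([1, 2, 3])
        >>> print(ser.to_string())
        0    1
        1    2
        2    3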
1712 """
1713 formatter = fmt.SeriesFormatter(
1714 self,
1715 name=name,
1716 length=length,
1717 header=header,
1718 index=index,
1719 dtype=dtype,
1720 na_rep=na_rep,
1721 float_format=float_format,
1722 min_rows=min_rows,
1723 max_rows=max_rows,
1724 )
1725 result = formatter.to_string()
1726
1727 # catch contract violations
1728 if not isinstance(result, str):
1729 raise AssertionError(
1730 "result must be of type str, type "
1731 f"of result is {repr(type(result).__name__)}"
1732 )
1733
1734 if buf is None:
1735 return result
1736 else:
1737 if hasattr(buf, "write"):
1738 buf.write(result)
1739 else:
1740 with open(buf, "w") as f:
1741 f.write(result)
1742 return None
1743
1744 @doc(
1745 klass=_shared_doc_kwargs["klass"],
1746 storage_options=_shared_docs["storage_options"],
1747 examples=dedent(
1748 """Examples
1749 --------
1750 >>> s = pd.Series(["elk", "pig", "dog", "quetzal"], name="animal")
1751 >>> print(s.to_markdown())
1752 | | animal |
1753 |---:|:---------|
1754 | 0 | elk |
1755 | 1 | pig |
1756 | 2 | dog |
1757 | 3 | quetzal |
1758
1759 Output markdown with a tabulate option.
1760
1761 >>> print(s.to_markdown(tablefmt="grid"))
1762 +----+----------+
1763 | | animal |
1764 +====+==========+
1765 | 0 | elk |
1766 +----+----------+
1767 | 1 | pig |
1768 +----+----------+
1769 | 2 | dog |
1770 +----+----------+
1771 | 3 | quetzal |
1772 +----+----------+"""
1773 ),
1774 )
1775 def to_markdown(
1776 self,
1777 buf: IO[str] | None = None,
1778 mode: str = "wt",
1779 index: bool = True,
1780 storage_options: StorageOptions = None,
1781 **kwargs,
1782 ) -> str | None:
1783 """
1784 Print {klass} in Markdown-friendly format.
1785
1786 Parameters
1787 ----------
1788 buf : str, Path or StringIO-like, optional, default None
1789 Buffer to write to. If None, the output is returned as a string.
1790 mode : str, optional
1791 Mode in which file is opened, "wt" by default.
1792 index : bool, optional, default True
1793 Add index (row) labels.
1794
1795 .. versionadded:: 1.1.0
1796 {storage_options}
1797
1798 .. versionadded:: 1.2.0
1799
1800 **kwargs
1801 These parameters will be passed to `tabulate \
1802 <https://pypi.org/project/tabulate>`_.
1803
1804 Returns
1805 -------
1806 str
1807 {klass} in Markdown-friendly format.
1808
1809 Notes
1810 -----
1811 Requires the `tabulate <https://pypi.org/project/tabulate>`_ package.
1812
1813 {examples}
1814 """
1815 return self.to_frame().to_markdown(
1816 buf, mode, index, storage_options=storage_options, **kwargs
1817 )
1818
1819 # ----------------------------------------------------------------------
1820
1821 def items(self) -> Iterable[tuple[Hashable, Any]]:
1822 """
1823 Lazily iterate over (index, value) tuples.
1824
1825 This method returns an iterable tuple (index, value). This is
1826 convenient if you want to create a lazy iterator.
1827
1828 Returns
1829 -------
1830 iterable
1831 Iterable of tuples containing the (index, value) pairs from a
1832 Series.
1833
1834 See Also
1835 --------
1836 DataFrame.items : Iterate over (column name, Series) pairs.
1837 DataFrame.iterrows : Iterate over DataFrame rows as (index, Series) pairs.
1838
1839 Examples
1840 --------
1841 >>> s = pd.Series(['A', 'B', 'C'])
1842 >>> for index, value in s.items():
1843 ... print(f"Index : {index}, Value : {value}")
1844 Index : 0, Value : A
1845 Index : 1, Value : B
1846 Index : 2, Value : C
1847 """
1848 return zip(iter(self.index), iter(self))
1849
1850 # ----------------------------------------------------------------------
1851 # Misc public methods
1852
1853 def keys(self) -> Index:
1854 """
1855 Return alias for index.
1856
1857 Returns
1858 -------
1859 Index
1860 Index of the Series.
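
        Examples
        --------
        >>> s = pd.Series([1, 2, 3])
        >>> s.keys()
        RangeIndex(start=0, stop=3, step=1)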
1861 """
1862 return self.index
1863
1864 def to_dict(self, into: type[dict] = dict) -> dict:
1865 """
1866 Convert Series to {label -> value} dict or dict-like object.
1867
1868 Parameters
1869 ----------
1870 into : class, default dict
1871 The collections.abc.Mapping subclass to use as the return
1872 object. Can be the actual class or an empty
1873 instance of the mapping type you want. If you want a
1874 collections.defaultdict, you must pass it initialized.
1875
1876 Returns
1877 -------
1878 collections.abc.Mapping
1879 Key-value representation of Series.
1880
1881 Examples
1882 --------
1883 >>> s = pd.Series([1, 2, 3, 4])
1884 >>> s.to_dict()
1885 {0: 1, 1: 2, 2: 3, 3: 4}
1886 >>> from collections import OrderedDict, defaultdict
1887 >>> s.to_dict(OrderedDict)
1888 OrderedDict([(0, 1), (1, 2), (2, 3), (3, 4)])
1889 >>> dd = defaultdict(list)
1890 >>> s.to_dict(dd)
1891 defaultdict(<class 'list'>, {0: 1, 1: 2, 2: 3, 3: 4})
1892 """
1893 # GH16122
1894 into_c = com.standardize_mapping(into)
1895
1896 if is_object_dtype(self) or is_extension_array_dtype(self):
1897 return into_c((k, maybe_box_native(v)) for k, v in self.items())
1898 else:
1899 # Not an object dtype => all types will be the same so let the default
1900 # indexer return native python type
1901 return into_c(self.items())
1902
1903 def to_frame(self, name: Hashable = lib.no_default) -> DataFrame:
1904 """
1905 Convert Series to DataFrame.
1906
1907 Parameters
1908 ----------
1909 name : object, optional
1910 The passed name should substitute for the series name (if it has
1911 one).
1912
1913 Returns
1914 -------
1915 DataFrame
1916 DataFrame representation of Series.
1917
1918 Examples
1919 --------
1920 >>> s = pd.Series(["a", "b", "c"],
1921 ... name="vals")
1922 >>> s.to_frame()
1923 vals
1924 0 a
1925 1 b
1926 2 c
1927 """
1928 columns: Index
1929 if name is lib.no_default:
1930 name = self.name
1931 if name is None:
1932 # default to [0], same as we would get with DataFrame(self)
1933 columns = default_index(1)
1934 else:
1935 columns = Index([name])
1936 else:
1937 columns = Index([name])
1938
1939 mgr = self._mgr.to_2d_mgr(columns)
1940 df = self._constructor_expanddim(mgr)
1941 return df.__finalize__(self, method="to_frame")
1942
1943 def _set_name(
1944 self, name, inplace: bool = False, deep: bool | None = None
1945 ) -> Series:
1946 """
1947 Set the Series name.
1948
1949 Parameters
1950 ----------
1951 name : str
1952 inplace : bool
1953 Whether to modify `self` directly or return a copy.
        deep : bool or None, default None
            Whether to do a deep copy, a shallow copy, or use Copy on Write (None).
1956 """
1957 inplace = validate_bool_kwarg(inplace, "inplace")
1958 ser = self if inplace else self.copy(deep and not using_copy_on_write())
1959 ser.name = name
1960 return ser
1961
1962 @Appender(
1963 """
1964Examples
1965--------
1966>>> ser = pd.Series([390., 350., 30., 20.],
1967... index=['Falcon', 'Falcon', 'Parrot', 'Parrot'], name="Max Speed")
1968>>> ser
1969Falcon 390.0
1970Falcon 350.0
1971Parrot 30.0
1972Parrot 20.0
1973Name: Max Speed, dtype: float64
1974>>> ser.groupby(["a", "b", "a", "b"]).mean()
1975a 210.0
1976b 185.0
1977Name: Max Speed, dtype: float64
1978>>> ser.groupby(level=0).mean()
1979Falcon 370.0
1980Parrot 25.0
1981Name: Max Speed, dtype: float64
1982>>> ser.groupby(ser > 100).mean()
1983Max Speed
1984False 25.0
1985True 370.0
1986Name: Max Speed, dtype: float64
1987
1988**Grouping by Indexes**
1989
1990We can groupby different levels of a hierarchical index
1991using the `level` parameter:
1992
1993>>> arrays = [['Falcon', 'Falcon', 'Parrot', 'Parrot'],
1994... ['Captive', 'Wild', 'Captive', 'Wild']]
1995>>> index = pd.MultiIndex.from_arrays(arrays, names=('Animal', 'Type'))
1996>>> ser = pd.Series([390., 350., 30., 20.], index=index, name="Max Speed")
1997>>> ser
1998Animal Type
1999Falcon Captive 390.0
2000 Wild 350.0
2001Parrot Captive 30.0
2002 Wild 20.0
2003Name: Max Speed, dtype: float64
2004>>> ser.groupby(level=0).mean()
2005Animal
2006Falcon 370.0
2007Parrot 25.0
2008Name: Max Speed, dtype: float64
2009>>> ser.groupby(level="Type").mean()
2010Type
2011Captive 210.0
2012Wild 185.0
2013Name: Max Speed, dtype: float64
2014
We can also choose to include `NA` in group keys or not by setting the
`dropna` parameter; the default setting is `True`.
2017
2018>>> ser = pd.Series([1, 2, 3, 3], index=["a", 'a', 'b', np.nan])
2019>>> ser.groupby(level=0).sum()
2020a 3
2021b 3
2022dtype: int64
2023
2024>>> ser.groupby(level=0, dropna=False).sum()
2025a 3
2026b 3
2027NaN 3
2028dtype: int64
2029
2030>>> arrays = ['Falcon', 'Falcon', 'Parrot', 'Parrot']
2031>>> ser = pd.Series([390., 350., 30., 20.], index=arrays, name="Max Speed")
2032>>> ser.groupby(["a", "b", "a", np.nan]).mean()
2033a 210.0
2034b 350.0
2035Name: Max Speed, dtype: float64
2036
2037>>> ser.groupby(["a", "b", "a", np.nan], dropna=False).mean()
2038a 210.0
2039b 350.0
2040NaN 20.0
2041Name: Max Speed, dtype: float64
2042"""
2043 )
2044 @Appender(_shared_docs["groupby"] % _shared_doc_kwargs)
2045 def groupby(
2046 self,
2047 by=None,
2048 axis: Axis = 0,
2049 level: IndexLabel = None,
2050 as_index: bool = True,
2051 sort: bool = True,
2052 group_keys: bool = True,
2053 observed: bool = False,
2054 dropna: bool = True,
2055 ) -> SeriesGroupBy:
2056 from pandas.core.groupby.generic import SeriesGroupBy
2057
2058 if level is None and by is None:
2059 raise TypeError("You have to supply one of 'by' and 'level'")
2060 if not as_index:
2061 raise TypeError("as_index=False only valid with DataFrame")
2062 axis = self._get_axis_number(axis)
2063
2064 return SeriesGroupBy(
2065 obj=self,
2066 keys=by,
2067 axis=axis,
2068 level=level,
2069 as_index=as_index,
2070 sort=sort,
2071 group_keys=group_keys,
2072 observed=observed,
2073 dropna=dropna,
2074 )
2075
2076 # ----------------------------------------------------------------------
2077 # Statistics, overridden ndarray methods
2078
2079 # TODO: integrate bottleneck
def count(self) -> int:
2081 """
2082 Return number of non-NA/null observations in the Series.
2083
2084 Returns
2085 -------
int
Number of non-null values in the Series.
2088
2089 See Also
2090 --------
2091 DataFrame.count : Count non-NA cells for each column or row.
2092
2093 Examples
2094 --------
2095 >>> s = pd.Series([0.0, 1.0, np.nan])
2096 >>> s.count()
2097 2
2098 """
2099 return notna(self._values).sum().astype("int64")
2100
2101 def mode(self, dropna: bool = True) -> Series:
2102 """
2103 Return the mode(s) of the Series.
2104
2105 The mode is the value that appears most often. There can be multiple modes.
2106
Always returns a Series, even if only one value is returned.
2108
2109 Parameters
2110 ----------
2111 dropna : bool, default True
2112 Don't consider counts of NaN/NaT.
2113
2114 Returns
2115 -------
2116 Series
2117 Modes of the Series in sorted order.
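
Examples
--------
>>> s = pd.Series([2, 4, 2, 2, 4, None])
>>> s.mode()
0 2.0
dtype: float64

More than one mode:

>>> s = pd.Series([2, 4, 8, 2, 4, None])
>>> s.mode()
0 2.0
1 4.0
dtype: float64

With ``dropna=False``, missing values are counted like any other value:

>>> s = pd.Series([2, 4, None, 2, 4, None])
>>> s.mode(dropna=False)
0 2.0
1 4.0
2 NaN
dtype: float64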
2118 """
2119 # TODO: Add option for bins like value_counts()
2120 values = self._values
2121 if isinstance(values, np.ndarray):
2122 res_values = algorithms.mode(values, dropna=dropna)
2123 else:
2124 res_values = values._mode(dropna=dropna)
2125
2126 # Ensure index is type stable (should always use int index)
2127 return self._constructor(
2128 res_values, index=range(len(res_values)), name=self.name, copy=False
2129 )
2130
2131 def unique(self) -> ArrayLike: # pylint: disable=useless-parent-delegation
2132 """
2133 Return unique values of Series object.
2134
2135 Uniques are returned in order of appearance. Hash table-based unique,
2136 therefore does NOT sort.
2137
2138 Returns
2139 -------
2140 ndarray or ExtensionArray
2141 The unique values returned as a NumPy array. See Notes.
2142
2143 See Also
2144 --------
2145 Series.drop_duplicates : Return Series with duplicate values removed.
2146 unique : Top-level unique method for any 1-d array-like object.
2147 Index.unique : Return Index with unique values from an Index object.
2148
2149 Notes
2150 -----
2151 Returns the unique values as a NumPy array. In case of an
2152 extension-array backed Series, a new
2153 :class:`~api.extensions.ExtensionArray` of that type with just
2154 the unique values is returned. This includes
2155
2156 * Categorical
2157 * Period
2158 * Datetime with Timezone
2159 * Datetime without Timezone
2160 * Timedelta
2161 * Interval
2162 * Sparse
2163 * IntegerNA
2164
2165 See Examples section.
2166
2167 Examples
2168 --------
2169 >>> pd.Series([2, 1, 3, 3], name='A').unique()
2170 array([2, 1, 3])
2171
2172 >>> pd.Series([pd.Timestamp('2016-01-01') for _ in range(3)]).unique()
2173 <DatetimeArray>
2174 ['2016-01-01 00:00:00']
2175 Length: 1, dtype: datetime64[ns]
2176
2177 >>> pd.Series([pd.Timestamp('2016-01-01', tz='US/Eastern')
2178 ... for _ in range(3)]).unique()
2179 <DatetimeArray>
2180 ['2016-01-01 00:00:00-05:00']
2181 Length: 1, dtype: datetime64[ns, US/Eastern]
2182
A Categorical will return categories in the order of
appearance and with the same dtype.
2185
2186 >>> pd.Series(pd.Categorical(list('baabc'))).unique()
2187 ['b', 'a', 'c']
2188 Categories (3, object): ['a', 'b', 'c']
2189 >>> pd.Series(pd.Categorical(list('baabc'), categories=list('abc'),
2190 ... ordered=True)).unique()
2191 ['b', 'a', 'c']
2192 Categories (3, object): ['a' < 'b' < 'c']
2193 """
2194 return super().unique()
2195
2196 @overload
2197 def drop_duplicates(
2198 self,
2199 *,
2200 keep: DropKeep = ...,
2201 inplace: Literal[False] = ...,
2202 ignore_index: bool = ...,
2203 ) -> Series:
2204 ...
2205
2206 @overload
2207 def drop_duplicates(
2208 self, *, keep: DropKeep = ..., inplace: Literal[True], ignore_index: bool = ...
2209 ) -> None:
2210 ...
2211
2212 @overload
2213 def drop_duplicates(
2214 self, *, keep: DropKeep = ..., inplace: bool = ..., ignore_index: bool = ...
2215 ) -> Series | None:
2216 ...
2217
2218 def drop_duplicates(
2219 self,
2220 *,
2221 keep: DropKeep = "first",
2222 inplace: bool = False,
2223 ignore_index: bool = False,
2224 ) -> Series | None:
2225 """
2226 Return Series with duplicate values removed.
2227
2228 Parameters
2229 ----------
2230 keep : {'first', 'last', ``False``}, default 'first'
2231 Method to handle dropping duplicates:
2232
2233 - 'first' : Drop duplicates except for the first occurrence.
2234 - 'last' : Drop duplicates except for the last occurrence.
2235 - ``False`` : Drop all duplicates.
2236
2237 inplace : bool, default ``False``
2238 If ``True``, performs operation inplace and returns None.
2239
2240 ignore_index : bool, default ``False``
2241 If ``True``, the resulting axis will be labeled 0, 1, …, n - 1.
2242
2243 .. versionadded:: 2.0.0
2244
2245 Returns
2246 -------
2247 Series or None
2248 Series with duplicates dropped or None if ``inplace=True``.
2249
2250 See Also
2251 --------
2252 Index.drop_duplicates : Equivalent method on Index.
2253 DataFrame.drop_duplicates : Equivalent method on DataFrame.
2254 Series.duplicated : Related method on Series, indicating duplicate
2255 Series values.
2256 Series.unique : Return unique values as an array.
2257
2258 Examples
2259 --------
2260 Generate a Series with duplicated entries.
2261
2262 >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'],
2263 ... name='animal')
2264 >>> s
2265 0 lama
2266 1 cow
2267 2 lama
2268 3 beetle
2269 4 lama
2270 5 hippo
2271 Name: animal, dtype: object
2272
2273 With the 'keep' parameter, the selection behaviour of duplicated values
2274 can be changed. The value 'first' keeps the first occurrence for each
2275 set of duplicated entries. The default value of keep is 'first'.
2276
2277 >>> s.drop_duplicates()
2278 0 lama
2279 1 cow
2280 3 beetle
2281 5 hippo
2282 Name: animal, dtype: object
2283
2284 The value 'last' for parameter 'keep' keeps the last occurrence for
2285 each set of duplicated entries.
2286
2287 >>> s.drop_duplicates(keep='last')
2288 1 cow
2289 3 beetle
2290 4 lama
2291 5 hippo
2292 Name: animal, dtype: object
2293
2294 The value ``False`` for parameter 'keep' discards all sets of
2295 duplicated entries.
2296
2297 >>> s.drop_duplicates(keep=False)
2298 1 cow
2299 3 beetle
2300 5 hippo
2301 Name: animal, dtype: object
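
With ``ignore_index=True``, the resulting index is relabeled 0, 1, …, n - 1.

>>> s.drop_duplicates(ignore_index=True)
0 lama
1 cow
2 beetle
3 hippo
Name: animal, dtype: object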
2302 """
2303 inplace = validate_bool_kwarg(inplace, "inplace")
2304 result = super().drop_duplicates(keep=keep)
2305
2306 if ignore_index:
2307 result.index = default_index(len(result))
2308
2309 if inplace:
2310 self._update_inplace(result)
2311 return None
2312 else:
2313 return result
2314
2315 def duplicated(self, keep: DropKeep = "first") -> Series:
2316 """
2317 Indicate duplicate Series values.
2318
2319 Duplicated values are indicated as ``True`` values in the resulting
2320 Series. Either all duplicates, all except the first or all except the
2321 last occurrence of duplicates can be indicated.
2322
2323 Parameters
2324 ----------
2325 keep : {'first', 'last', False}, default 'first'
2326 Method to handle dropping duplicates:
2327
2328 - 'first' : Mark duplicates as ``True`` except for the first
2329 occurrence.
2330 - 'last' : Mark duplicates as ``True`` except for the last
2331 occurrence.
2332 - ``False`` : Mark all duplicates as ``True``.
2333
2334 Returns
2335 -------
2336 Series[bool]
2337 Series indicating whether each value has occurred in the
2338 preceding values.
2339
2340 See Also
2341 --------
2342 Index.duplicated : Equivalent method on pandas.Index.
2343 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
2344 Series.drop_duplicates : Remove duplicate values from Series.
2345
2346 Examples
2347 --------
By default, for each set of duplicated values, the first occurrence is
set to False and all others to True:
2350
2351 >>> animals = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama'])
2352 >>> animals.duplicated()
2353 0 False
2354 1 False
2355 2 True
2356 3 False
2357 4 True
2358 dtype: bool
2359
2360 which is equivalent to
2361
2362 >>> animals.duplicated(keep='first')
2363 0 False
2364 1 False
2365 2 True
2366 3 False
2367 4 True
2368 dtype: bool
2369
By using 'last', the last occurrence of each set of duplicated values
is set to False and all others to True:
2372
2373 >>> animals.duplicated(keep='last')
2374 0 True
2375 1 False
2376 2 True
2377 3 False
2378 4 False
2379 dtype: bool
2380
By setting ``keep`` to ``False``, all duplicates are marked True:
2382
2383 >>> animals.duplicated(keep=False)
2384 0 True
2385 1 False
2386 2 True
2387 3 False
2388 4 True
2389 dtype: bool
2390 """
2391 res = self._duplicated(keep=keep)
2392 result = self._constructor(res, index=self.index, copy=False)
2393 return result.__finalize__(self, method="duplicated")
2394
2395 def idxmin(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
2396 """
2397 Return the row label of the minimum value.
2398
2399 If multiple values equal the minimum, the first row label with that
2400 value is returned.
2401
2402 Parameters
2403 ----------
2404 axis : {0 or 'index'}
2405 Unused. Parameter needed for compatibility with DataFrame.
2406 skipna : bool, default True
2407 Exclude NA/null values. If the entire Series is NA, the result
2408 will be NA.
2409 *args, **kwargs
2410 Additional arguments and keywords have no effect but might be
2411 accepted for compatibility with NumPy.
2412
2413 Returns
2414 -------
2415 Index
2416 Label of the minimum value.
2417
2418 Raises
2419 ------
2420 ValueError
2421 If the Series is empty.
2422
2423 See Also
2424 --------
2425 numpy.argmin : Return indices of the minimum values
2426 along the given axis.
2427 DataFrame.idxmin : Return index of first occurrence of minimum
2428 over requested axis.
2429 Series.idxmax : Return index *label* of the first occurrence
2430 of maximum of values.
2431
2432 Notes
2433 -----
2434 This method is the Series version of ``ndarray.argmin``. This method
2435 returns the label of the minimum, while ``ndarray.argmin`` returns
2436 the position. To get the position, use ``series.values.argmin()``.
2437
2438 Examples
2439 --------
2440 >>> s = pd.Series(data=[1, None, 4, 1],
2441 ... index=['A', 'B', 'C', 'D'])
2442 >>> s
2443 A 1.0
2444 B NaN
2445 C 4.0
2446 D 1.0
2447 dtype: float64
2448
2449 >>> s.idxmin()
2450 'A'
2451
2452 If `skipna` is False and there is an NA value in the data,
2453 the function returns ``nan``.
2454
2455 >>> s.idxmin(skipna=False)
2456 nan
2457 """
2458 # error: Argument 1 to "argmin" of "IndexOpsMixin" has incompatible type "Union
2459 # [int, Literal['index', 'columns']]"; expected "Optional[int]"
2460 i = self.argmin(axis, skipna, *args, **kwargs) # type: ignore[arg-type]
2461 if i == -1:
2462 return np.nan
2463 return self.index[i]
2464
2465 def idxmax(self, axis: Axis = 0, skipna: bool = True, *args, **kwargs) -> Hashable:
2466 """
2467 Return the row label of the maximum value.
2468
2469 If multiple values equal the maximum, the first row label with that
2470 value is returned.
2471
2472 Parameters
2473 ----------
2474 axis : {0 or 'index'}
2475 Unused. Parameter needed for compatibility with DataFrame.
2476 skipna : bool, default True
2477 Exclude NA/null values. If the entire Series is NA, the result
2478 will be NA.
2479 *args, **kwargs
2480 Additional arguments and keywords have no effect but might be
2481 accepted for compatibility with NumPy.
2482
2483 Returns
2484 -------
2485 Index
2486 Label of the maximum value.
2487
2488 Raises
2489 ------
2490 ValueError
2491 If the Series is empty.
2492
2493 See Also
2494 --------
2495 numpy.argmax : Return indices of the maximum values
2496 along the given axis.
2497 DataFrame.idxmax : Return index of first occurrence of maximum
2498 over requested axis.
2499 Series.idxmin : Return index *label* of the first occurrence
2500 of minimum of values.
2501
2502 Notes
2503 -----
2504 This method is the Series version of ``ndarray.argmax``. This method
2505 returns the label of the maximum, while ``ndarray.argmax`` returns
2506 the position. To get the position, use ``series.values.argmax()``.
2507
2508 Examples
2509 --------
2510 >>> s = pd.Series(data=[1, None, 4, 3, 4],
2511 ... index=['A', 'B', 'C', 'D', 'E'])
2512 >>> s
2513 A 1.0
2514 B NaN
2515 C 4.0
2516 D 3.0
2517 E 4.0
2518 dtype: float64
2519
2520 >>> s.idxmax()
2521 'C'
2522
2523 If `skipna` is False and there is an NA value in the data,
2524 the function returns ``nan``.
2525
2526 >>> s.idxmax(skipna=False)
2527 nan
2528 """
2529 # error: Argument 1 to "argmax" of "IndexOpsMixin" has incompatible type
2530 # "Union[int, Literal['index', 'columns']]"; expected "Optional[int]"
2531 i = self.argmax(axis, skipna, *args, **kwargs) # type: ignore[arg-type]
2532 if i == -1:
2533 return np.nan
2534 return self.index[i]
2535
2536 def round(self, decimals: int = 0, *args, **kwargs) -> Series:
2537 """
2538 Round each value in a Series to the given number of decimals.
2539
2540 Parameters
2541 ----------
2542 decimals : int, default 0
2543 Number of decimal places to round to. If decimals is negative,
2544 it specifies the number of positions to the left of the decimal point.
2545 *args, **kwargs
2546 Additional arguments and keywords have no effect but might be
2547 accepted for compatibility with NumPy.
2548
2549 Returns
2550 -------
2551 Series
2552 Rounded values of the Series.
2553
2554 See Also
2555 --------
2556 numpy.around : Round values of an np.array.
2557 DataFrame.round : Round values of a DataFrame.
2558
2559 Examples
2560 --------
2561 >>> s = pd.Series([0.1, 1.3, 2.7])
2562 >>> s.round()
2563 0 0.0
2564 1 1.0
2565 2 3.0
2566 dtype: float64
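
When ``decimals`` is negative, values are rounded to the left of the
decimal point:

>>> pd.Series([12.3, 45.6]).round(-1)
0 10.0
1 50.0
dtype: float64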
2567 """
2568 nv.validate_round(args, kwargs)
2569 result = self._values.round(decimals)
2570 result = self._constructor(result, index=self.index, copy=False).__finalize__(
2571 self, method="round"
2572 )
2573
2574 return result
2575
2576 @overload
2577 def quantile(
2578 self, q: float = ..., interpolation: QuantileInterpolation = ...
2579 ) -> float:
2580 ...
2581
2582 @overload
2583 def quantile(
2584 self,
2585 q: Sequence[float] | AnyArrayLike,
2586 interpolation: QuantileInterpolation = ...,
2587 ) -> Series:
2588 ...
2589
2590 @overload
2591 def quantile(
2592 self,
2593 q: float | Sequence[float] | AnyArrayLike = ...,
2594 interpolation: QuantileInterpolation = ...,
2595 ) -> float | Series:
2596 ...
2597
2598 def quantile(
2599 self,
2600 q: float | Sequence[float] | AnyArrayLike = 0.5,
2601 interpolation: QuantileInterpolation = "linear",
2602 ) -> float | Series:
2603 """
2604 Return value at the given quantile.
2605
2606 Parameters
2607 ----------
2608 q : float or array-like, default 0.5 (50% quantile)
2609 The quantile(s) to compute, which can lie in range: 0 <= q <= 1.
2610 interpolation : {'linear', 'lower', 'higher', 'midpoint', 'nearest'}
2611 This optional parameter specifies the interpolation method to use,
2612 when the desired quantile lies between two data points `i` and `j`:
2613
2614 * linear: `i + (j - i) * fraction`, where `fraction` is the
2615 fractional part of the index surrounded by `i` and `j`.
2616 * lower: `i`.
2617 * higher: `j`.
2618 * nearest: `i` or `j` whichever is nearest.
2619 * midpoint: (`i` + `j`) / 2.
2620
2621 Returns
2622 -------
2623 float or Series
2624 If ``q`` is an array, a Series will be returned where the
2625 index is ``q`` and the values are the quantiles, otherwise
2626 a float will be returned.
2627
2628 See Also
2629 --------
2630 core.window.Rolling.quantile : Calculate the rolling quantile.
2631 numpy.percentile : Returns the q-th percentile(s) of the array elements.
2632
2633 Examples
2634 --------
2635 >>> s = pd.Series([1, 2, 3, 4])
2636 >>> s.quantile(.5)
2637 2.5
2638 >>> s.quantile([.25, .5, .75])
2639 0.25 1.75
2640 0.50 2.50
2641 0.75 3.25
2642 dtype: float64
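
The ``interpolation`` parameter controls how a quantile lying between
two data points is computed; 'lower', for instance, picks the smaller
of the two surrounding values:

>>> s.quantile(.5, interpolation='lower')
2.0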
2643 """
2644 validate_percentile(q)
2645
2646 # We dispatch to DataFrame so that core.internals only has to worry
2647 # about 2D cases.
2648 df = self.to_frame()
2649
2650 result = df.quantile(q=q, interpolation=interpolation, numeric_only=False)
2651 if result.ndim == 2:
2652 result = result.iloc[:, 0]
2653
2654 if is_list_like(q):
2655 result.name = self.name
2656 idx = Index(q, dtype=np.float64)
2657 return self._constructor(result, index=idx, name=self.name)
2658 else:
2659 # scalar
2660 return result.iloc[0]
2661
2662 def corr(
2663 self,
2664 other: Series,
2665 method: CorrelationMethod = "pearson",
2666 min_periods: int | None = None,
2667 ) -> float:
2668 """
2669 Compute correlation with `other` Series, excluding missing values.
2670
2671 The two `Series` objects are not required to be the same length and will be
2672 aligned internally before the correlation function is applied.
2673
2674 Parameters
2675 ----------
2676 other : Series
2677 Series with which to compute the correlation.
2678 method : {'pearson', 'kendall', 'spearman'} or callable
2679 Method used to compute correlation:
2680
2681 - pearson : Standard correlation coefficient
2682 - kendall : Kendall Tau correlation coefficient
2683 - spearman : Spearman rank correlation
2684 - callable: Callable with input two 1d ndarrays and returning a float.
2685
2686 .. warning::
2687 Note that the returned matrix from corr will have 1 along the
2688 diagonals and will be symmetric regardless of the callable's
2689 behavior.
2690 min_periods : int, optional
2691 Minimum number of observations needed to have a valid result.
2692
2693 Returns
2694 -------
2695 float
2696 Correlation with other.
2697
2698 See Also
2699 --------
2700 DataFrame.corr : Compute pairwise correlation between columns.
2701 DataFrame.corrwith : Compute pairwise correlation with another
2702 DataFrame or Series.
2703
2704 Notes
2705 -----
2706 Pearson, Kendall and Spearman correlation are currently computed using pairwise complete observations.
2707
2708 * `Pearson correlation coefficient <https://en.wikipedia.org/wiki/Pearson_correlation_coefficient>`_
2709 * `Kendall rank correlation coefficient <https://en.wikipedia.org/wiki/Kendall_rank_correlation_coefficient>`_
2710 * `Spearman's rank correlation coefficient <https://en.wikipedia.org/wiki/Spearman%27s_rank_correlation_coefficient>`_
2711
2712 Examples
2713 --------
2714 >>> def histogram_intersection(a, b):
2715 ... v = np.minimum(a, b).sum().round(decimals=1)
2716 ... return v
2717 >>> s1 = pd.Series([.2, .0, .6, .2])
2718 >>> s2 = pd.Series([.3, .6, .0, .1])
2719 >>> s1.corr(s2, method=histogram_intersection)
2720 0.3
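
The default Pearson correlation for the same data:

>>> s1.corr(s2)  # doctest: +ELLIPSIS
-0.851...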
2721 """ # noqa:E501
2722 this, other = self.align(other, join="inner", copy=False)
2723 if len(this) == 0:
2724 return np.nan
2725
2726 if method in ["pearson", "spearman", "kendall"] or callable(method):
2727 return nanops.nancorr(
2728 this.values, other.values, method=method, min_periods=min_periods
2729 )
2730
2731 raise ValueError(
2732 "method must be either 'pearson', "
2733 "'spearman', 'kendall', or a callable, "
2734 f"'{method}' was supplied"
2735 )
2736
2737 def cov(
2738 self,
2739 other: Series,
2740 min_periods: int | None = None,
2741 ddof: int | None = 1,
2742 ) -> float:
2743 """
2744 Compute covariance with Series, excluding missing values.
2745
2746 The two `Series` objects are not required to be the same length and
2747 will be aligned internally before the covariance is calculated.
2748
2749 Parameters
2750 ----------
2751 other : Series
2752 Series with which to compute the covariance.
2753 min_periods : int, optional
2754 Minimum number of observations needed to have a valid result.
2755 ddof : int, default 1
2756 Delta degrees of freedom. The divisor used in calculations
2757 is ``N - ddof``, where ``N`` represents the number of elements.
2758
2759 .. versionadded:: 1.1.0
2760
2761 Returns
2762 -------
2763 float
Covariance between Series and other, normalized by N - ddof
(by default N - 1, the unbiased estimator).
2766
2767 See Also
2768 --------
2769 DataFrame.cov : Compute pairwise covariance of columns.
2770
2771 Examples
2772 --------
2773 >>> s1 = pd.Series([0.90010907, 0.13484424, 0.62036035])
2774 >>> s2 = pd.Series([0.12528585, 0.26962463, 0.51111198])
2775 >>> s1.cov(s2)
2776 -0.01685762652715874
2777 """
2778 this, other = self.align(other, join="inner", copy=False)
2779 if len(this) == 0:
2780 return np.nan
2781 return nanops.nancov(
2782 this.values, other.values, min_periods=min_periods, ddof=ddof
2783 )
2784
2785 @doc(
2786 klass="Series",
2787 extra_params="",
2788 other_klass="DataFrame",
2789 examples=dedent(
2790 """
2791 Difference with previous row
2792
2793 >>> s = pd.Series([1, 1, 2, 3, 5, 8])
2794 >>> s.diff()
2795 0 NaN
2796 1 0.0
2797 2 1.0
2798 3 1.0
2799 4 2.0
2800 5 3.0
2801 dtype: float64
2802
2803 Difference with 3rd previous row
2804
2805 >>> s.diff(periods=3)
2806 0 NaN
2807 1 NaN
2808 2 NaN
2809 3 2.0
2810 4 4.0
2811 5 6.0
2812 dtype: float64
2813
2814 Difference with following row
2815
2816 >>> s.diff(periods=-1)
2817 0 0.0
2818 1 -1.0
2819 2 -1.0
2820 3 -2.0
2821 4 -3.0
2822 5 NaN
2823 dtype: float64
2824
2825 Overflow in input dtype
2826
2827 >>> s = pd.Series([1, 0], dtype=np.uint8)
2828 >>> s.diff()
2829 0 NaN
2830 1 255.0
2831 dtype: float64"""
2832 ),
2833 )
2834 def diff(self, periods: int = 1) -> Series:
2835 """
2836 First discrete difference of element.
2837
2838 Calculates the difference of a {klass} element compared with another
2839 element in the {klass} (default is element in previous row).
2840
2841 Parameters
2842 ----------
2843 periods : int, default 1
2844 Periods to shift for calculating difference, accepts negative
2845 values.
2846 {extra_params}
2847 Returns
2848 -------
2849 {klass}
2850 First differences of the Series.
2851
2852 See Also
2853 --------
2854 {klass}.pct_change: Percent change over given number of periods.
2855 {klass}.shift: Shift index by desired number of periods with an
2856 optional time freq.
2857 {other_klass}.diff: First discrete difference of object.
2858
2859 Notes
2860 -----
2861 For boolean dtypes, this uses :meth:`operator.xor` rather than
2862 :meth:`operator.sub`.
2863 The result is calculated according to current dtype in {klass},
2864 however dtype of the result is always float64.
2865
2866 Examples
2867 --------
2868 {examples}
2869 """
2870 result = algorithms.diff(self._values, periods)
2871 return self._constructor(result, index=self.index, copy=False).__finalize__(
2872 self, method="diff"
2873 )
2874
2875 def autocorr(self, lag: int = 1) -> float:
2876 """
2877 Compute the lag-N autocorrelation.
2878
2879 This method computes the Pearson correlation between
2880 the Series and its shifted self.
2881
2882 Parameters
2883 ----------
2884 lag : int, default 1
2885 Number of lags to apply before performing autocorrelation.
2886
2887 Returns
2888 -------
2889 float
2890 The Pearson correlation between self and self.shift(lag).
2891
2892 See Also
2893 --------
2894 Series.corr : Compute the correlation between two Series.
2895 Series.shift : Shift index by desired number of periods.
2896 DataFrame.corr : Compute pairwise correlation of columns.
2897 DataFrame.corrwith : Compute pairwise correlation between rows or
2898 columns of two DataFrame objects.
2899
2900 Notes
2901 -----
If the Pearson correlation is not well defined, 'NaN' is returned.
2903
2904 Examples
2905 --------
2906 >>> s = pd.Series([0.25, 0.5, 0.2, -0.05])
2907 >>> s.autocorr() # doctest: +ELLIPSIS
2908 0.10355...
2909 >>> s.autocorr(lag=2) # doctest: +ELLIPSIS
2910 -0.99999...
2911
2912 If the Pearson correlation is not well defined, then 'NaN' is returned.
2913
2914 >>> s = pd.Series([1, 0, 0, 0])
2915 >>> s.autocorr()
2916 nan
2917 """
2918 return self.corr(self.shift(lag))
2919
2920 def dot(self, other: AnyArrayLike) -> Series | np.ndarray:
2921 """
2922 Compute the dot product between the Series and the columns of other.
2923
This method computes the dot product between the Series and another
one, or the Series and each column of a DataFrame, or the Series and
each column of an array.
2927
It can also be called using `self @ other`.
2929
2930 Parameters
2931 ----------
2932 other : Series, DataFrame or array-like
The other object with which to compute the dot product.
2934
2935 Returns
2936 -------
2937 scalar, Series or numpy.ndarray
The dot product of the Series and other if other is a Series;
a Series of the dot products of the Series and each column of
other if other is a DataFrame; or a numpy.ndarray of the dot
products of the Series and each column of the numpy array.
2942
2943 See Also
2944 --------
2945 DataFrame.dot: Compute the matrix product with the DataFrame.
2946 Series.mul: Multiplication of series and other, element-wise.
2947
2948 Notes
2949 -----
The Series and other must share the same index if other is a Series
or a DataFrame.
2952
2953 Examples
2954 --------
2955 >>> s = pd.Series([0, 1, 2, 3])
2956 >>> other = pd.Series([-1, 2, -3, 4])
2957 >>> s.dot(other)
2958 8
2959 >>> s @ other
2960 8
2961 >>> df = pd.DataFrame([[0, 1], [-2, 3], [4, -5], [6, 7]])
2962 >>> s.dot(df)
2963 0 24
2964 1 14
2965 dtype: int64
2966 >>> arr = np.array([[0, 1], [-2, 3], [4, -5], [6, 7]])
2967 >>> s.dot(arr)
2968 array([24, 14])
2969 """
2970 if isinstance(other, (Series, ABCDataFrame)):
2971 common = self.index.union(other.index)
2972 if len(common) > len(self.index) or len(common) > len(other.index):
2973 raise ValueError("matrices are not aligned")
2974
2975 left = self.reindex(index=common, copy=False)
2976 right = other.reindex(index=common, copy=False)
2977 lvals = left.values
2978 rvals = right.values
2979 else:
2980 lvals = self.values
2981 rvals = np.asarray(other)
2982 if lvals.shape[0] != rvals.shape[0]:
2983 raise Exception(
2984 f"Dot product shape mismatch, {lvals.shape} vs {rvals.shape}"
2985 )
2986
2987 if isinstance(other, ABCDataFrame):
2988 return self._constructor(
2989 np.dot(lvals, rvals), index=other.columns, copy=False
2990 ).__finalize__(self, method="dot")
2991 elif isinstance(other, Series):
2992 return np.dot(lvals, rvals)
2993 elif isinstance(rvals, np.ndarray):
2994 return np.dot(lvals, rvals)
2995 else: # pragma: no cover
2996 raise TypeError(f"unsupported type: {type(other)}")
2997
2998 def __matmul__(self, other):
2999 """
Matrix multiplication using the binary `@` operator.
3001 """
3002 return self.dot(other)
3003
3004 def __rmatmul__(self, other):
3005 """
Matrix multiplication using the binary `@` operator.
3007 """
3008 return self.dot(np.transpose(other))
3009
3010 @doc(base.IndexOpsMixin.searchsorted, klass="Series")
3011 # Signature of "searchsorted" incompatible with supertype "IndexOpsMixin"
3012 def searchsorted( # type: ignore[override]
3013 self,
3014 value: NumpyValueArrayLike | ExtensionArray,
3015 side: Literal["left", "right"] = "left",
3016 sorter: NumpySorter = None,
3017 ) -> npt.NDArray[np.intp] | np.intp:
3018 return base.IndexOpsMixin.searchsorted(self, value, side=side, sorter=sorter)
3019
3020 # -------------------------------------------------------------------
3021 # Combination
3022
3023 def _append(
3024 self, to_append, ignore_index: bool = False, verify_integrity: bool = False
3025 ):
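"""
Concatenate self with one or more other Series.

A thin private wrapper around :func:`pandas.concat`; ``to_append`` may
be a single Series or a list/tuple of Series, but not a DataFrame.
"""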
3026 from pandas.core.reshape.concat import concat
3027
3028 if isinstance(to_append, (list, tuple)):
3029 to_concat = [self]
3030 to_concat.extend(to_append)
3031 else:
3032 to_concat = [self, to_append]
if any(isinstance(x, ABCDataFrame) for x in to_concat[1:]):
3034 msg = "to_append should be a Series or list/tuple of Series, got DataFrame"
3035 raise TypeError(msg)
3036 return concat(
3037 to_concat, ignore_index=ignore_index, verify_integrity=verify_integrity
3038 )
3039
3040 def _binop(self, other: Series, func, level=None, fill_value=None):
3041 """
3042 Perform generic binary operation with optional fill value.
3043
3044 Parameters
3045 ----------
3046 other : Series
3047 func : binary operator
3048 fill_value : float or object
3049 Value to substitute for NA/null values. If both Series are NA in a
3050 location, the result will be NA regardless of the passed fill value.
3051 level : int or level name, default None
3052 Broadcast across a level, matching Index values on the
3053 passed MultiIndex level.
3054
3055 Returns
3056 -------
3057 Series
3058 """
3059 if not isinstance(other, Series):
3060 raise AssertionError("Other operand must be Series")
3061
3062 this = self
3063
3064 if not self.index.equals(other.index):
3065 this, other = self.align(other, level=level, join="outer", copy=False)
3066
3067 this_vals, other_vals = ops.fill_binop(this._values, other._values, fill_value)
3068
3069 with np.errstate(all="ignore"):
3070 result = func(this_vals, other_vals)
3071
3072 name = ops.get_op_result_name(self, other)
3073 return this._construct_result(result, name)
3074
3075 def _construct_result(
3076 self, result: ArrayLike | tuple[ArrayLike, ArrayLike], name: Hashable
3077 ) -> Series | tuple[Series, Series]:
3078 """
3079 Construct an appropriately-labelled Series from the result of an op.
3080
3081 Parameters
3082 ----------
3083 result : ndarray or ExtensionArray
3084 name : Label
3085
3086 Returns
3087 -------
3088 Series
3089 In the case of __divmod__ or __rdivmod__, a 2-tuple of Series.
3090 """
3091 if isinstance(result, tuple):
3092 # produced by divmod or rdivmod
3093
3094 res1 = self._construct_result(result[0], name=name)
3095 res2 = self._construct_result(result[1], name=name)
3096
3097 # GH#33427 assertions to keep mypy happy
3098 assert isinstance(res1, Series)
3099 assert isinstance(res2, Series)
3100 return (res1, res2)
3101
3102 # TODO: result should always be ArrayLike, but this fails for some
3103 # JSONArray tests
3104 dtype = getattr(result, "dtype", None)
3105 out = self._constructor(result, index=self.index, dtype=dtype)
3106 out = out.__finalize__(self)
3107
3108 # Set the result's name after __finalize__ is called because __finalize__
3109 # would set it back to self.name
3110 out.name = name
3111 return out
3112
3113 @doc(
3114 _shared_docs["compare"],
3115 """
3116Returns
3117-------
3118Series or DataFrame
3119 If axis is 0 or 'index' the result will be a Series.
3120 The resulting index will be a MultiIndex with 'self' and 'other'
3121 stacked alternately at the inner level.
3122
3123 If axis is 1 or 'columns' the result will be a DataFrame.
3124 It will have two columns namely 'self' and 'other'.
3125
3126See Also
3127--------
3128DataFrame.compare : Compare with another DataFrame and show differences.
3129
3130Notes
3131-----
3132Matching NaNs will not appear as a difference.
3133
3134Examples
3135--------
3136>>> s1 = pd.Series(["a", "b", "c", "d", "e"])
3137>>> s2 = pd.Series(["a", "a", "c", "b", "e"])
3138
3139Align the differences on columns
3140
3141>>> s1.compare(s2)
3142 self other
31431 b a
31443 d b
3145
3146Stack the differences on indices
3147
3148>>> s1.compare(s2, align_axis=0)
31491 self b
3150 other a
31513 self d
3152 other b
3153dtype: object
3154
3155Keep all original rows
3156
3157>>> s1.compare(s2, keep_shape=True)
3158 self other
31590 NaN NaN
31601 b a
31612 NaN NaN
31623 d b
31634 NaN NaN
3164
3165Keep all original rows and also all original values
3166
3167>>> s1.compare(s2, keep_shape=True, keep_equal=True)
3168 self other
31690 a a
31701 b a
31712 c c
31723 d b
31734 e e
3174""",
3175 klass=_shared_doc_kwargs["klass"],
3176 )
3177 def compare(
3178 self,
3179 other: Series,
3180 align_axis: Axis = 1,
3181 keep_shape: bool = False,
3182 keep_equal: bool = False,
3183 result_names: Suffixes = ("self", "other"),
3184 ) -> DataFrame | Series:
3185 return super().compare(
3186 other=other,
3187 align_axis=align_axis,
3188 keep_shape=keep_shape,
3189 keep_equal=keep_equal,
3190 result_names=result_names,
3191 )
3192
3193 def combine(
3194 self,
3195 other: Series | Hashable,
3196 func: Callable[[Hashable, Hashable], Hashable],
3197 fill_value: Hashable = None,
3198 ) -> Series:
3199 """
3200 Combine the Series with a Series or scalar according to `func`.
3201
3202 Combine the Series and `other` using `func` to perform elementwise
3203 selection for combined Series.
`fill_value` is used when a value is missing at some index
in one of the two objects being combined.
3206
3207 Parameters
3208 ----------
3209 other : Series or scalar
3210 The value(s) to be combined with the `Series`.
3211 func : function
3212 Function that takes two scalars as inputs and returns an element.
3213 fill_value : scalar, optional
3214 The value to assume when an index is missing from
3215 one Series or the other. The default specifies to use the
3216 appropriate NaN value for the underlying dtype of the Series.
3217
3218 Returns
3219 -------
3220 Series
3221 The result of combining the Series with the other object.
3222
3223 See Also
3224 --------
3225 Series.combine_first : Combine Series values, choosing the calling
3226 Series' values first.
3227
3228 Examples
3229 --------
Consider two datasets, ``s1`` and ``s2``, containing
the highest recorded speeds of different birds.
3232
3233 >>> s1 = pd.Series({'falcon': 330.0, 'eagle': 160.0})
3234 >>> s1
3235 falcon 330.0
3236 eagle 160.0
3237 dtype: float64
3238 >>> s2 = pd.Series({'falcon': 345.0, 'eagle': 200.0, 'duck': 30.0})
3239 >>> s2
3240 falcon 345.0
3241 eagle 200.0
3242 duck 30.0
3243 dtype: float64
3244
3245 Now, to combine the two datasets and view the highest speeds
3246 of the birds across the two datasets
3247
3248 >>> s1.combine(s2, max)
3249 duck NaN
3250 eagle 200.0
3251 falcon 345.0
3252 dtype: float64
3253
In the previous example, the resulting value for duck is missing,
because the maximum of a NaN and a float is a NaN.
So, in the next example, we set ``fill_value=0``, so that the
maximum value returned is always taken from one of the two datasets.
3258
3259 >>> s1.combine(s2, max, fill_value=0)
3260 duck 30.0
3261 eagle 200.0
3262 falcon 345.0
3263 dtype: float64
3264 """
3265 if fill_value is None:
3266 fill_value = na_value_for_dtype(self.dtype, compat=False)
3267
3268 if isinstance(other, Series):
3269 # If other is a Series, result is based on union of Series,
3270 # so do this element by element
3271 new_index = self.index.union(other.index)
3272 new_name = ops.get_op_result_name(self, other)
3273 new_values = np.empty(len(new_index), dtype=object)
3274 for i, idx in enumerate(new_index):
3275 lv = self.get(idx, fill_value)
3276 rv = other.get(idx, fill_value)
3277 with np.errstate(all="ignore"):
3278 new_values[i] = func(lv, rv)
3279 else:
3280 # Assume that other is a scalar, so apply the function for
3281 # each element in the Series
3282 new_index = self.index
3283 new_values = np.empty(len(new_index), dtype=object)
3284 with np.errstate(all="ignore"):
3285 new_values[:] = [func(lv, other) for lv in self._values]
3286 new_name = self.name
3287
3288 # try_float=False is to match agg_series
3289 npvalues = lib.maybe_convert_objects(new_values, try_float=False)
3290 res_values = maybe_cast_pointwise_result(npvalues, self.dtype, same_dtype=False)
3291 return self._constructor(res_values, index=new_index, name=new_name, copy=False)
3292
3293 def combine_first(self, other) -> Series:
3294 """
3295 Update null elements with value in the same location in 'other'.
3296
3297 Combine two Series objects by filling null values in one Series with
3298 non-null values from the other Series. Result index will be the union
3299 of the two indexes.
3300
3301 Parameters
3302 ----------
3303 other : Series
3304 The value(s) to be used for filling null values.
3305
3306 Returns
3307 -------
3308 Series
3309 The result of combining the provided Series with the other object.
3310
3311 See Also
3312 --------
3313 Series.combine : Perform element-wise operation on two Series
3314 using a given function.
3315
3316 Examples
3317 --------
3318 >>> s1 = pd.Series([1, np.nan])
3319 >>> s2 = pd.Series([3, 4, 5])
3320 >>> s1.combine_first(s2)
3321 0 1.0
3322 1 4.0
3323 2 5.0
3324 dtype: float64
3325
3326 Null values still persist if the location of that null value
3327 does not exist in `other`
3328
3329 >>> s1 = pd.Series({'falcon': np.nan, 'eagle': 160.0})
3330 >>> s2 = pd.Series({'eagle': 200.0, 'duck': 30.0})
3331 >>> s1.combine_first(s2)
3332 duck 30.0
3333 eagle 160.0
3334 falcon NaN
3335 dtype: float64
3336 """
3337 new_index = self.index.union(other.index)
3338 this = self.reindex(new_index, copy=False)
3339 other = other.reindex(new_index, copy=False)
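# if `this` is datetime-like and `other` is not, coerce `other` to
# datetime so the `where` below can compare the two consistently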
3340 if this.dtype.kind == "M" and other.dtype.kind != "M":
3341 other = to_datetime(other)
3342
3343 return this.where(notna(this), other)
3344
3345 def update(self, other: Series | Sequence | Mapping) -> None:
3346 """
3347 Modify Series in place using values from passed Series.
3348
3349 Uses non-NA values from passed Series to make updates. Aligns
3350 on index.
3351
3352 Parameters
3353 ----------
3354 other : Series, or object coercible into Series
3355
3356 Examples
3357 --------
3358 >>> s = pd.Series([1, 2, 3])
3359 >>> s.update(pd.Series([4, 5, 6]))
3360 >>> s
3361 0 4
3362 1 5
3363 2 6
3364 dtype: int64
3365
3366 >>> s = pd.Series(['a', 'b', 'c'])
3367 >>> s.update(pd.Series(['d', 'e'], index=[0, 2]))
3368 >>> s
3369 0 d
3370 1 b
3371 2 e
3372 dtype: object
3373
3374 >>> s = pd.Series([1, 2, 3])
3375 >>> s.update(pd.Series([4, 5, 6, 7, 8]))
3376 >>> s
3377 0 4
3378 1 5
3379 2 6
3380 dtype: int64
3381
If ``other`` contains NaNs, the corresponding values are not updated
in the original Series.
3384
3385 >>> s = pd.Series([1, 2, 3])
3386 >>> s.update(pd.Series([4, np.nan, 6]))
3387 >>> s
3388 0 4
3389 1 2
3390 2 6
3391 dtype: int64
3392
3393 ``other`` can also be a non-Series object type
3394 that is coercible into a Series
3395
3396 >>> s = pd.Series([1, 2, 3])
3397 >>> s.update([4, np.nan, 6])
3398 >>> s
3399 0 4
3400 1 2
3401 2 6
3402 dtype: int64
3403
3404 >>> s = pd.Series([1, 2, 3])
3405 >>> s.update({1: 9})
3406 >>> s
3407 0 1
3408 1 9
3409 2 3
3410 dtype: int64
3411 """
3412
3413 if not isinstance(other, Series):
3414 other = Series(other)
3415
3416 other = other.reindex_like(self)
3417 mask = notna(other)
3418
3419 self._mgr = self._mgr.putmask(mask=mask, new=other)
3420 self._maybe_update_cacher()
3421
3422 # ----------------------------------------------------------------------
3423 # Reindexing, sorting
3424
3425 @overload
3426 def sort_values(
3427 self,
3428 *,
3429 axis: Axis = ...,
3430 ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
3431 inplace: Literal[False] = ...,
3432 kind: str = ...,
3433 na_position: str = ...,
3434 ignore_index: bool = ...,
3435 key: ValueKeyFunc = ...,
3436 ) -> Series:
3437 ...
3438
3439 @overload
3440 def sort_values(
3441 self,
3442 *,
3443 axis: Axis = ...,
3444 ascending: bool | int | Sequence[bool] | Sequence[int] = ...,
3445 inplace: Literal[True],
3446 kind: str = ...,
3447 na_position: str = ...,
3448 ignore_index: bool = ...,
3449 key: ValueKeyFunc = ...,
3450 ) -> None:
3451 ...
3452
3453 def sort_values(
3454 self,
3455 *,
3456 axis: Axis = 0,
3457 ascending: bool | int | Sequence[bool] | Sequence[int] = True,
3458 inplace: bool = False,
3459 kind: str = "quicksort",
3460 na_position: str = "last",
3461 ignore_index: bool = False,
3462 key: ValueKeyFunc = None,
3463 ) -> Series | None:
3464 """
3465 Sort by the values.
3466
3467 Sort a Series in ascending or descending order by some
3468 criterion.
3469
3470 Parameters
3471 ----------
3472 axis : {0 or 'index'}
3473 Unused. Parameter needed for compatibility with DataFrame.
3474 ascending : bool or list of bools, default True
3475 If True, sort values in ascending order, otherwise descending.
3476 inplace : bool, default False
3477 If True, perform operation in-place.
3478 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
3479 Choice of sorting algorithm. See also :func:`numpy.sort` for more
3480 information. 'mergesort' and 'stable' are the only stable algorithms.
na_position : {'first', 'last'}, default 'last'
3482 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
3483 the end.
3484 ignore_index : bool, default False
3485 If True, the resulting axis will be labeled 0, 1, …, n - 1.
3486 key : callable, optional
3487 If not None, apply the key function to the series values
3488 before sorting. This is similar to the `key` argument in the
3489 builtin :meth:`sorted` function, with the notable difference that
3490 this `key` function should be *vectorized*. It should expect a
3491 ``Series`` and return an array-like.
3492
3493 .. versionadded:: 1.1.0
3494
3495 Returns
3496 -------
3497 Series or None
3498 Series ordered by values or None if ``inplace=True``.
3499
3500 See Also
3501 --------
3502 Series.sort_index : Sort by the Series indices.
3503 DataFrame.sort_values : Sort DataFrame by the values along either axis.
3504 DataFrame.sort_index : Sort DataFrame by indices.
3505
3506 Examples
3507 --------
3508 >>> s = pd.Series([np.nan, 1, 3, 10, 5])
3509 >>> s
3510 0 NaN
3511 1 1.0
3512 2 3.0
3513 3 10.0
3514 4 5.0
3515 dtype: float64
3516
Sort values in ascending order (default behaviour)
3518
3519 >>> s.sort_values(ascending=True)
3520 1 1.0
3521 2 3.0
3522 4 5.0
3523 3 10.0
3524 0 NaN
3525 dtype: float64
3526
Sort values in descending order
3528
3529 >>> s.sort_values(ascending=False)
3530 3 10.0
3531 4 5.0
3532 2 3.0
3533 1 1.0
3534 0 NaN
3535 dtype: float64
3536
3537 Sort values putting NAs first
3538
3539 >>> s.sort_values(na_position='first')
3540 0 NaN
3541 1 1.0
3542 2 3.0
3543 4 5.0
3544 3 10.0
3545 dtype: float64
3546
3547 Sort a series of strings
3548
3549 >>> s = pd.Series(['z', 'b', 'd', 'a', 'c'])
3550 >>> s
3551 0 z
3552 1 b
3553 2 d
3554 3 a
3555 4 c
3556 dtype: object
3557
3558 >>> s.sort_values()
3559 3 a
3560 1 b
3561 4 c
3562 2 d
3563 0 z
3564 dtype: object
3565
3566 Sort using a key function. Your `key` function will be
3567 given the ``Series`` of values and should return an array-like.
3568
3569 >>> s = pd.Series(['a', 'B', 'c', 'D', 'e'])
3570 >>> s.sort_values()
3571 1 B
3572 3 D
3573 0 a
3574 2 c
3575 4 e
3576 dtype: object
3577 >>> s.sort_values(key=lambda x: x.str.lower())
3578 0 a
3579 1 B
3580 2 c
3581 3 D
3582 4 e
3583 dtype: object
3584
3585 NumPy ufuncs work well here. For example, we can
3586 sort by the ``sin`` of the value
3587
3588 >>> s = pd.Series([-4, -2, 0, 2, 4])
3589 >>> s.sort_values(key=np.sin)
3590 1 -2
3591 4 4
3592 2 0
3593 0 -4
3594 3 2
3595 dtype: int64
3596
3597 More complicated user-defined functions can be used,
3598 as long as they expect a Series and return an array-like
3599
3600 >>> s.sort_values(key=lambda x: (np.tan(x.cumsum())))
3601 0 -4
3602 3 2
3603 4 4
3604 1 -2
3605 2 0
3606 dtype: int64
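
With ``ignore_index=True``, the resulting index is relabeled 0, 1, …, n - 1:

>>> pd.Series(['z', 'b', 'a']).sort_values(ignore_index=True)
0 a
1 b
2 z
dtype: object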
3607 """
3608 inplace = validate_bool_kwarg(inplace, "inplace")
3609 # Validate the axis parameter
3610 self._get_axis_number(axis)
3611
3612 # GH 5856/5853
3613 if inplace and self._is_cached:
3614 raise ValueError(
3615 "This Series is a view of some other array, to "
3616 "sort in-place you must create a copy"
3617 )
3618
3619 if is_list_like(ascending):
3620 ascending = cast(Sequence[Union[bool, int]], ascending)
3621 if len(ascending) != 1:
3622 raise ValueError(
3623 f"Length of ascending ({len(ascending)}) must be 1 for Series"
3624 )
3625 ascending = ascending[0]
3626
3627 ascending = validate_ascending(ascending)
3628
3629 if na_position not in ["first", "last"]:
3630 raise ValueError(f"invalid na_position: {na_position}")
3631
3632 # GH 35922. Make sorting stable by leveraging nargsort
3633 values_to_sort = ensure_key_mapped(self, key)._values if key else self._values
3634 sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position)
3635
3636 if is_range_indexer(sorted_index, len(sorted_index)):
3637 if inplace:
3638 return self._update_inplace(self)
3639 return self.copy(deep=None)
3640
3641 result = self._constructor(
3642 self._values[sorted_index], index=self.index[sorted_index], copy=False
3643 )
3644
3645 if ignore_index:
3646 result.index = default_index(len(sorted_index))
3647
3648 if not inplace:
3649 return result.__finalize__(self, method="sort_values")
3650 self._update_inplace(result)
3651 return None
3652
3653 @overload
3654 def sort_index(
3655 self,
3656 *,
3657 axis: Axis = ...,
3658 level: IndexLabel = ...,
3659 ascending: bool | Sequence[bool] = ...,
3660 inplace: Literal[True],
3661 kind: SortKind = ...,
3662 na_position: NaPosition = ...,
3663 sort_remaining: bool = ...,
3664 ignore_index: bool = ...,
3665 key: IndexKeyFunc = ...,
3666 ) -> None:
3667 ...
3668
3669 @overload
3670 def sort_index(
3671 self,
3672 *,
3673 axis: Axis = ...,
3674 level: IndexLabel = ...,
3675 ascending: bool | Sequence[bool] = ...,
3676 inplace: Literal[False] = ...,
3677 kind: SortKind = ...,
3678 na_position: NaPosition = ...,
3679 sort_remaining: bool = ...,
3680 ignore_index: bool = ...,
3681 key: IndexKeyFunc = ...,
3682 ) -> Series:
3683 ...
3684
3685 @overload
3686 def sort_index(
3687 self,
3688 *,
3689 axis: Axis = ...,
3690 level: IndexLabel = ...,
3691 ascending: bool | Sequence[bool] = ...,
3692 inplace: bool = ...,
3693 kind: SortKind = ...,
3694 na_position: NaPosition = ...,
3695 sort_remaining: bool = ...,
3696 ignore_index: bool = ...,
3697 key: IndexKeyFunc = ...,
3698 ) -> Series | None:
3699 ...
3700
3701 def sort_index(
3702 self,
3703 *,
3704 axis: Axis = 0,
3705 level: IndexLabel = None,
3706 ascending: bool | Sequence[bool] = True,
3707 inplace: bool = False,
3708 kind: SortKind = "quicksort",
3709 na_position: NaPosition = "last",
3710 sort_remaining: bool = True,
3711 ignore_index: bool = False,
3712 key: IndexKeyFunc = None,
3713 ) -> Series | None:
3714 """
3715 Sort Series by index labels.
3716
3717 Returns a new Series sorted by label if `inplace` argument is
3718 ``False``, otherwise updates the original series and returns None.
3719
3720 Parameters
3721 ----------
3722 axis : {0 or 'index'}
3723 Unused. Parameter needed for compatibility with DataFrame.
3724 level : int, optional
3725 If not None, sort on values in specified index level(s).
3726 ascending : bool or list-like of bools, default True
3727 Sort ascending vs. descending. When the index is a MultiIndex the
3728 sort direction can be controlled for each level individually.
3729 inplace : bool, default False
3730 If True, perform operation in-place.
3731 kind : {'quicksort', 'mergesort', 'heapsort', 'stable'}, default 'quicksort'
3732 Choice of sorting algorithm. See also :func:`numpy.sort` for more
3733 information. 'mergesort' and 'stable' are the only stable algorithms. For
3734 DataFrames, this option is only applied when sorting on a single
3735 column or label.
3736 na_position : {'first', 'last'}, default 'last'
3737 If 'first' puts NaNs at the beginning, 'last' puts NaNs at the end.
3738 Not implemented for MultiIndex.
3739 sort_remaining : bool, default True
3740 If True and sorting by level and index is multilevel, sort by other
3741 levels too (in order) after sorting by specified level.
3742 ignore_index : bool, default False
3743 If True, the resulting axis will be labeled 0, 1, …, n - 1.
3744 key : callable, optional
3745 If not None, apply the key function to the index values
3746 before sorting. This is similar to the `key` argument in the
3747 builtin :meth:`sorted` function, with the notable difference that
3748 this `key` function should be *vectorized*. It should expect an
3749 ``Index`` and return an ``Index`` of the same shape.
3750
3751 .. versionadded:: 1.1.0
3752
3753 Returns
3754 -------
3755 Series or None
3756 The original Series sorted by the labels or None if ``inplace=True``.
3757
3758 See Also
3759 --------
3760 DataFrame.sort_index: Sort DataFrame by the index.
3761 DataFrame.sort_values: Sort DataFrame by the value.
3762 Series.sort_values : Sort Series by the value.
3763
3764 Examples
3765 --------
3766 >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, 4])
3767 >>> s.sort_index()
3768 1 c
3769 2 b
3770 3 a
3771 4 d
3772 dtype: object
3773
3774 Sort Descending
3775
3776 >>> s.sort_index(ascending=False)
3777 4 d
3778 3 a
3779 2 b
3780 1 c
3781 dtype: object
3782
3783 By default NaNs are put at the end, but use `na_position` to place
3784 them at the beginning
3785
3786 >>> s = pd.Series(['a', 'b', 'c', 'd'], index=[3, 2, 1, np.nan])
3787 >>> s.sort_index(na_position='first')
3788 NaN d
3789 1.0 c
3790 2.0 b
3791 3.0 a
3792 dtype: object
3793
3794 Specify index level to sort
3795
3796 >>> arrays = [np.array(['qux', 'qux', 'foo', 'foo',
3797 ... 'baz', 'baz', 'bar', 'bar']),
3798 ... np.array(['two', 'one', 'two', 'one',
3799 ... 'two', 'one', 'two', 'one'])]
3800 >>> s = pd.Series([1, 2, 3, 4, 5, 6, 7, 8], index=arrays)
3801 >>> s.sort_index(level=1)
3802 bar one 8
3803 baz one 6
3804 foo one 4
3805 qux one 2
3806 bar two 7
3807 baz two 5
3808 foo two 3
3809 qux two 1
3810 dtype: int64
3811
3812 Does not sort by remaining levels when sorting by levels
3813
3814 >>> s.sort_index(level=1, sort_remaining=False)
3815 qux one 2
3816 foo one 4
3817 baz one 6
3818 bar one 8
3819 qux two 1
3820 foo two 3
3821 baz two 5
3822 bar two 7
3823 dtype: int64
3824
3825 Apply a key function before sorting
3826
3827 >>> s = pd.Series([1, 2, 3, 4], index=['A', 'b', 'C', 'd'])
3828 >>> s.sort_index(key=lambda x : x.str.lower())
3829 A 1
3830 b 2
3831 C 3
3832 d 4
3833 dtype: int64
3834 """
3835
3836 return super().sort_index(
3837 axis=axis,
3838 level=level,
3839 ascending=ascending,
3840 inplace=inplace,
3841 kind=kind,
3842 na_position=na_position,
3843 sort_remaining=sort_remaining,
3844 ignore_index=ignore_index,
3845 key=key,
3846 )
3847
3848 def argsort(
3849 self,
3850 axis: Axis = 0,
3851 kind: SortKind = "quicksort",
3852 order: None = None,
3853 ) -> Series:
3854 """
3855 Return the integer indices that would sort the Series values.
3856
Override ndarray.argsort. Argsorts the values, omitting NA/null values,
3858 and places the result in the same locations as the non-NA values.
3859
3860 Parameters
3861 ----------
3862 axis : {0 or 'index'}
3863 Unused. Parameter needed for compatibility with DataFrame.
3864 kind : {'mergesort', 'quicksort', 'heapsort', 'stable'}, default 'quicksort'
3865 Choice of sorting algorithm. See :func:`numpy.sort` for more
3866 information. 'mergesort' and 'stable' are the only stable algorithms.
3867 order : None
3868 Has no effect but is accepted for compatibility with numpy.
3869
3870 Returns
3871 -------
3872 Series[np.intp]
3873 Positions of values within the sort order with -1 indicating
3874 nan values.
3875
3876 See Also
3877 --------
3878 numpy.ndarray.argsort : Returns the indices that would sort this array.
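
Examples
--------
>>> s = pd.Series([3, 2, 1])
>>> s.argsort()
0 2
1 1
2 0
dtype: int64

NA values keep their position and are assigned -1:

>>> pd.Series([1.0, np.nan, 3.0]).argsort()
0 0
1 -1
2 1
dtype: int64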
3879 """
3880 values = self._values
3881 mask = isna(values)
3882
3883 if mask.any():
3884 result = np.full(len(self), -1, dtype=np.intp)
3885 notmask = ~mask
3886 result[notmask] = np.argsort(values[notmask], kind=kind)
3887 else:
3888 result = np.argsort(values, kind=kind)
3889
3890 res = self._constructor(
3891 result, index=self.index, name=self.name, dtype=np.intp, copy=False
3892 )
3893 return res.__finalize__(self, method="argsort")
3894
3895 def nlargest(
3896 self, n: int = 5, keep: Literal["first", "last", "all"] = "first"
3897 ) -> Series:
3898 """
3899 Return the largest `n` elements.
3900
3901 Parameters
3902 ----------
3903 n : int, default 5
3904 Return this many descending sorted values.
3905 keep : {'first', 'last', 'all'}, default 'first'
3906 When there are duplicate values that cannot all fit in a
3907 Series of `n` elements:
3908
3909 - ``first`` : return the first `n` occurrences in order
3910 of appearance.
3911 - ``last`` : return the last `n` occurrences in reverse
3912 order of appearance.
3913 - ``all`` : keep all occurrences. This can result in a Series of
3914 size larger than `n`.
3915
3916 Returns
3917 -------
3918 Series
3919 The `n` largest values in the Series, sorted in decreasing order.
3920
3921 See Also
3922 --------
3923 Series.nsmallest: Get the `n` smallest elements.
3924 Series.sort_values: Sort Series by values.
3925 Series.head: Return the first `n` rows.
3926
3927 Notes
3928 -----
3929 Faster than ``.sort_values(ascending=False).head(n)`` for small `n`
3930 relative to the size of the ``Series`` object.
3931
3932 Examples
3933 --------
3934 >>> countries_population = {"Italy": 59000000, "France": 65000000,
3935 ... "Malta": 434000, "Maldives": 434000,
3936 ... "Brunei": 434000, "Iceland": 337000,
3937 ... "Nauru": 11300, "Tuvalu": 11300,
3938 ... "Anguilla": 11300, "Montserrat": 5200}
3939 >>> s = pd.Series(countries_population)
3940 >>> s
3941 Italy 59000000
3942 France 65000000
3943 Malta 434000
3944 Maldives 434000
3945 Brunei 434000
3946 Iceland 337000
3947 Nauru 11300
3948 Tuvalu 11300
3949 Anguilla 11300
3950 Montserrat 5200
3951 dtype: int64
3952
3953 The `n` largest elements where ``n=5`` by default.
3954
3955 >>> s.nlargest()
3956 France 65000000
3957 Italy 59000000
3958 Malta 434000
3959 Maldives 434000
3960 Brunei 434000
3961 dtype: int64
3962
3963 The `n` largest elements where ``n=3``. Default `keep` value is 'first'
3964 so Malta will be kept.
3965
3966 >>> s.nlargest(3)
3967 France 65000000
3968 Italy 59000000
3969 Malta 434000
3970 dtype: int64
3971
3972 The `n` largest elements where ``n=3`` and keeping the last duplicates.
3973 Brunei will be kept since it is the last with value 434000 based on
3974 the index order.
3975
3976 >>> s.nlargest(3, keep='last')
3977 France 65000000
3978 Italy 59000000
3979 Brunei 434000
3980 dtype: int64
3981
3982 The `n` largest elements where ``n=3`` with all duplicates kept. Note
3983 that the returned Series has five elements due to the three duplicates.
3984
3985 >>> s.nlargest(3, keep='all')
3986 France 65000000
3987 Italy 59000000
3988 Malta 434000
3989 Maldives 434000
3990 Brunei 434000
3991 dtype: int64
3992 """
3993 return selectn.SelectNSeries(self, n=n, keep=keep).nlargest()
3994
3995 def nsmallest(self, n: int = 5, keep: str = "first") -> Series:
3996 """
3997 Return the smallest `n` elements.
3998
3999 Parameters
4000 ----------
4001 n : int, default 5
4002 Return this many ascending sorted values.
4003 keep : {'first', 'last', 'all'}, default 'first'
4004 When there are duplicate values that cannot all fit in a
4005 Series of `n` elements:
4006
4007 - ``first`` : return the first `n` occurrences in order
4008 of appearance.
4009 - ``last`` : return the last `n` occurrences in reverse
4010 order of appearance.
4011 - ``all`` : keep all occurrences. This can result in a Series of
4012 size larger than `n`.
4013
4014 Returns
4015 -------
4016 Series
4017 The `n` smallest values in the Series, sorted in increasing order.
4018
4019 See Also
4020 --------
4021 Series.nlargest: Get the `n` largest elements.
4022 Series.sort_values: Sort Series by values.
4023 Series.head: Return the first `n` rows.
4024
4025 Notes
4026 -----
4027 Faster than ``.sort_values().head(n)`` for small `n` relative to
4028 the size of the ``Series`` object.
4029
4030 Examples
4031 --------
4032 >>> countries_population = {"Italy": 59000000, "France": 65000000,
4033 ... "Brunei": 434000, "Malta": 434000,
4034 ... "Maldives": 434000, "Iceland": 337000,
4035 ... "Nauru": 11300, "Tuvalu": 11300,
4036 ... "Anguilla": 11300, "Montserrat": 5200}
4037 >>> s = pd.Series(countries_population)
4038 >>> s
4039 Italy 59000000
4040 France 65000000
4041 Brunei 434000
4042 Malta 434000
4043 Maldives 434000
4044 Iceland 337000
4045 Nauru 11300
4046 Tuvalu 11300
4047 Anguilla 11300
4048 Montserrat 5200
4049 dtype: int64
4050
4051 The `n` smallest elements where ``n=5`` by default.
4052
4053 >>> s.nsmallest()
4054 Montserrat 5200
4055 Nauru 11300
4056 Tuvalu 11300
4057 Anguilla 11300
4058 Iceland 337000
4059 dtype: int64
4060
4061 The `n` smallest elements where ``n=3``. Default `keep` value is
4062 'first' so Nauru and Tuvalu will be kept.
4063
4064 >>> s.nsmallest(3)
4065 Montserrat 5200
4066 Nauru 11300
4067 Tuvalu 11300
4068 dtype: int64
4069
4070 The `n` smallest elements where ``n=3`` and keeping the last
4071 duplicates. Anguilla and Tuvalu will be kept since they are the last
4072 with value 11300 based on the index order.
4073
4074 >>> s.nsmallest(3, keep='last')
4075 Montserrat 5200
4076 Anguilla 11300
4077 Tuvalu 11300
4078 dtype: int64
4079
4080 The `n` smallest elements where ``n=3`` with all duplicates kept. Note
4081 that the returned Series has four elements due to the three duplicates.
4082
4083 >>> s.nsmallest(3, keep='all')
4084 Montserrat 5200
4085 Nauru 11300
4086 Tuvalu 11300
4087 Anguilla 11300
4088 dtype: int64
4089 """
4090 return selectn.SelectNSeries(self, n=n, keep=keep).nsmallest()
4091
4092 @doc(
4093 klass=_shared_doc_kwargs["klass"],
4094 extra_params=dedent(
4095 """copy : bool, default True
4096 Whether to copy underlying data."""
4097 ),
4098 examples=dedent(
4099 """\
4100 Examples
4101 --------
4102 >>> s = pd.Series(
4103 ... ["A", "B", "A", "C"],
4104 ... index=[
4105 ... ["Final exam", "Final exam", "Coursework", "Coursework"],
4106 ... ["History", "Geography", "History", "Geography"],
4107 ... ["January", "February", "March", "April"],
4108 ... ],
4109 ... )
4110 >>> s
4111 Final exam History January A
4112 Geography February B
4113 Coursework History March A
4114 Geography April C
4115 dtype: object
4116
        In the following example, we will swap the levels of the indices.
        By not supplying any arguments for i and j, we swap the last and
        second to last levels.
4122
4123 >>> s.swaplevel()
4124 Final exam January History A
4125 February Geography B
4126 Coursework March History A
4127 April Geography C
4128 dtype: object
4129
        By supplying one argument, we can choose which level to swap the last
        level with. For example, we can swap the first level with the last one
        as follows.
4133
4134 >>> s.swaplevel(0)
4135 January History Final exam A
4136 February Geography Final exam B
4137 March History Coursework A
4138 April Geography Coursework C
4139 dtype: object
4140
        We can also define explicitly which levels we want to swap by supplying
        values for both i and j. Here, for example, we swap the first and
        second levels.
4143
4144 >>> s.swaplevel(0, 1)
4145 History Final exam January A
4146 Geography Final exam February B
4147 History Coursework March A
4148 Geography Coursework April C
4149 dtype: object"""
4150 ),
4151 )
4152 def swaplevel(
4153 self, i: Level = -2, j: Level = -1, copy: bool | None = None
4154 ) -> Series:
4155 """
4156 Swap levels i and j in a :class:`MultiIndex`.
4157
4158 Default is to swap the two innermost levels of the index.
4159
4160 Parameters
4161 ----------
4162 i, j : int or str
4163 Levels of the indices to be swapped. Can pass level name as string.
4164 {extra_params}
4165
4166 Returns
4167 -------
4168 {klass}
4169 {klass} with levels swapped in MultiIndex.
4170
4171 {examples}
4172 """
4173 assert isinstance(self.index, MultiIndex)
4174 result = self.copy(deep=copy and not using_copy_on_write())
4175 result.index = self.index.swaplevel(i, j)
4176 return result
4177
4178 def reorder_levels(self, order: Sequence[Level]) -> Series:
4179 """
4180 Rearrange index levels using input order.
4181
4182 May not drop or duplicate levels.
4183
4184 Parameters
4185 ----------
4186 order : list of int representing new level order
4187 Reference level by number or key.
4188
4189 Returns
4190 -------
4191 type of caller (new object)
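
        Examples
        --------
        A minimal sketch with a two-level MultiIndex (illustrative data):

        >>> arrays = [np.array(["dog", "dog", "cat", "cat"]),
        ...           np.array(["white", "black", "white", "black"])]
        >>> s = pd.Series([1, 2, 3, 3], index=arrays)
        >>> s
        dog  white    1
             black    2
        cat  white    3
             black    3
        dtype: int64
        >>> s.reorder_levels(order=[1, 0])
        white  dog    1
        black  dog    2
        white  cat    3
        black  cat    3
        dtype: int64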
4192 """
4193 if not isinstance(self.index, MultiIndex): # pragma: no cover
4194 raise Exception("Can only reorder levels on a hierarchical axis.")
4195
4196 result = self.copy(deep=None)
4197 assert isinstance(result.index, MultiIndex)
4198 result.index = result.index.reorder_levels(order)
4199 return result
4200
4201 def explode(self, ignore_index: bool = False) -> Series:
4202 """
4203 Transform each element of a list-like to a row.
4204
4205 Parameters
4206 ----------
4207 ignore_index : bool, default False
4208 If True, the resulting index will be labeled 0, 1, …, n - 1.
4209
4210 .. versionadded:: 1.1.0
4211
4212 Returns
4213 -------
4214 Series
4215 Exploded lists to rows; index will be duplicated for these rows.
4216
4217 See Also
4218 --------
4219 Series.str.split : Split string values on specified separator.
4220 Series.unstack : Unstack, a.k.a. pivot, Series with MultiIndex
4221 to produce DataFrame.
4222 DataFrame.melt : Unpivot a DataFrame from wide format to long format.
4223 DataFrame.explode : Explode a DataFrame from list-like
4224 columns to long format.
4225
4226 Notes
4227 -----
4228 This routine will explode list-likes including lists, tuples, sets,
4229 Series, and np.ndarray. The result dtype of the subset rows will
4230 be object. Scalars will be returned unchanged, and empty list-likes will
4231 result in a np.nan for that row. In addition, the ordering of elements in
4232 the output will be non-deterministic when exploding sets.
4233
4234 Reference :ref:`the user guide <reshaping.explode>` for more examples.
4235
4236 Examples
4237 --------
4238 >>> s = pd.Series([[1, 2, 3], 'foo', [], [3, 4]])
4239 >>> s
4240 0 [1, 2, 3]
4241 1 foo
4242 2 []
4243 3 [3, 4]
4244 dtype: object
4245
4246 >>> s.explode()
4247 0 1
4248 0 2
4249 0 3
4250 1 foo
4251 2 NaN
4252 3 3
4253 3 4
4254 dtype: object
4255 """
4256 if not len(self) or not is_object_dtype(self):
4257 result = self.copy()
4258 return result.reset_index(drop=True) if ignore_index else result
4259
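        # reshape.explode flattens the object-dtype values and returns, for
        # each original row, how many output rows it produced; the counts are
        # used below to repeat the corresponding index labels.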
4260 values, counts = reshape.explode(np.asarray(self._values))
4261
4262 if ignore_index:
4263 index = default_index(len(values))
4264 else:
4265 index = self.index.repeat(counts)
4266
4267 return self._constructor(values, index=index, name=self.name, copy=False)
4268
4269 def unstack(self, level: IndexLabel = -1, fill_value: Hashable = None) -> DataFrame:
4270 """
4271 Unstack, also known as pivot, Series with MultiIndex to produce DataFrame.
4272
4273 Parameters
4274 ----------
4275 level : int, str, or list of these, default last level
4276 Level(s) to unstack, can pass level name.
4277 fill_value : scalar value, default None
4278 Value to use when replacing NaN values.
4279
4280 Returns
4281 -------
4282 DataFrame
4283 Unstacked Series.
4284
4285 Notes
4286 -----
4287 Reference :ref:`the user guide <reshaping.stacking>` for more examples.
4288
4289 Examples
4290 --------
4291 >>> s = pd.Series([1, 2, 3, 4],
4292 ... index=pd.MultiIndex.from_product([['one', 'two'],
4293 ... ['a', 'b']]))
4294 >>> s
4295 one a 1
4296 b 2
4297 two a 3
4298 b 4
4299 dtype: int64
4300
4301 >>> s.unstack(level=-1)
4302 a b
4303 one 1 2
4304 two 3 4
4305
4306 >>> s.unstack(level=0)
4307 one two
4308 a 1 3
4309 b 2 4
4310 """
4311 from pandas.core.reshape.reshape import unstack
4312
4313 return unstack(self, level, fill_value)
4314
4315 # ----------------------------------------------------------------------
4316 # function application
4317
4318 def map(
4319 self,
4320 arg: Callable | Mapping | Series,
4321 na_action: Literal["ignore"] | None = None,
4322 ) -> Series:
4323 """
4324 Map values of Series according to an input mapping or function.
4325
        Used for substituting each value in a Series with another value,
        which may be derived from a function, a ``dict`` or
        a :class:`Series`.
4329
4330 Parameters
4331 ----------
4332 arg : function, collections.abc.Mapping subclass or Series
4333 Mapping correspondence.
4334 na_action : {None, 'ignore'}, default None
4335 If 'ignore', propagate NaN values, without passing them to the
4336 mapping correspondence.
4337
4338 Returns
4339 -------
4340 Series
4341 Same index as caller.
4342
4343 See Also
4344 --------
4345 Series.apply : For applying more complex functions on a Series.
4346 DataFrame.apply : Apply a function row-/column-wise.
4347 DataFrame.applymap : Apply a function elementwise on a whole DataFrame.
4348
4349 Notes
4350 -----
4351 When ``arg`` is a dictionary, values in Series that are not in the
4352 dictionary (as keys) are converted to ``NaN``. However, if the
4353 dictionary is a ``dict`` subclass that defines ``__missing__`` (i.e.
4354 provides a method for default values), then this default is used
4355 rather than ``NaN``.
4356
4357 Examples
4358 --------
4359 >>> s = pd.Series(['cat', 'dog', np.nan, 'rabbit'])
4360 >>> s
4361 0 cat
4362 1 dog
4363 2 NaN
4364 3 rabbit
4365 dtype: object
4366
4367 ``map`` accepts a ``dict`` or a ``Series``. Values that are not found
4368 in the ``dict`` are converted to ``NaN``, unless the dict has a default
4369 value (e.g. ``defaultdict``):
4370
4371 >>> s.map({'cat': 'kitten', 'dog': 'puppy'})
4372 0 kitten
4373 1 puppy
4374 2 NaN
4375 3 NaN
4376 dtype: object
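
        If the ``dict`` is a subclass defining ``__missing__``, such as
        ``collections.defaultdict``, its default value is used for unmatched
        entries instead of ``NaN`` (an illustrative sketch):

        >>> from collections import defaultdict
        >>> s.map(defaultdict(lambda: 'unknown', {'cat': 'kitten'}))
        0     kitten
        1    unknown
        2    unknown
        3    unknown
        dtype: object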
4377
4378 It also accepts a function:
4379
4380 >>> s.map('I am a {}'.format)
4381 0 I am a cat
4382 1 I am a dog
4383 2 I am a nan
4384 3 I am a rabbit
4385 dtype: object
4386
4387 To avoid applying the function to missing values (and keep them as
4388 ``NaN``) ``na_action='ignore'`` can be used:
4389
4390 >>> s.map('I am a {}'.format, na_action='ignore')
4391 0 I am a cat
4392 1 I am a dog
4393 2 NaN
4394 3 I am a rabbit
4395 dtype: object
4396 """
4397 new_values = self._map_values(arg, na_action=na_action)
4398 return self._constructor(new_values, index=self.index, copy=False).__finalize__(
4399 self, method="map"
4400 )
4401
4402 def _gotitem(self, key, ndim, subset=None) -> Series:
4403 """
4404 Sub-classes to define. Return a sliced object.
4405
4406 Parameters
4407 ----------
4408 key : string / list of selections
4409 ndim : {1, 2}
4410 Requested ndim of result.
4411 subset : object, default None
4412 Subset to act on.
4413 """
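        # a Series is one-dimensional, so there is nothing further to select;
        # the aggregation machinery always operates on the full object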
4414 return self
4415
4416 _agg_see_also_doc = dedent(
4417 """
4418 See Also
4419 --------
4420 Series.apply : Invoke function on a Series.
4421 Series.transform : Transform function producing a Series with like indexes.
4422 """
4423 )
4424
4425 _agg_examples_doc = dedent(
4426 """
4427 Examples
4428 --------
4429 >>> s = pd.Series([1, 2, 3, 4])
4430 >>> s
4431 0 1
4432 1 2
4433 2 3
4434 3 4
4435 dtype: int64
4436
4437 >>> s.agg('min')
4438 1
4439
4440 >>> s.agg(['min', 'max'])
4441 min 1
4442 max 4
4443 dtype: int64
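
        With ``func=None``, keyword arguments are treated as named
        aggregations (an illustrative sketch):

        >>> s.agg(Min='min', Max='max')
        Min    1
        Max    4
        dtype: int64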
4444 """
4445 )
4446
4447 @doc(
4448 _shared_docs["aggregate"],
4449 klass=_shared_doc_kwargs["klass"],
4450 axis=_shared_doc_kwargs["axis"],
4451 see_also=_agg_see_also_doc,
4452 examples=_agg_examples_doc,
4453 )
4454 def aggregate(self, func=None, axis: Axis = 0, *args, **kwargs):
4455 # Validate the axis parameter
4456 self._get_axis_number(axis)
4457
4458 # if func is None, will switch to user-provided "named aggregation" kwargs
4459 if func is None:
4460 func = dict(kwargs.items())
4461
4462 op = SeriesApply(self, func, convert_dtype=False, args=args, kwargs=kwargs)
4463 result = op.agg()
4464 return result
4465
4466 agg = aggregate
4467
4468 # error: Signature of "any" incompatible with supertype "NDFrame" [override]
4469 @overload # type: ignore[override]
4470 def any(
4471 self,
4472 *,
4473 axis: Axis = ...,
4474 bool_only: bool | None = ...,
4475 skipna: bool = ...,
4476 level: None = ...,
4477 **kwargs,
4478 ) -> bool:
4479 ...
4480
4481 @overload
4482 def any(
4483 self,
4484 *,
4485 axis: Axis = ...,
4486 bool_only: bool | None = ...,
4487 skipna: bool = ...,
4488 level: Level,
4489 **kwargs,
4490 ) -> Series | bool:
4491 ...
4492
4493 # error: Missing return statement
4494 @doc(NDFrame.any, **_shared_doc_kwargs)
4495 def any( # type: ignore[empty-body]
4496 self,
4497 axis: Axis = 0,
4498 bool_only: bool | None = None,
4499 skipna: bool = True,
4500 level: Level | None = None,
4501 **kwargs,
4502 ) -> Series | bool:
4503 ...
4504
4505 @doc(
4506 _shared_docs["transform"],
4507 klass=_shared_doc_kwargs["klass"],
4508 axis=_shared_doc_kwargs["axis"],
4509 )
4510 def transform(
4511 self, func: AggFuncType, axis: Axis = 0, *args, **kwargs
4512 ) -> DataFrame | Series:
4513 # Validate axis argument
4514 self._get_axis_number(axis)
4515 result = SeriesApply(
4516 self, func=func, convert_dtype=True, args=args, kwargs=kwargs
4517 ).transform()
4518 return result
4519
4520 def apply(
4521 self,
4522 func: AggFuncType,
4523 convert_dtype: bool = True,
4524 args: tuple[Any, ...] = (),
4525 **kwargs,
4526 ) -> DataFrame | Series:
4527 """
4528 Invoke function on values of Series.
4529
4530 Can be ufunc (a NumPy function that applies to the entire Series)
4531 or a Python function that only works on single values.
4532
4533 Parameters
4534 ----------
4535 func : function
4536 Python function or NumPy ufunc to apply.
4537 convert_dtype : bool, default True
4538 Try to find better dtype for elementwise function results. If
4539 False, leave as dtype=object. Note that the dtype is always
4540 preserved for some extension array dtypes, such as Categorical.
4541 args : tuple
4542 Positional arguments passed to func after the series value.
4543 **kwargs
4544 Additional keyword arguments passed to func.
4545
4546 Returns
4547 -------
4548 Series or DataFrame
            If func returns a Series object, the result will be a DataFrame.
4550
4551 See Also
4552 --------
4553 Series.map: For element-wise operations.
4554 Series.agg: Only perform aggregating type operations.
4555 Series.transform: Only perform transforming type operations.
4556
4557 Notes
4558 -----
4559 Functions that mutate the passed object can produce unexpected
4560 behavior or errors and are not supported. See :ref:`gotchas.udf-mutation`
4561 for more details.
4562
4563 Examples
4564 --------
4565 Create a series with typical summer temperatures for each city.
4566
4567 >>> s = pd.Series([20, 21, 12],
4568 ... index=['London', 'New York', 'Helsinki'])
4569 >>> s
4570 London 20
4571 New York 21
4572 Helsinki 12
4573 dtype: int64
4574
4575 Square the values by defining a function and passing it as an
4576 argument to ``apply()``.
4577
4578 >>> def square(x):
4579 ... return x ** 2
4580 >>> s.apply(square)
4581 London 400
4582 New York 441
4583 Helsinki 144
4584 dtype: int64
4585
4586 Square the values by passing an anonymous function as an
4587 argument to ``apply()``.
4588
4589 >>> s.apply(lambda x: x ** 2)
4590 London 400
4591 New York 441
4592 Helsinki 144
4593 dtype: int64
4594
4595 Define a custom function that needs additional positional
4596 arguments and pass these additional arguments using the
4597 ``args`` keyword.
4598
4599 >>> def subtract_custom_value(x, custom_value):
4600 ... return x - custom_value
4601
4602 >>> s.apply(subtract_custom_value, args=(5,))
4603 London 15
4604 New York 16
4605 Helsinki 7
4606 dtype: int64
4607
4608 Define a custom function that takes keyword arguments
4609 and pass these arguments to ``apply``.
4610
4611 >>> def add_custom_values(x, **kwargs):
4612 ... for month in kwargs:
4613 ... x += kwargs[month]
4614 ... return x
4615
4616 >>> s.apply(add_custom_values, june=30, july=20, august=25)
4617 London 95
4618 New York 96
4619 Helsinki 87
4620 dtype: int64
4621
4622 Use a function from the Numpy library.
4623
4624 >>> s.apply(np.log)
4625 London 2.995732
4626 New York 3.044522
4627 Helsinki 2.484907
4628 dtype: float64
4629 """
4630 return SeriesApply(self, func, convert_dtype, args, kwargs).apply()
4631
4632 def _reduce(
4633 self,
4634 op,
4635 name: str,
4636 *,
4637 axis: Axis = 0,
4638 skipna: bool = True,
4639 numeric_only: bool = False,
4640 filter_type=None,
4641 **kwds,
4642 ):
4643 """
4644 Perform a reduction operation.
4645
4646 If we have an ndarray as a value, then simply perform the operation,
4647 otherwise delegate to the object.
4648 """
4649 delegate = self._values
4650
4651 if axis is not None:
4652 self._get_axis_number(axis)
4653
4654 if isinstance(delegate, ExtensionArray):
4655 # dispatch to ExtensionArray interface
4656 return delegate._reduce(name, skipna=skipna, **kwds)
4657
4658 else:
4659 # dispatch to numpy arrays
4660 if numeric_only and not is_numeric_dtype(self.dtype):
4661 kwd_name = "numeric_only"
4662 if name in ["any", "all"]:
4663 kwd_name = "bool_only"
4664 # GH#47500 - change to TypeError to match other methods
4665 raise TypeError(
4666 f"Series.{name} does not allow {kwd_name}={numeric_only} "
4667 "with non-numeric dtypes."
4668 )
4669 with np.errstate(all="ignore"):
4670 return op(delegate, skipna=skipna, **kwds)
4671
4672 def _reindex_indexer(
4673 self,
4674 new_index: Index | None,
4675 indexer: npt.NDArray[np.intp] | None,
4676 copy: bool | None,
4677 ) -> Series:
4678 # Note: new_index is None iff indexer is None
4679 # if not None, indexer is np.intp
4680 if indexer is None and (
4681 new_index is None or new_index.names == self.index.names
4682 ):
4683 if using_copy_on_write():
4684 return self.copy(deep=copy)
4685 if copy or copy is None:
4686 return self.copy(deep=copy)
4687 return self
4688
4689 new_values = algorithms.take_nd(
4690 self._values, indexer, allow_fill=True, fill_value=None
4691 )
4692 return self._constructor(new_values, index=new_index, copy=False)
4693
4694 def _needs_reindex_multi(self, axes, method, level) -> bool:
4695 """
4696 Check if we do need a multi reindex; this is for compat with
4697 higher dims.
4698 """
4699 return False
4700
4701 # error: Cannot determine type of 'align'
4702 @doc(
4703 NDFrame.align, # type: ignore[has-type]
4704 klass=_shared_doc_kwargs["klass"],
4705 axes_single_arg=_shared_doc_kwargs["axes_single_arg"],
4706 )
4707 def align(
4708 self,
4709 other: Series,
4710 join: AlignJoin = "outer",
4711 axis: Axis | None = None,
4712 level: Level = None,
4713 copy: bool | None = None,
4714 fill_value: Hashable = None,
4715 method: FillnaOptions | None = None,
4716 limit: int | None = None,
4717 fill_axis: Axis = 0,
4718 broadcast_axis: Axis | None = None,
4719 ) -> Series:
4720 return super().align(
4721 other,
4722 join=join,
4723 axis=axis,
4724 level=level,
4725 copy=copy,
4726 fill_value=fill_value,
4727 method=method,
4728 limit=limit,
4729 fill_axis=fill_axis,
4730 broadcast_axis=broadcast_axis,
4731 )
4732
4733 @overload
4734 def rename(
4735 self,
4736 index: Renamer | Hashable | None = ...,
4737 *,
4738 axis: Axis | None = ...,
4739 copy: bool = ...,
4740 inplace: Literal[True],
4741 level: Level | None = ...,
4742 errors: IgnoreRaise = ...,
4743 ) -> None:
4744 ...
4745
4746 @overload
4747 def rename(
4748 self,
4749 index: Renamer | Hashable | None = ...,
4750 *,
4751 axis: Axis | None = ...,
4752 copy: bool = ...,
4753 inplace: Literal[False] = ...,
4754 level: Level | None = ...,
4755 errors: IgnoreRaise = ...,
4756 ) -> Series:
4757 ...
4758
4759 @overload
4760 def rename(
4761 self,
4762 index: Renamer | Hashable | None = ...,
4763 *,
4764 axis: Axis | None = ...,
4765 copy: bool = ...,
4766 inplace: bool = ...,
4767 level: Level | None = ...,
4768 errors: IgnoreRaise = ...,
4769 ) -> Series | None:
4770 ...
4771
4772 def rename(
4773 self,
4774 index: Renamer | Hashable | None = None,
4775 *,
4776 axis: Axis | None = None,
4777 copy: bool | None = None,
4778 inplace: bool = False,
4779 level: Level | None = None,
4780 errors: IgnoreRaise = "ignore",
4781 ) -> Series | None:
4782 """
4783 Alter Series index labels or name.
4784
4785 Function / dict values must be unique (1-to-1). Labels not contained in
4786 a dict / Series will be left as-is. Extra labels listed don't throw an
4787 error.
4788
4789 Alternatively, change ``Series.name`` with a scalar value.
4790
4791 See the :ref:`user guide <basics.rename>` for more.
4792
4793 Parameters
4794 ----------
        index : scalar, hashable sequence, dict-like or function, optional
4796 Functions or dict-like are transformations to apply to
4797 the index.
4798 Scalar or hashable sequence-like will alter the ``Series.name``
4799 attribute.
4800 axis : {0 or 'index'}
4801 Unused. Parameter needed for compatibility with DataFrame.
4802 copy : bool, default True
4803 Also copy underlying data.
4804 inplace : bool, default False
            Whether to return a new Series. If True, the value of copy is ignored.
4806 level : int or level name, default None
4807 In case of MultiIndex, only rename labels in the specified level.
4808 errors : {'ignore', 'raise'}, default 'ignore'
4809 If 'raise', raise `KeyError` when a `dict-like mapper` or
4810 `index` contains labels that are not present in the index being transformed.
4811 If 'ignore', existing keys will be renamed and extra keys will be ignored.
4812
4813 Returns
4814 -------
4815 Series or None
4816 Series with index labels or name altered or None if ``inplace=True``.
4817
4818 See Also
4819 --------
4820 DataFrame.rename : Corresponding DataFrame method.
4821 Series.rename_axis : Set the name of the axis.
4822
4823 Examples
4824 --------
4825 >>> s = pd.Series([1, 2, 3])
4826 >>> s
4827 0 1
4828 1 2
4829 2 3
4830 dtype: int64
4831 >>> s.rename("my_name") # scalar, changes Series.name
4832 0 1
4833 1 2
4834 2 3
4835 Name: my_name, dtype: int64
4836 >>> s.rename(lambda x: x ** 2) # function, changes labels
4837 0 1
4838 1 2
4839 4 3
4840 dtype: int64
4841 >>> s.rename({1: 3, 2: 5}) # mapping, changes labels
4842 0 1
4843 3 2
4844 5 3
4845 dtype: int64
4846 """
4847 if axis is not None:
4848 # Make sure we raise if an invalid 'axis' is passed.
4849 axis = self._get_axis_number(axis)
4850
4851 if callable(index) or is_dict_like(index):
4852 # error: Argument 1 to "_rename" of "NDFrame" has incompatible
4853 # type "Union[Union[Mapping[Any, Hashable], Callable[[Any],
4854 # Hashable]], Hashable, None]"; expected "Union[Mapping[Any,
4855 # Hashable], Callable[[Any], Hashable], None]"
4856 return super()._rename(
4857 index, # type: ignore[arg-type]
4858 copy=copy,
4859 inplace=inplace,
4860 level=level,
4861 errors=errors,
4862 )
4863 else:
4864 return self._set_name(index, inplace=inplace, deep=copy)
4865
4866 @Appender(
4867 """
4868 Examples
4869 --------
4870 >>> s = pd.Series([1, 2, 3])
4871 >>> s
4872 0 1
4873 1 2
4874 2 3
4875 dtype: int64
4876
4877 >>> s.set_axis(['a', 'b', 'c'], axis=0)
4878 a 1
4879 b 2
4880 c 3
4881 dtype: int64
4882 """
4883 )
4884 @Substitution(
4885 **_shared_doc_kwargs,
4886 extended_summary_sub="",
4887 axis_description_sub="",
4888 see_also_sub="",
4889 )
4890 @Appender(NDFrame.set_axis.__doc__)
4891 def set_axis(
4892 self,
4893 labels,
4894 *,
4895 axis: Axis = 0,
4896 copy: bool | None = None,
4897 ) -> Series:
4898 return super().set_axis(labels, axis=axis, copy=copy)
4899
4900 # error: Cannot determine type of 'reindex'
4901 @doc(
4902 NDFrame.reindex, # type: ignore[has-type]
4903 klass=_shared_doc_kwargs["klass"],
4904 optional_reindex=_shared_doc_kwargs["optional_reindex"],
4905 )
4906 def reindex( # type: ignore[override]
4907 self,
4908 index=None,
4909 *,
4910 axis: Axis | None = None,
4911 method: str | None = None,
4912 copy: bool | None = None,
4913 level: Level | None = None,
4914 fill_value: Scalar | None = None,
4915 limit: int | None = None,
4916 tolerance=None,
4917 ) -> Series:
4918 return super().reindex(
4919 index=index,
4920 method=method,
4921 copy=copy,
4922 level=level,
4923 fill_value=fill_value,
4924 limit=limit,
4925 tolerance=tolerance,
4926 )
4927
4928 @doc(NDFrame.rename_axis)
4929 def rename_axis( # type: ignore[override]
4930 self: Series,
4931 mapper: IndexLabel | lib.NoDefault = lib.no_default,
4932 *,
4933 index=lib.no_default,
4934 axis: Axis = 0,
4935 copy: bool = True,
4936 inplace: bool = False,
4937 ) -> Series | None:
4938 return super().rename_axis(
4939 mapper=mapper,
4940 index=index,
4941 axis=axis,
4942 copy=copy,
4943 inplace=inplace,
4944 )
4945
4946 @overload
4947 def drop(
4948 self,
4949 labels: IndexLabel = ...,
4950 *,
4951 axis: Axis = ...,
4952 index: IndexLabel = ...,
4953 columns: IndexLabel = ...,
4954 level: Level | None = ...,
4955 inplace: Literal[True],
4956 errors: IgnoreRaise = ...,
4957 ) -> None:
4958 ...
4959
4960 @overload
4961 def drop(
4962 self,
4963 labels: IndexLabel = ...,
4964 *,
4965 axis: Axis = ...,
4966 index: IndexLabel = ...,
4967 columns: IndexLabel = ...,
4968 level: Level | None = ...,
4969 inplace: Literal[False] = ...,
4970 errors: IgnoreRaise = ...,
4971 ) -> Series:
4972 ...
4973
4974 @overload
4975 def drop(
4976 self,
4977 labels: IndexLabel = ...,
4978 *,
4979 axis: Axis = ...,
4980 index: IndexLabel = ...,
4981 columns: IndexLabel = ...,
4982 level: Level | None = ...,
4983 inplace: bool = ...,
4984 errors: IgnoreRaise = ...,
4985 ) -> Series | None:
4986 ...
4987
4988 def drop(
4989 self,
4990 labels: IndexLabel = None,
4991 *,
4992 axis: Axis = 0,
4993 index: IndexLabel = None,
4994 columns: IndexLabel = None,
4995 level: Level | None = None,
4996 inplace: bool = False,
4997 errors: IgnoreRaise = "raise",
4998 ) -> Series | None:
4999 """
5000 Return Series with specified index labels removed.
5001
5002 Remove elements of a Series based on specifying the index labels.
5003 When using a multi-index, labels on different levels can be removed
5004 by specifying the level.
5005
5006 Parameters
5007 ----------
5008 labels : single label or list-like
5009 Index labels to drop.
5010 axis : {0 or 'index'}
5011 Unused. Parameter needed for compatibility with DataFrame.
5012 index : single label or list-like
5013 Redundant for application on Series, but 'index' can be used instead
5014 of 'labels'.
5015 columns : single label or list-like
5016 No change is made to the Series; use 'index' or 'labels' instead.
5017 level : int or level name, optional
5018 For MultiIndex, level for which the labels will be removed.
5019 inplace : bool, default False
5020 If True, do operation inplace and return None.
5021 errors : {'ignore', 'raise'}, default 'raise'
5022 If 'ignore', suppress error and only existing labels are dropped.
5023
5024 Returns
5025 -------
5026 Series or None
5027 Series with specified index labels removed or None if ``inplace=True``.
5028
5029 Raises
5030 ------
5031 KeyError
5032 If none of the labels are found in the index.
5033
5034 See Also
5035 --------
5036 Series.reindex : Return only specified index labels of Series.
5037 Series.dropna : Return series without null values.
5038 Series.drop_duplicates : Return Series with duplicate values removed.
5039 DataFrame.drop : Drop specified labels from rows or columns.
5040
5041 Examples
5042 --------
5043 >>> s = pd.Series(data=np.arange(3), index=['A', 'B', 'C'])
5044 >>> s
5045 A 0
5046 B 1
5047 C 2
5048 dtype: int64
5049
        Drop labels B and C
5051
5052 >>> s.drop(labels=['B', 'C'])
5053 A 0
5054 dtype: int64
5055
5056 Drop 2nd level label in MultiIndex Series
5057
5058 >>> midx = pd.MultiIndex(levels=[['lama', 'cow', 'falcon'],
5059 ... ['speed', 'weight', 'length']],
5060 ... codes=[[0, 0, 0, 1, 1, 1, 2, 2, 2],
5061 ... [0, 1, 2, 0, 1, 2, 0, 1, 2]])
5062 >>> s = pd.Series([45, 200, 1.2, 30, 250, 1.5, 320, 1, 0.3],
5063 ... index=midx)
5064 >>> s
5065 lama speed 45.0
5066 weight 200.0
5067 length 1.2
5068 cow speed 30.0
5069 weight 250.0
5070 length 1.5
5071 falcon speed 320.0
5072 weight 1.0
5073 length 0.3
5074 dtype: float64
5075
5076 >>> s.drop(labels='weight', level=1)
5077 lama speed 45.0
5078 length 1.2
5079 cow speed 30.0
5080 length 1.5
5081 falcon speed 320.0
5082 length 0.3
5083 dtype: float64
5084 """
5085 return super().drop(
5086 labels=labels,
5087 axis=axis,
5088 index=index,
5089 columns=columns,
5090 level=level,
5091 inplace=inplace,
5092 errors=errors,
5093 )
5094
5095 @overload
5096 def fillna(
5097 self,
5098 value: Hashable | Mapping | Series | DataFrame = ...,
5099 *,
5100 method: FillnaOptions | None = ...,
5101 axis: Axis | None = ...,
5102 inplace: Literal[False] = ...,
5103 limit: int | None = ...,
5104 downcast: dict | None = ...,
5105 ) -> Series:
5106 ...
5107
5108 @overload
5109 def fillna(
5110 self,
5111 value: Hashable | Mapping | Series | DataFrame = ...,
5112 *,
5113 method: FillnaOptions | None = ...,
5114 axis: Axis | None = ...,
5115 inplace: Literal[True],
5116 limit: int | None = ...,
5117 downcast: dict | None = ...,
5118 ) -> None:
5119 ...
5120
5121 @overload
5122 def fillna(
5123 self,
5124 value: Hashable | Mapping | Series | DataFrame = ...,
5125 *,
5126 method: FillnaOptions | None = ...,
5127 axis: Axis | None = ...,
5128 inplace: bool = ...,
5129 limit: int | None = ...,
5130 downcast: dict | None = ...,
5131 ) -> Series | None:
5132 ...
5133
5134 @doc(NDFrame.fillna, **_shared_doc_kwargs)
5135 def fillna(
5136 self,
5137 value: Hashable | Mapping | Series | DataFrame = None,
5138 *,
5139 method: FillnaOptions | None = None,
5140 axis: Axis | None = None,
5141 inplace: bool = False,
5142 limit: int | None = None,
5143 downcast: dict | None = None,
5144 ) -> Series | None:
5145 return super().fillna(
5146 value=value,
5147 method=method,
5148 axis=axis,
5149 inplace=inplace,
5150 limit=limit,
5151 downcast=downcast,
5152 )
5153
5154 def pop(self, item: Hashable) -> Any:
5155 """
        Return item and drop it from the series. Raise KeyError if not found.
5157
5158 Parameters
5159 ----------
5160 item : label
5161 Index of the element that needs to be removed.
5162
5163 Returns
5164 -------
5165 Value that is popped from series.
5166
5167 Examples
5168 --------
        >>> ser = pd.Series([1, 2, 3])
5170
5171 >>> ser.pop(0)
5172 1
5173
5174 >>> ser
5175 1 2
5176 2 3
5177 dtype: int64
5178 """
5179 return super().pop(item=item)
5180
5181 @overload
5182 def replace(
5183 self,
5184 to_replace=...,
5185 value=...,
5186 *,
5187 inplace: Literal[False] = ...,
5188 limit: int | None = ...,
5189 regex: bool = ...,
5190 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
5191 ) -> Series:
5192 ...
5193
5194 @overload
5195 def replace(
5196 self,
5197 to_replace=...,
5198 value=...,
5199 *,
5200 inplace: Literal[True],
5201 limit: int | None = ...,
5202 regex: bool = ...,
5203 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = ...,
5204 ) -> None:
5205 ...
5206
5207 @doc(
5208 NDFrame.replace,
5209 klass=_shared_doc_kwargs["klass"],
5210 inplace=_shared_doc_kwargs["inplace"],
5211 replace_iloc=_shared_doc_kwargs["replace_iloc"],
5212 )
5213 def replace(
5214 self,
5215 to_replace=None,
5216 value=lib.no_default,
5217 *,
5218 inplace: bool = False,
5219 limit: int | None = None,
5220 regex: bool = False,
5221 method: Literal["pad", "ffill", "bfill"] | lib.NoDefault = lib.no_default,
5222 ) -> Series | None:
5223 return super().replace(
5224 to_replace=to_replace,
5225 value=value,
5226 inplace=inplace,
5227 limit=limit,
5228 regex=regex,
5229 method=method,
5230 )
5231
5232 @doc(INFO_DOCSTRING, **series_sub_kwargs)
5233 def info(
5234 self,
5235 verbose: bool | None = None,
5236 buf: IO[str] | None = None,
5237 max_cols: int | None = None,
5238 memory_usage: bool | str | None = None,
5239 show_counts: bool = True,
5240 ) -> None:
5241 return SeriesInfo(self, memory_usage).render(
5242 buf=buf,
5243 max_cols=max_cols,
5244 verbose=verbose,
5245 show_counts=show_counts,
5246 )
5247
5248 def _replace_single(self, to_replace, method: str, inplace: bool, limit):
5249 """
        Replace values in a Series using the fill method specified when no
        replacement value is given in the replace method.
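
        For example (an illustrative sketch),
        ``pd.Series([1, 2, 3]).replace(2, method='ffill')`` fills the matched
        position from the previous value, giving ``1, 1, 3``.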
5252 """
5253
5254 result = self if inplace else self.copy()
5255
5256 values = result._values
5257 mask = missing.mask_missing(values, to_replace)
5258
5259 if isinstance(values, ExtensionArray):
            # dispatch to the EA's _fill_mask_inplace method
5261 values._fill_mask_inplace(method, limit, mask)
5262 else:
5263 fill_f = missing.get_fill_func(method)
5264 fill_f(values, limit=limit, mask=mask)
5265
5266 if inplace:
5267 return
5268 return result
5269
5270 # error: Cannot determine type of 'shift'
5271 @doc(NDFrame.shift, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
5272 def shift(
5273 self, periods: int = 1, freq=None, axis: Axis = 0, fill_value: Hashable = None
5274 ) -> Series:
5275 return super().shift(
5276 periods=periods, freq=freq, axis=axis, fill_value=fill_value
5277 )
5278
5279 def memory_usage(self, index: bool = True, deep: bool = False) -> int:
5280 """
5281 Return the memory usage of the Series.
5282
5283 The memory usage can optionally include the contribution of
5284 the index and of elements of `object` dtype.
5285
5286 Parameters
5287 ----------
5288 index : bool, default True
5289 Specifies whether to include the memory usage of the Series index.
5290 deep : bool, default False
5291 If True, introspect the data deeply by interrogating
5292 `object` dtypes for system-level memory consumption, and include
5293 it in the returned value.
5294
5295 Returns
5296 -------
5297 int
5298 Bytes of memory consumed.
5299
5300 See Also
5301 --------
5302 numpy.ndarray.nbytes : Total bytes consumed by the elements of the
5303 array.
5304 DataFrame.memory_usage : Bytes consumed by a DataFrame.
5305
5306 Examples
5307 --------
5308 >>> s = pd.Series(range(3))
5309 >>> s.memory_usage()
5310 152
5311
5312 Not including the index gives the size of the rest of the data, which
5313 is necessarily smaller:
5314
5315 >>> s.memory_usage(index=False)
5316 24
5317
5318 The memory footprint of `object` values is ignored by default:
5319
5320 >>> s = pd.Series(["a", "b"])
5321 >>> s.values
5322 array(['a', 'b'], dtype=object)
5323 >>> s.memory_usage()
5324 144
5325 >>> s.memory_usage(deep=True)
5326 244
5327 """
5328 v = self._memory_usage(deep=deep)
5329 if index:
5330 v += self.index.memory_usage(deep=deep)
5331 return v
5332
5333 def isin(self, values) -> Series:
5334 """
5335 Whether elements in Series are contained in `values`.
5336
5337 Return a boolean Series showing whether each element in the Series
5338 matches an element in the passed sequence of `values` exactly.
5339
5340 Parameters
5341 ----------
5342 values : set or list-like
5343 The sequence of values to test. Passing in a single string will
5344 raise a ``TypeError``. Instead, turn a single string into a
5345 list of one element.
5346
5347 Returns
5348 -------
5349 Series
5350 Series of booleans indicating if each element is in values.
5351
5352 Raises
5353 ------
5354 TypeError
5355 * If `values` is a string
5356
5357 See Also
5358 --------
5359 DataFrame.isin : Equivalent method on DataFrame.
5360
5361 Examples
5362 --------
5363 >>> s = pd.Series(['lama', 'cow', 'lama', 'beetle', 'lama',
5364 ... 'hippo'], name='animal')
5365 >>> s.isin(['cow', 'lama'])
5366 0 True
5367 1 True
5368 2 True
5369 3 False
5370 4 True
5371 5 False
5372 Name: animal, dtype: bool
5373
5374 To invert the boolean values, use the ``~`` operator:
5375
5376 >>> ~s.isin(['cow', 'lama'])
5377 0 False
5378 1 False
5379 2 False
5380 3 True
5381 4 False
5382 5 True
5383 Name: animal, dtype: bool
5384
5385 Passing a single string as ``s.isin('lama')`` will raise an error. Use
5386 a list of one element instead:
5387
5388 >>> s.isin(['lama'])
5389 0 True
5390 1 False
5391 2 True
5392 3 False
5393 4 True
5394 5 False
5395 Name: animal, dtype: bool
5396
5397 Strings and integers are distinct and are therefore not comparable:
5398
5399 >>> pd.Series([1]).isin(['1'])
5400 0 False
5401 dtype: bool
5402 >>> pd.Series([1.1]).isin(['1.1'])
5403 0 False
5404 dtype: bool
5405 """
5406 result = algorithms.isin(self._values, values)
5407 return self._constructor(result, index=self.index, copy=False).__finalize__(
5408 self, method="isin"
5409 )
5410
5411 def between(
5412 self,
5413 left,
5414 right,
5415 inclusive: Literal["both", "neither", "left", "right"] = "both",
5416 ) -> Series:
5417 """
5418 Return boolean Series equivalent to left <= series <= right.
5419
5420 This function returns a boolean vector containing `True` wherever the
5421 corresponding Series element is between the boundary values `left` and
5422 `right`. NA values are treated as `False`.
5423
5424 Parameters
5425 ----------
5426 left : scalar or list-like
5427 Left boundary.
5428 right : scalar or list-like
5429 Right boundary.
5430 inclusive : {"both", "neither", "left", "right"}
5431 Include boundaries. Whether to set each bound as closed or open.
5432
5433 .. versionchanged:: 1.3.0
5434
5435 Returns
5436 -------
5437 Series
5438 Series representing whether each element is between left and
5439 right (inclusive).
5440
5441 See Also
5442 --------
5443 Series.gt : Greater than of series and other.
5444 Series.lt : Less than of series and other.
5445
5446 Notes
5447 -----
        This function is equivalent to ``(left <= ser) & (ser <= right)``.
5449
5450 Examples
5451 --------
5452 >>> s = pd.Series([2, 0, 4, 8, np.nan])
5453
5454 Boundary values are included by default:
5455
5456 >>> s.between(1, 4)
5457 0 True
5458 1 False
5459 2 True
5460 3 False
5461 4 False
5462 dtype: bool
5463
5464 With `inclusive` set to ``"neither"`` boundary values are excluded:
5465
5466 >>> s.between(1, 4, inclusive="neither")
5467 0 True
5468 1 False
5469 2 False
5470 3 False
5471 4 False
5472 dtype: bool
5473
5474 `left` and `right` can be any scalar value:
5475
5476 >>> s = pd.Series(['Alice', 'Bob', 'Carol', 'Eve'])
5477 >>> s.between('Anna', 'Daniel')
5478 0 False
5479 1 True
5480 2 True
5481 3 False
5482 dtype: bool
5483 """
5484 if inclusive == "both":
5485 lmask = self >= left
5486 rmask = self <= right
5487 elif inclusive == "left":
5488 lmask = self >= left
5489 rmask = self < right
5490 elif inclusive == "right":
5491 lmask = self > left
5492 rmask = self <= right
5493 elif inclusive == "neither":
5494 lmask = self > left
5495 rmask = self < right
5496 else:
            raise ValueError(
                "Inclusive has to be either string of 'both', "
                "'left', 'right', or 'neither'."
            )
5501
5502 return lmask & rmask
5503
5504 # ----------------------------------------------------------------------
5505 # Convert to types that support pd.NA
5506
5507 def _convert_dtypes(
5508 self,
5509 infer_objects: bool = True,
5510 convert_string: bool = True,
5511 convert_integer: bool = True,
5512 convert_boolean: bool = True,
5513 convert_floating: bool = True,
5514 dtype_backend: DtypeBackend = "numpy_nullable",
5515 ) -> Series:
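        # Shared implementation for convert_dtypes: optionally infer better
        # dtypes for object data first, then cast to the best dtypes that
        # support pd.NA for the requested backend.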
5516 input_series = self
5517 if infer_objects:
5518 input_series = input_series.infer_objects()
5519 if is_object_dtype(input_series):
5520 input_series = input_series.copy(deep=None)
5521
5522 if convert_string or convert_integer or convert_boolean or convert_floating:
5523 inferred_dtype = convert_dtypes(
5524 input_series._values,
5525 convert_string,
5526 convert_integer,
5527 convert_boolean,
5528 convert_floating,
5529 infer_objects,
5530 dtype_backend,
5531 )
5532 result = input_series.astype(inferred_dtype)
5533 else:
5534 result = input_series.copy(deep=None)
5535 return result
5536
5537 # error: Cannot determine type of 'isna'
5538 # error: Return type "Series" of "isna" incompatible with return type "ndarray
5539 # [Any, dtype[bool_]]" in supertype "IndexOpsMixin"
5540 @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
5541 def isna(self) -> Series: # type: ignore[override]
5542 return NDFrame.isna(self)
5543
5544 # error: Cannot determine type of 'isna'
5545 @doc(NDFrame.isna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
5546 def isnull(self) -> Series:
5547 """
5548 Series.isnull is an alias for Series.isna.
5549 """
5550 return super().isnull()
5551
5552 # error: Cannot determine type of 'notna'
5553 @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
5554 def notna(self) -> Series:
5555 return super().notna()
5556
5557 # error: Cannot determine type of 'notna'
5558 @doc(NDFrame.notna, klass=_shared_doc_kwargs["klass"]) # type: ignore[has-type]
5559 def notnull(self) -> Series:
5560 """
5561 Series.notnull is an alias for Series.notna.
5562 """
5563 return super().notnull()
5564
5565 @overload
5566 def dropna(
5567 self,
5568 *,
5569 axis: Axis = ...,
5570 inplace: Literal[False] = ...,
5571 how: AnyAll | None = ...,
5572 ignore_index: bool = ...,
5573 ) -> Series:
5574 ...
5575
5576 @overload
5577 def dropna(
5578 self,
5579 *,
5580 axis: Axis = ...,
5581 inplace: Literal[True],
5582 how: AnyAll | None = ...,
5583 ignore_index: bool = ...,
5584 ) -> None:
5585 ...
5586
5587 def dropna(
5588 self,
5589 *,
5590 axis: Axis = 0,
5591 inplace: bool = False,
5592 how: AnyAll | None = None,
5593 ignore_index: bool = False,
5594 ) -> Series | None:
5595 """
5596 Return a new Series with missing values removed.
5597
5598 See the :ref:`User Guide <missing_data>` for more on which values are
5599 considered missing, and how to work with missing data.
5600
5601 Parameters
5602 ----------
5603 axis : {0 or 'index'}
5604 Unused. Parameter needed for compatibility with DataFrame.
5605 inplace : bool, default False
5606 If True, do operation inplace and return None.
5607 how : str, optional
5608 Not in use. Kept for compatibility.
5609 ignore_index : bool, default ``False``
5610 If ``True``, the resulting axis will be labeled 0, 1, …, n - 1.
5611
5612 .. versionadded:: 2.0.0
5613
5614 Returns
5615 -------
5616 Series or None
5617 Series with NA entries dropped from it or None if ``inplace=True``.
5618
5619 See Also
5620 --------
5621 Series.isna: Indicate missing values.
5622 Series.notna : Indicate existing (non-missing) values.
5623 Series.fillna : Replace missing values.
5624 DataFrame.dropna : Drop rows or columns which contain NA values.
5625 Index.dropna : Drop missing indices.
5626
5627 Examples
5628 --------
5629 >>> ser = pd.Series([1., 2., np.nan])
5630 >>> ser
5631 0 1.0
5632 1 2.0
5633 2 NaN
5634 dtype: float64
5635
5636 Drop NA values from a Series.
5637
5638 >>> ser.dropna()
5639 0 1.0
5640 1 2.0
5641 dtype: float64
5642
5643 Empty strings are not considered NA values. ``None`` is considered an
5644 NA value.
5645
5646 >>> ser = pd.Series([np.NaN, 2, pd.NaT, '', None, 'I stay'])
5647 >>> ser
5648 0 NaN
5649 1 2
5650 2 NaT
5651 3
5652 4 None
5653 5 I stay
5654 dtype: object
5655 >>> ser.dropna()
5656 1 2
5657 3
5658 5 I stay
5659 dtype: object
5660 """
5661 inplace = validate_bool_kwarg(inplace, "inplace")
5662 ignore_index = validate_bool_kwarg(ignore_index, "ignore_index")
5663 # Validate the axis parameter
5664 self._get_axis_number(axis or 0)
5665
5666 if self._can_hold_na:
5667 result = remove_na_arraylike(self)
5668 else:
5669 if not inplace:
5670 result = self.copy(deep=None)
5671 else:
5672 result = self
5673
5674 if ignore_index:
5675 result.index = default_index(len(result))
5676
5677 if inplace:
5678 return self._update_inplace(result)
5679 else:
5680 return result
5681
5682 # ----------------------------------------------------------------------
5683 # Time series-oriented methods
5684
5685 # error: Cannot determine type of 'asfreq'
5686 @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type]
5687 def asfreq(
5688 self,
5689 freq: Frequency,
5690 method: FillnaOptions | None = None,
5691 how: str | None = None,
5692 normalize: bool = False,
5693 fill_value: Hashable = None,
5694 ) -> Series:
5695 return super().asfreq(
5696 freq=freq,
5697 method=method,
5698 how=how,
5699 normalize=normalize,
5700 fill_value=fill_value,
5701 )
5702
5703 # error: Cannot determine type of 'resample'
5704 @doc(NDFrame.resample, **_shared_doc_kwargs) # type: ignore[has-type]
5705 def resample(
5706 self,
5707 rule,
5708 axis: Axis = 0,
5709 closed: str | None = None,
5710 label: str | None = None,
5711 convention: str = "start",
5712 kind: str | None = None,
5713 on: Level = None,
5714 level: Level = None,
5715 origin: str | TimestampConvertibleTypes = "start_day",
5716 offset: TimedeltaConvertibleTypes | None = None,
5717 group_keys: bool = False,
5718 ) -> Resampler:
5719 return super().resample(
5720 rule=rule,
5721 axis=axis,
5722 closed=closed,
5723 label=label,
5724 convention=convention,
5725 kind=kind,
5726 on=on,
5727 level=level,
5728 origin=origin,
5729 offset=offset,
5730 group_keys=group_keys,
5731 )
5732
5733 def to_timestamp(
5734 self,
5735 freq=None,
5736 how: Literal["s", "e", "start", "end"] = "start",
5737 copy: bool | None = None,
5738 ) -> Series:
5739 """
5740 Cast to DatetimeIndex of Timestamps, at *beginning* of period.
5741
5742 Parameters
5743 ----------
5744 freq : str, default frequency of PeriodIndex
5745 Desired frequency.
5746 how : {'s', 'e', 'start', 'end'}
5747 Convention for converting period to timestamp; start of period
5748 vs. end.
5749 copy : bool, default True
5750 Whether or not to return a copy.
5751
5752 Returns
5753 -------
5754 Series with DatetimeIndex
5755
5756 Examples
5757 --------
5758 >>> idx = pd.PeriodIndex(['2023', '2024', '2025'], freq='Y')
5759 >>> s1 = pd.Series([1, 2, 3], index=idx)
5760 >>> s1
5761 2023 1
5762 2024 2
5763 2025 3
5764 Freq: A-DEC, dtype: int64
5765
5766 The resulting frequency of the Timestamps is `YearBegin`
5767
5768 >>> s1 = s1.to_timestamp()
5769 >>> s1
5770 2023-01-01 1
5771 2024-01-01 2
5772 2025-01-01 3
5773 Freq: AS-JAN, dtype: int64
5774
5775 Using `freq` which is the offset that the Timestamps will have
5776
5777 >>> s2 = pd.Series([1, 2, 3], index=idx)
5778 >>> s2 = s2.to_timestamp(freq='M')
5779 >>> s2
5780 2023-01-31 1
5781 2024-01-31 2
5782 2025-01-31 3
5783 Freq: A-JAN, dtype: int64
5784 """
5785 if not isinstance(self.index, PeriodIndex):
            raise TypeError(f"unsupported type {type(self.index).__name__}")
5787
5788 new_obj = self.copy(deep=copy and not using_copy_on_write())
5789 new_index = self.index.to_timestamp(freq=freq, how=how)
5790 setattr(new_obj, "index", new_index)
5791 return new_obj
5792
5793 def to_period(self, freq: str | None = None, copy: bool | None = None) -> Series:
5794 """
5795 Convert Series from DatetimeIndex to PeriodIndex.
5796
5797 Parameters
5798 ----------
5799 freq : str, default None
5800 Frequency associated with the PeriodIndex.
5801 copy : bool, default True
5802 Whether or not to return a copy.
5803
5804 Returns
5805 -------
5806 Series
5807 Series with index converted to PeriodIndex.
5808
5809 Examples
5810 --------
5811 >>> idx = pd.DatetimeIndex(['2023', '2024', '2025'])
5812 >>> s = pd.Series([1, 2, 3], index=idx)
5813 >>> s = s.to_period()
5814 >>> s
5815 2023 1
5816 2024 2
5817 2025 3
5818 Freq: A-DEC, dtype: int64
5819
5820 Viewing the index
5821
5822 >>> s.index
5823 PeriodIndex(['2023', '2024', '2025'], dtype='period[A-DEC]')
5824 """
5825 if not isinstance(self.index, DatetimeIndex):
            raise TypeError(f"unsupported type {type(self.index).__name__}")
5827
5828 new_obj = self.copy(deep=copy and not using_copy_on_write())
5829 new_index = self.index.to_period(freq=freq)
5830 setattr(new_obj, "index", new_index)
5831 return new_obj
5832
5833 @overload
5834 def ffill(
5835 self,
5836 *,
5837 axis: None | Axis = ...,
5838 inplace: Literal[False] = ...,
5839 limit: None | int = ...,
5840 downcast: dict | None = ...,
5841 ) -> Series:
5842 ...
5843
5844 @overload
5845 def ffill(
5846 self,
5847 *,
5848 axis: None | Axis = ...,
5849 inplace: Literal[True],
5850 limit: None | int = ...,
5851 downcast: dict | None = ...,
5852 ) -> None:
5853 ...
5854
5855 @overload
5856 def ffill(
5857 self,
5858 *,
5859 axis: None | Axis = ...,
5860 inplace: bool = ...,
5861 limit: None | int = ...,
5862 downcast: dict | None = ...,
5863 ) -> Series | None:
5864 ...
5865
5866 def ffill(
5867 self,
5868 *,
5869 axis: None | Axis = None,
5870 inplace: bool = False,
5871 limit: None | int = None,
5872 downcast: dict | None = None,
5873 ) -> Series | None:
5874 return super().ffill(axis=axis, inplace=inplace, limit=limit, downcast=downcast)
5875
5876 @overload
5877 def bfill(
5878 self,
5879 *,
5880 axis: None | Axis = ...,
5881 inplace: Literal[False] = ...,
5882 limit: None | int = ...,
5883 downcast: dict | None = ...,
5884 ) -> Series:
5885 ...
5886
5887 @overload
5888 def bfill(
5889 self,
5890 *,
5891 axis: None | Axis = ...,
5892 inplace: Literal[True],
5893 limit: None | int = ...,
5894 downcast: dict | None = ...,
5895 ) -> None:
5896 ...
5897
5898 @overload
5899 def bfill(
5900 self,
5901 *,
5902 axis: None | Axis = ...,
5903 inplace: bool = ...,
5904 limit: None | int = ...,
5905 downcast: dict | None = ...,
5906 ) -> Series | None:
5907 ...
5908
5909 def bfill(
5910 self,
5911 *,
5912 axis: None | Axis = None,
5913 inplace: bool = False,
5914 limit: None | int = None,
5915 downcast: dict | None = None,
5916 ) -> Series | None:
5917 return super().bfill(axis=axis, inplace=inplace, limit=limit, downcast=downcast)
5918
5919 def clip(
5920 self: Series,
5921 lower=None,
5922 upper=None,
5923 *,
5924 axis: Axis | None = None,
5925 inplace: bool = False,
5926 **kwargs,
5927 ) -> Series | None:
5928 return super().clip(lower, upper, axis=axis, inplace=inplace, **kwargs)
5929
5930 def interpolate(
5931 self: Series,
5932 method: str = "linear",
5933 *,
5934 axis: Axis = 0,
5935 limit: int | None = None,
5936 inplace: bool = False,
5937 limit_direction: str | None = None,
5938 limit_area: str | None = None,
5939 downcast: str | None = None,
5940 **kwargs,
5941 ) -> Series | None:
5942 return super().interpolate(
5943 method=method,
5944 axis=axis,
5945 limit=limit,
5946 inplace=inplace,
5947 limit_direction=limit_direction,
5948 limit_area=limit_area,
5949 downcast=downcast,
5950 **kwargs,
5951 )
5952
5953 @overload
5954 def where(
5955 self,
5956 cond,
5957 other=...,
5958 *,
5959 inplace: Literal[False] = ...,
5960 axis: Axis | None = ...,
5961 level: Level = ...,
5962 ) -> Series:
5963 ...
5964
5965 @overload
5966 def where(
5967 self,
5968 cond,
5969 other=...,
5970 *,
5971 inplace: Literal[True],
5972 axis: Axis | None = ...,
5973 level: Level = ...,
5974 ) -> None:
5975 ...
5976
5977 @overload
5978 def where(
5979 self,
5980 cond,
5981 other=...,
5982 *,
5983 inplace: bool = ...,
5984 axis: Axis | None = ...,
5985 level: Level = ...,
5986 ) -> Series | None:
5987 ...
5988
5989 def where(
5990 self,
5991 cond,
5992 other=lib.no_default,
5993 *,
5994 inplace: bool = False,
5995 axis: Axis | None = None,
5996 level: Level = None,
5997 ) -> Series | None:
5998 return super().where(
5999 cond,
6000 other,
6001 inplace=inplace,
6002 axis=axis,
6003 level=level,
6004 )
6005
6006 @overload
6007 def mask(
6008 self,
6009 cond,
6010 other=...,
6011 *,
6012 inplace: Literal[False] = ...,
6013 axis: Axis | None = ...,
6014 level: Level = ...,
6015 ) -> Series:
6016 ...
6017
6018 @overload
6019 def mask(
6020 self,
6021 cond,
6022 other=...,
6023 *,
6024 inplace: Literal[True],
6025 axis: Axis | None = ...,
6026 level: Level = ...,
6027 ) -> None:
6028 ...
6029
6030 @overload
6031 def mask(
6032 self,
6033 cond,
6034 other=...,
6035 *,
6036 inplace: bool = ...,
6037 axis: Axis | None = ...,
6038 level: Level = ...,
6039 ) -> Series | None:
6040 ...
6041
6042 def mask(
6043 self,
6044 cond,
6045 other=lib.no_default,
6046 *,
6047 inplace: bool = False,
6048 axis: Axis | None = None,
6049 level: Level = None,
6050 ) -> Series | None:
6051 return super().mask(
6052 cond,
6053 other,
6054 inplace=inplace,
6055 axis=axis,
6056 level=level,
6057 )
6058
6059 # ----------------------------------------------------------------------
6060 # Add index
6061 _AXIS_ORDERS: list[Literal["index", "columns"]] = ["index"]
6062 _AXIS_LEN = len(_AXIS_ORDERS)
6063 _info_axis_number: Literal[0] = 0
6064 _info_axis_name: Literal["index"] = "index"
6065
6066 index = properties.AxisProperty(
6067 axis=0, doc="The index (axis labels) of the Series."
6068 )
6069
6070 # ----------------------------------------------------------------------
6071 # Accessor Methods
6072 # ----------------------------------------------------------------------
6073 str = CachedAccessor("str", StringMethods)
6074 dt = CachedAccessor("dt", CombinedDatetimelikeProperties)
6075 cat = CachedAccessor("cat", CategoricalAccessor)
6076 plot = CachedAccessor("plot", pandas.plotting.PlotAccessor)
6077 sparse = CachedAccessor("sparse", SparseAccessor)
6078
6079 # ----------------------------------------------------------------------
6080 # Add plotting methods to Series
6081 hist = pandas.plotting.hist_series
6082
6083 # ----------------------------------------------------------------------
6084 # Template-Based Arithmetic/Comparison Methods
6085
6086 def _cmp_method(self, other, op):
6087 res_name = ops.get_op_result_name(self, other)
6088
6089 if isinstance(other, Series) and not self._indexed_same(other):
6090 raise ValueError("Can only compare identically-labeled Series objects")
6091
6092 lvalues = self._values
6093 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
6094
6095 with np.errstate(all="ignore"):
6096 res_values = ops.comparison_op(lvalues, rvalues, op)
6097
6098 return self._construct_result(res_values, name=res_name)
6099
6100 def _logical_method(self, other, op):
6101 res_name = ops.get_op_result_name(self, other)
6102 self, other = ops.align_method_SERIES(self, other, align_asobject=True)
6103
6104 lvalues = self._values
6105 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
6106
6107 res_values = ops.logical_op(lvalues, rvalues, op)
6108 return self._construct_result(res_values, name=res_name)
6109
6110 def _arith_method(self, other, op):
6111 self, other = ops.align_method_SERIES(self, other)
6112 return base.IndexOpsMixin._arith_method(self, other, op)
6113
6114
6115Series._add_numeric_operations()
6116
6117# Add arithmetic!
6118ops.add_flex_arithmetic_methods(Series)