from __future__ import annotations

from collections import abc
from datetime import datetime
import functools
from itertools import zip_longest
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Literal,
    NoReturn,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
    using_pyarrow_string_dtype,
)

from pandas._libs import (
    NaT,
    algos as libalgos,
    index as libindex,
    lib,
    writers,
)
from pandas._libs.internals import BlockValuesRefs
import pandas._libs.join as libjoin
from pandas._libs.lib import (
    is_datetime_array,
    no_default,
)
from pandas._libs.tslibs import (
    IncompatibleFrequency,
    OutOfBoundsDatetime,
    Timestamp,
    tz_compare,
)
from pandas._typing import (
    AnyAll,
    ArrayLike,
    Axes,
    Axis,
    DropKeep,
    DtypeObj,
    F,
    IgnoreRaise,
    IndexLabel,
    JoinHow,
    Level,
    NaPosition,
    ReindexMethod,
    Self,
    Shape,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    DuplicateLabelError,
    InvalidIndexError,
)
from pandas.util._decorators import (
    Appender,
    cache_readonly,
    deprecate_nonkeyword_arguments,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_exception,
)

from pandas.core.dtypes.astype import (
    astype_array,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    common_dtype_categorical_compat,
    find_result_type,
    infer_dtype_from,
    maybe_cast_pointwise_result,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_platform_int,
    is_any_real_numeric_dtype,
    is_bool_dtype,
    is_ea_or_datetimelike_dtype,
    is_float,
    is_hashable,
    is_integer,
    is_iterator,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_scalar,
    is_signed_integer_dtype,
    is_string_dtype,
    needs_i8_conversion,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    ArrowDtype,
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    PeriodDtype,
    SparseDtype,
)
from pandas.core.dtypes.generic import (
    ABCCategoricalIndex,
    ABCDataFrame,
    ABCDatetimeIndex,
    ABCIntervalIndex,
    ABCMultiIndex,
    ABCPeriodIndex,
    ABCRangeIndex,
    ABCSeries,
    ABCTimedeltaIndex,
)
from pandas.core.dtypes.inference import is_dict_like
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    arraylike,
    nanops,
    ops,
)
from pandas.core.accessor import CachedAccessor
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.arrays import (
    ArrowExtensionArray,
    BaseMaskedArray,
    Categorical,
    DatetimeArray,
    ExtensionArray,
    TimedeltaArray,
)
from pandas.core.arrays.string_ import (
    StringArray,
    StringDtype,
)
from pandas.core.base import (
    IndexOpsMixin,
    PandasObject,
)
import pandas.core.common as com
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
    sanitize_array,
)
from pandas.core.indexers import (
    disallow_ndim_indexing,
    is_valid_positional_slice,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import (
    ensure_key_mapped,
    get_group_index_sorter,
    nargsort,
)
from pandas.core.strings.accessor import StringMethods

from pandas.io.formats.printing import (
    PrettyDict,
    default_pprint,
    format_object_summary,
    pprint_thing,
)

if TYPE_CHECKING:
    from collections.abc import (
        Hashable,
        Iterable,
        Sequence,
    )

    from pandas import (
        CategoricalIndex,
        DataFrame,
        MultiIndex,
        Series,
    )
    from pandas.core.arrays import (
        IntervalArray,
        PeriodArray,
    )

__all__ = ["Index"]

_unsortable_types = frozenset(("mixed", "mixed-integer"))

_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
_index_shared_docs: dict[str, str] = {}
str_t = str

_dtype_obj = np.dtype("object")

_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}


def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
        if not return_indexers:
            return join_index

        if lidx is not None:
            lidx = ensure_platform_int(lidx)
        if ridx is not None:
            ridx = ensure_platform_int(ridx)
        return join_index, lidx, ridx

    return cast(F, join)


def _new_Index(cls, d):
    """
    This is called upon unpickling; the default unpickling path takes no
    arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ (GH#13277)
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable sequence used for indexing and alignment.

    The basic object storing axis labels for all pandas objects.

    .. versionchanged:: 2.0.0

       Index can hold all numpy numeric dtypes (except float16). Previously only
       int64/uint64/float64 dtypes were accepted.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Index. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    copy : bool, default False
        Copy input data.
    name : object
        Name to be stored in the index.
    tupleize_cols : bool, default True
        When True, attempt to create a MultiIndex if possible.

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex : Index of datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.

    Notes
    -----
    An Index instance can **only** contain hashable objects.
    An Index instance *cannot* hold numpy float16 dtype.

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')

    >>> pd.Index([1, 2, 3], dtype="uint8")
    Index([1, 2, 3], dtype='uint8')
    """

    # similar to __array_priority__, positions Index after Series and DataFrame
    # but before ExtensionArray. Should NOT be overridden by subclasses.
    __pandas_priority__ = 2000

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments

    @final
    def _left_indexer_unique(self, other: Self) -> npt.NDArray[np.intp]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # similar but not identical to ov.searchsorted(sv)
        return libjoin.left_join_indexer_unique(sv, ov)

    @final
    def _left_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _inner_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _outer_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    _typ: str = "index"
    _data: ExtensionArray | np.ndarray
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]

    @cache_readonly
    def _can_hold_strings(self) -> bool:
        return not is_numeric_dtype(self.dtype)

    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine | libindex.ExtensionEngine]:
        return self._engine_types.get(self.dtype, libindex.ObjectEngine)

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

    _references = None

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Self:
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            # error: Incompatible return value type (got "MultiIndex",
            # expected "Self")
            return result  # type: ignore[return-value]

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in "iufcbmM":
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            return cls(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            if tupleize_cols:
                # GH#21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # GH#10697
                    from pandas.core.indexes.multi import MultiIndex

                    # error: Incompatible return value type (got "MultiIndex",
                    # expected "Self")
                    return MultiIndex.from_tuples(  # type: ignore[return-value]
                        data, names=name
                    )
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        result = klass._simple_new(arr, name, refs=refs)
        if dtype is None and is_pandas_object and data_dtype == np.object_:
            if result.dtype != data_dtype:
                warnings.warn(
                    "Dtype inference on a pandas object "
                    "(Series, Index, ExtensionArray) is deprecated. The Index "
                    "constructor will keep the original dtype in the future. "
                    "Call `infer_objects` on the result to get the old "
                    "behavior.",
                    FutureWarning,
                    stacklevel=2,
                )
        return result  # type: ignore[return-value]

    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.
        """
        if data.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        elif dtype == np.float16:
            # float16 not supported (no indexing engine)
            raise NotImplementedError("float16 indexes are not supported")

        if copy:
            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            data = data.copy()
        return data

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            return dtype.index_class

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif dtype.kind == "O":
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
            return Index

        raise NotImplementedError(dtype)

    # NOTE for new Index creation:

    # - _simple_new: It returns new Index with the same type as the caller.
    #   All metadata (such as name) must be provided by the caller.
    #   Using _shallow_copy is recommended because it fills in this metadata
    #   when not otherwise specified.

    # - _shallow_copy: It returns new Index with the same type (using
    #   _simple_new), but fills in the caller's metadata when not otherwise
    #   specified. Passed kwargs will overwrite the corresponding metadata.

    # See each method's docstring.
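
    # A minimal comment-only sketch of the distinction (illustrative, not
    # executed): given ``idx = pd.Index([1, 2, 3], name="x")`` and ``values``
    # already of a compatible array type,
    #
    #     type(idx)._simple_new(values)   # name must be passed explicitly
    #     idx._shallow_copy(values)       # fills in idx.name (-> "x") and
    #                                     # forwards idx._references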

    @classmethod
    def _simple_new(
        cls, values: ArrayLike, name: Hashable | None = None, refs=None
    ) -> Self:
        """
        We require that we have a dtype compatible for the values. If we are
        passed non-dtype-compatible values, coerce using the constructor.

        Must be careful not to recurse.
        """
        assert isinstance(values, cls._data_cls), type(values)

        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._reset_identity()
        if refs is not None:
            result._references = refs
        else:
            result._references = BlockValuesRefs()
        result._references.add_index_reference(result)

        return result

    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in "iufb":
                return Index(values, name=result.name)

        return result

    @cache_readonly
    def _constructor(self) -> type[Self]:
        return type(self)

    @final
    def _maybe_check_unique(self) -> None:
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)

    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
            positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        out = (
            Series(np.arange(len(self)), copy=False)
            .groupby(self, observed=False)
            .agg(list)[duplicates]
        )
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

    # --------------------------------------------------------------------
    # Index Internals Methods

    def _shallow_copy(self, values, name: Hashable = no_default) -> Self:
        """
        Create a new Index with the same class as the caller, without copying
        the data. Use the caller's object attributes, with passed-in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self._name if name is no_default else name

        return self._simple_new(values, name=name, refs=self._references)

    def _view(self) -> Self:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        result._cache = self._cache
        return result

    @final
    def _rename(self, name: Hashable) -> Self:
        """
        fastpath for rename if new name is already validated.
        """
        result = self._view()
        result._name = name
        return result

    @final
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is``, but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            Other object to compare against.

        Returns
        -------
        bool
            True if both have the same underlying data, False otherwise.

        See Also
        --------
        Index.identical : Works like ``Index.is_`` but also checks metadata.

        Examples
        --------
        >>> idx1 = pd.Index(['1', '2', '3'])
        >>> idx1.is_(idx1.view())
        True

        >>> idx1.is_(idx1.copy())
        False
        """
        if self is other:
            return True
        elif not hasattr(other, "_id"):
            return False
        elif self._id is None or other._id is None:
            return False
        else:
            return self._id is other._id

    @final
    def _reset_identity(self) -> None:
        """
        Initialize or reset the ``_id`` attribute with a new object.
        """
        self._id = object()

    @final
    def _cleanup(self) -> None:
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()

        if isinstance(self._values, ArrowExtensionArray) and self.dtype.kind in "Mm":
            import pyarrow as pa

            pa_type = self._values._pa_array.type
            if pa.types.is_timestamp(pa_type):
                target_values = self._values._to_datetimearray()
                return libindex.DatetimeEngine(target_values._ndarray)
            elif pa.types.is_duration(pa_type):
                target_values = self._values._to_timedeltaarray()
                return libindex.TimedeltaEngine(target_values._ndarray)

        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

    @final
    @cache_readonly
    def _dir_additions_for_owner(self) -> set[str_t]:
        """
        Add the string-like labels to the owner dataframe/series dir output.

        If this is a MultiIndex, its first-level values are used.
        """
        return {
            c
            for c in self.unique(level=0)[: get_option("display.max_dir_items")]
            if isinstance(c, str) and c.isidentifier()
        }

    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
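
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> len(idx)
        3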
        """
        return len(self._data)

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        The array interface, return my values.
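
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> np.asarray(idx)
        array([1, 2, 3])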
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)
        elif method == "reduce":
            result = lib.item_from_zerodim(result)
            return result

        if result.dtype == np.float16:
            result = result.astype(np.float32)

        return self.__array_wrap__(result)

    @final
    def __array_wrap__(self, result, context=None, return_scalar=False):
        """
        Gets called after a ufunc and other functions, e.g. np.split.
        """
        result = lib.item_from_zerodim(result)
        if (not isinstance(result, Index) and is_bool_dtype(result.dtype)) or np.ndim(
            result
        ) > 1:
            # exclude Index to avoid warning from is_bool_dtype deprecation;
            # in the Index case it doesn't matter which path we go down.
            # reached in plotting tests with e.g. np.nonzero(index)
            return result

        return Index(result, name=self.name)

    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.dtype
        dtype('int64')
        """
        return self._data.dtype

    @final
    def ravel(self, order: str_t = "C") -> Self:
        """
        Return a view on self.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        >>> s.index.ravel()
        Index(['a', 'b', 'c'], dtype='object')
        """
        return self[:]

    def view(self, cls=None):
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if needs_i8_conversion(dtype):
                idx_cls = self._dtype_to_subclass(dtype)
                arr = self.array.view(dtype)
                if isinstance(arr, ExtensionArray):
                    # here we exclude non-supported dt64/td64 dtypes
                    return idx_cls._simple_new(
                        arr, name=self.name, refs=self._references
                    )
                return arr

            result = self._data.view(cls)
        else:
            if cls is not None:
                warnings.warn(
                    # GH#55709
                    f"Passing a type in {type(self).__name__}.view is deprecated "
                    "and will raise in a future version. "
                    "Call view without any argument to retain the old behavior.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

            result = self._view()
        if isinstance(result, Index):
            result._id = self._id
        return result

    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to the given dtype.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.astype('float')
        Index([1.0, 2.0, 3.0], dtype='float64')
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if self.dtype == dtype:
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            result._references = self._references
            result._references.add_index_reference(result)
        return result

    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.take([2, 2, 1, 2])
        Index(['c', 'c', 'b', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> Self:
        if kwargs:
            nv.validate_take((), kwargs)
        if is_scalar(indices):
            raise TypeError("Expected indices to be array-like")
        indices = ensure_platform_int(indices)
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # Note: we discard fill_value and use self._na_value, only relevant
        # in the case where allow_fill is True and fill_value is not None
        values = self._values
        if isinstance(values, np.ndarray):
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        else:
            # algos.take passes 'axis' keyword which not all EAs accept
            taken = values.take(
                indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        return self._constructor._simple_new(taken, name=self.name)

    @final
    def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
        """
        We only use pandas-style take when allow_fill is True _and_
        fill_value is not None.
        """
        if allow_fill and fill_value is not None:
            # only fill if we are passing a non-None fill_value
            if self._can_hold_na:
                if (indices < -1).any():
                    raise ValueError(
                        "When allow_fill=True and fill_value is not None, "
                        "all indices must be >= -1"
                    )
            else:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
        else:
            allow_fill = False
        return allow_fill

    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis: None = None) -> Self:
        repeats = ensure_platform_int(repeats)
        nv.validate_repeat((), {"axis": axis})
        res_values = self._values.repeat(repeats)

        # _constructor so RangeIndex-> Index with an int64 dtype
        return self._constructor._simple_new(res_values, name=self.name)

    # --------------------------------------------------------------------
    # Copying Methods

    def copy(
        self,
        name: Hashable | None = None,
        deep: bool = False,
    ) -> Self:
        """
        Make a copy of this object.

        Name is set on the new object.

        Parameters
        ----------
        name : Label, optional
            Set name for new object.
        deep : bool, default False

        Returns
        -------
        Index
            Index refers to new object which is a copy of this object.

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> new_idx = idx.copy()
        >>> idx is new_idx
        False
        """

        name = self._validate_names(name=name, deep=deep)[0]
        if deep:
            new_data = self._data.copy()
            new_index = type(self)._simple_new(new_data, name=name)
        else:
            new_index = self._rename(name=name)
        return new_index

    @final
    def __copy__(self, **kwargs) -> Self:
        return self.copy(**kwargs)

    @final
    def __deepcopy__(self, memo=None) -> Self:
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)

    # --------------------------------------------------------------------
    # Rendering Methods

    @final
    def __repr__(self) -> str_t:
        """
        Return a string representation for this object.
        """
        klass_name = type(self).__name__
        data = self._format_data()
        attrs = self._format_attrs()
        attrs_str = [f"{k}={v}" for k, v in attrs]
        prepr = ", ".join(attrs_str)

        return f"{klass_name}({data}{prepr})"

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    @final
    def _format_data(self, name=None) -> str_t:
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = True

        if self.inferred_type == "string":
            is_justify = False
        elif isinstance(self.dtype, CategoricalDtype):
            self = cast("CategoricalIndex", self)
            if is_object_dtype(self.categories.dtype):
                is_justify = False
        elif isinstance(self, ABCRangeIndex):
            # We will do the relevant formatting via attrs
            return ""

        return format_object_summary(
            self,
            self._formatter_func,
            is_justify=is_justify,
            name=name,
            line_break_each_value=self._is_multi,
        )

    def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs: list[tuple[str_t, str_t | int | bool | None]] = []

        if not self._is_multi:
            attrs.append(("dtype", f"'{self.dtype}'"))

        if self.name is not None:
            attrs.append(("name", default_pprint(self.name)))
        elif self._is_multi and any(x is not None for x in self.names):
            attrs.append(("names", default_pprint(self.names)))

        max_seq_items = get_option("display.max_seq_items") or len(self)
        if len(self) > max_seq_items:
            attrs.append(("length", len(self)))
        return attrs

    @final
    def _get_level_names(self) -> Hashable | Sequence[Hashable]:
        """
        Return a name or list of names with None replaced by the level number.
        """
        if self._is_multi:
            return [
                level if name is None else name for level, name in enumerate(self.names)
            ]
        else:
            return 0 if self.name is None else self.name

    @final
    def _mpl_repr(self) -> np.ndarray:
        # how to represent ourselves to matplotlib
        if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
            return cast(np.ndarray, self.values)
        return self.astype(object, copy=False)._values

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str_t = "NaN",
    ) -> list[str_t]:
        """
        Render a string representation of the Index.
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        header = []
        if name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=na_rep)

    _default_na_rep = "NaN"

    @final
    def _format_flat(
        self,
        *,
        include_name: bool,
        formatter: Callable | None = None,
    ) -> list[str_t]:
        """
        Render a string representation of the Index.
        """
        header = []
        if include_name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=self._default_na_rep)

    def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str_t]:
        from pandas.io.formats.format import format_array

        values = self._values

        if (
            is_object_dtype(values.dtype)
            or is_string_dtype(values.dtype)
            or isinstance(self.dtype, (IntervalDtype, CategoricalDtype))
        ):
            # TODO: why do we need different justify for these cases?
            justify = "all"
        else:
            justify = "left"
        # passing leading_space=False breaks test_format_missing,
        # test_index_repr_in_frame_with_nan, but would otherwise make
        # trim_front unnecessary
        formatted = format_array(values, None, justify=justify)
        result = trim_front(formatted)
        return header + result

    def _get_values_for_csv(
        self,
        *,
        na_rep: str_t = "",
        decimal: str_t = ".",
        float_format=None,
        date_format=None,
        quoting=None,
    ) -> npt.NDArray[np.object_]:
        return get_values_for_csv(
            self._values,
            na_rep=na_rep,
            decimal=decimal,
            float_format=float_format,
            date_format=date_format,
            quoting=quoting,
        )

    def _summary(self, name=None) -> str_t:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            String with a summarized representation of the index.
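
        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx._summary()
        'Index: 3 entries, a to c'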
        """
        if len(self) > 0:
            head = self[0]
            if hasattr(head, "format") and not isinstance(head, str):
                head = head.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                head = self._formatter_func(head).replace("'", "")
            tail = self[-1]
            if hasattr(tail, "format") and not isinstance(tail, str):
                tail = tail.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                tail = self._formatter_func(tail).replace("'", "")

            index_summary = f", {head} to {tail}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        return f"{name}: {len(self)} entries{index_summary}"

    # --------------------------------------------------------------------
    # Conversion Methods

    def to_flat_index(self) -> Self:
        """
        Identity method.

        This is implemented for compatibility with subclass implementations
        when chaining.

        Returns
        -------
        pd.Index
            Caller.

        See Also
        --------
        MultiIndex.to_flat_index : Subclass implementation.
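
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.to_flat_index() is idx
        True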
        """
        return self

    @final
    def to_series(self, index=None, name: Hashable | None = None) -> Series:
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.

        See Also
        --------
        Index.to_frame : Convert an Index to a DataFrame.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')

        By default, the original index and original name are reused.

        >>> idx.to_series()
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: animal, dtype: object

        To enforce a new index, specify new labels to ``index``:

        >>> idx.to_series(index=[0, 1, 2])
        0     Ant
        1    Bear
        2     Cow
        Name: animal, dtype: object

        To override the name of the resulting column, specify ``name``:

        >>> idx.to_series(name='zoo')
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: zoo, dtype: object
        """
        from pandas import Series

        if index is None:
            index = self._view()
        if name is None:
            name = self.name

        return Series(self._values.copy(), index=index, name=name)

    def to_frame(
        self, index: bool = True, name: Hashable = lib.no_default
    ) -> DataFrame:
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, defaults to index.name
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False)
          animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo')
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        from pandas import DataFrame

        if name is lib.no_default:
            name = self._get_level_names()
        result = DataFrame({name: self}, copy=not using_copy_on_write())

        if index:
            result.index = self
        return result

    # --------------------------------------------------------------------
    # Name-Centric Methods

    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3], name='x')
        >>> idx
        Index([1, 2, 3], dtype='int64', name='x')
        >>> idx.name
        'x'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        maybe_extract_name(value, None, type(self))
        self._name = value

    @final
    def _validate_names(
        self, name=None, names=None, deep: bool = False
    ) -> list[Hashable]:
        """
        Handles the quirks of having a singular 'name' parameter for general
        Index and plural 'names' parameter for MultiIndex.
        """
        from copy import deepcopy

        if names is not None and name is not None:
            raise TypeError("Can only provide one of `names` and `name`")
        if names is None and name is None:
            new_names = deepcopy(self.names) if deep else self.names
        elif names is not None:
            if not is_list_like(names):
                raise TypeError("Must pass list-like as `names`.")
            new_names = names
        elif not is_list_like(name):
            new_names = [name]
        else:
            new_names = name

        if len(new_names) != len(self.names):
            raise ValueError(
                f"Length of new names must be {len(self.names)}, got {len(new_names)}"
            )

        # All items in 'new_names' need to be hashable
        validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")

        return new_names

    def _get_default_index_names(
        self, names: Hashable | Sequence[Hashable] | None = None, default=None
    ) -> list[Hashable]:
        """
        Get names of index.

        Parameters
        ----------
        names : int, str or 1-dimensional list, default None
            Index names to set.
        default : str
            Default name of index.

        Raises
        ------
        ValueError
            If names is not a str or 1-dimensional list.
        """
        from pandas.core.indexes.multi import MultiIndex

        if names is not None:
            if isinstance(names, (int, str)):
                names = [names]

        if not isinstance(names, list) and names is not None:
            raise ValueError("Index names must be str or 1-dimensional list")

        if not names:
            if isinstance(self, MultiIndex):
                names = com.fill_missing_names(self.names)
            else:
                names = [default] if self.name is None else [self.name]

        return names

    def _get_names(self) -> FrozenList:
        return FrozenList((self.name,))

    def _set_names(self, values, *, level=None) -> None:
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None.

        Raises
        ------
        TypeError
            If any name is not hashable.
        """
        if not is_list_like(values):
            raise ValueError("Names must be a list-like")
        if len(values) != 1:
            raise ValueError(f"Length of new names must be 1, got {len(values)}")

        # GH#20527
        # All items in 'name' need to be hashable:
        validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")

        self._name = values[0]

    names = property(fset=_set_names, fget=_get_names)

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: bool = ...) -> Self | None:
        ...

    def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------
        names : label or list of label or dict-like for MultiIndex
            Name(s) to set.

            .. versionchanged:: 1.3.0

        level : int, label or list of int or label, optional
            If the index is a MultiIndex and names is not dict-like, level(s) to set
            (None for all levels). Otherwise level must be None.

            .. versionchanged:: 1.3.0

        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx = idx.set_names(['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])

        When renaming levels with a dict, levels cannot be passed.

        >>> idx.set_names({'kind': 'snake'})
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['snake', 'year'])
        """
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
            raise TypeError("Can only pass dict-like as `names` for MultiIndex.")

        if is_dict_like(names) and level is not None:
            raise TypeError("Can not pass level for dictlike `names`.")

        if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
            # Transform dict to list of new names and corresponding levels
            level, names_adjusted = [], []
            for i, name in enumerate(self.names):
                if name in names.keys():
                    level.append(i)
                    names_adjusted.append(names[name])
            names = names_adjusted

        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            idx = self._view()

        idx._set_names(names, level=level)
        if not inplace:
            return idx
        return None

    @overload
    def rename(self, name, *, inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def rename(self, name, *, inplace: Literal[True]) -> None:
        ...

    @deprecate_nonkeyword_arguments(
        version="3.0", allowed_args=["self", "name"], name="rename"
    )
    def rename(self, name, inplace: bool = False) -> Self | None:
        """
        Alter Index or MultiIndex name.

        Able to set new names without level. Defaults to returning new index.
        Length of names must match number of levels in MultiIndex.

        Parameters
        ----------
        name : label or list of labels
            Name(s) to set.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.set_names : Able to set new names partially and by level.

        Examples
        --------
        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
        >>> idx.rename('grade')
        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]],
        ...                                  names=['kind', 'year'])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.rename(['species', 'year'])
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        >>> idx.rename('species')
        Traceback (most recent call last):
        TypeError: Must pass list-like as `names`.
        """
        return self.set_names([name], inplace=inplace)

1976 # --------------------------------------------------------------------
1977 # Level-Centric Methods
1978
1979 @property
1980 def nlevels(self) -> int:
1981 """
1982 Number of levels.
1983 """
1984 return 1
1985
1986 def _sort_levels_monotonic(self) -> Self:
1987 """
1988 Compat with MultiIndex.
1989 """
1990 return self
1991
1992 @final
1993 def _validate_index_level(self, level) -> None:
1994 """
1995 Validate index level.
1996
1997 For single-level Index getting level number is a no-op, but some
1998 verification must be done like in MultiIndex.
1999
2000 """
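        # Illustrative behavior (hypothetical inputs): level=0 or level=-1
        # pass silently; any other int raises IndexError; a non-int level
        # raises KeyError unless it equals self.name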
2001 if isinstance(level, int):
2002 if level < 0 and level != -1:
2003 raise IndexError(
2004 "Too many levels: Index has only 1 level, "
2005 f"{level} is not a valid level number"
2006 )
2007 if level > 0:
2008 raise IndexError(
2009 f"Too many levels: Index has only 1 level, not {level + 1}"
2010 )
2011 elif level != self.name:
2012 raise KeyError(
2013 f"Requested level ({level}) does not match index name ({self.name})"
2014 )
2015
2016 def _get_level_number(self, level) -> int:
2017 self._validate_index_level(level)
2018 return 0
2019
2020 def sortlevel(
2021 self,
2022 level=None,
2023 ascending: bool | list[bool] = True,
2024 sort_remaining=None,
2025 na_position: NaPosition = "first",
2026 ):
2027 """
2028 For internal compatibility with the Index API.
2029
        Sort the Index. This is for compat with MultiIndex.
2031
2032 Parameters
2033 ----------
2034 ascending : bool, default True
2035 False to sort in descending order
        na_position : {'first', 'last'}, default 'first'
2037 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
2038 the end.
2039
2040 .. versionadded:: 2.1.0
2041
        ``level`` and ``sort_remaining`` are compat parameters and are ignored.
2043
2044 Returns
2045 -------
2046 Index
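
        Examples
        --------
        A minimal illustration on a flat Index (the compat parameters are
        ignored here):

        >>> index = pd.Index([1, 3, 2])
        >>> index.sortlevel()
        (Index([1, 2, 3], dtype='int64'), array([0, 2, 1]))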
2047 """
2048 if not isinstance(ascending, (list, bool)):
2049 raise TypeError(
                "ascending must be a single bool value or "
                "a list of bool values of length 1"
2052 )
2053
2054 if isinstance(ascending, list):
2055 if len(ascending) != 1:
2056 raise TypeError("ascending must be a list of bool values of length 1")
2057 ascending = ascending[0]
2058
2059 if not isinstance(ascending, bool):
2060 raise TypeError("ascending must be a bool value")
2061
2062 return self.sort_values(
2063 return_indexer=True, ascending=ascending, na_position=na_position
2064 )
2065
2066 def _get_level_values(self, level) -> Index:
2067 """
2068 Return an Index of values for requested level.
2069
2070 This is primarily useful to get an individual level of values from a
2071 MultiIndex, but is provided on Index as well for compatibility.
2072
2073 Parameters
2074 ----------
2075 level : int or str
2076 It is either the integer position or the name of the level.
2077
2078 Returns
2079 -------
2080 Index
2081 Calling object, as there is only one level in the Index.
2082
2083 See Also
2084 --------
2085 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
2086
2087 Notes
2088 -----
2089 For Index, level should be 0, since there are no multiple levels.
2090
2091 Examples
2092 --------
2093 >>> idx = pd.Index(list('abc'))
2094 >>> idx
2095 Index(['a', 'b', 'c'], dtype='object')
2096
2097 Get level values by supplying `level` as integer:
2098
2099 >>> idx.get_level_values(0)
2100 Index(['a', 'b', 'c'], dtype='object')
2101 """
2102 self._validate_index_level(level)
2103 return self
2104
2105 get_level_values = _get_level_values
2106
2107 @final
2108 def droplevel(self, level: IndexLabel = 0):
2109 """
2110 Return index with requested level(s) removed.
2111
2112 If resulting index has only 1 level left, the result will be
2113 of Index type, not MultiIndex. The original index is not modified inplace.
2114
2115 Parameters
2116 ----------
2117 level : int, str, or list-like, default 0
            If a string is given, must be the name of a level.
2119 If list-like, elements must be names or indexes of levels.
2120
2121 Returns
2122 -------
2123 Index or MultiIndex
2124
2125 Examples
2126 --------
2127 >>> mi = pd.MultiIndex.from_arrays(
2128 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2129 >>> mi
2130 MultiIndex([(1, 3, 5),
2131 (2, 4, 6)],
2132 names=['x', 'y', 'z'])
2133
2134 >>> mi.droplevel()
2135 MultiIndex([(3, 5),
2136 (4, 6)],
2137 names=['y', 'z'])
2138
2139 >>> mi.droplevel(2)
2140 MultiIndex([(1, 3),
2141 (2, 4)],
2142 names=['x', 'y'])
2143
2144 >>> mi.droplevel('z')
2145 MultiIndex([(1, 3),
2146 (2, 4)],
2147 names=['x', 'y'])
2148
2149 >>> mi.droplevel(['x', 'y'])
2150 Index([5, 6], dtype='int64', name='z')
2151 """
2152 if not isinstance(level, (tuple, list)):
2153 level = [level]
2154
2155 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2156
2157 return self._drop_level_numbers(levnums)
2158
2159 @final
2160 def _drop_level_numbers(self, levnums: list[int]):
2161 """
2162 Drop MultiIndex levels by level _number_, not name.
2163 """
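        # Illustrative sketch (hypothetical values): for
        # mi = pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
        # mi._drop_level_numbers([0]) returns Index(["a", "b"], dtype="object")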
2164
2165 if not levnums and not isinstance(self, ABCMultiIndex):
2166 return self
2167 if len(levnums) >= self.nlevels:
2168 raise ValueError(
2169 f"Cannot remove {len(levnums)} levels from an index with "
2170 f"{self.nlevels} levels: at least one level must be left."
2171 )
2172 # The two checks above guarantee that here self is a MultiIndex
2173 self = cast("MultiIndex", self)
2174
2175 new_levels = list(self.levels)
2176 new_codes = list(self.codes)
2177 new_names = list(self.names)
2178
2179 for i in levnums:
2180 new_levels.pop(i)
2181 new_codes.pop(i)
2182 new_names.pop(i)
2183
2184 if len(new_levels) == 1:
2185 lev = new_levels[0]
2186
2187 if len(lev) == 0:
2188 # If lev is empty, lev.take will fail GH#42055
2189 if len(new_codes[0]) == 0:
2190 # GH#45230 preserve RangeIndex here
2191 # see test_reset_index_empty_rangeindex
2192 result = lev[:0]
2193 else:
2194 res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
2195 # _constructor instead of type(lev) for RangeIndex compat GH#35230
2196 result = lev._constructor._simple_new(res_values, name=new_names[0])
2197 else:
2198 # set nan if needed
2199 mask = new_codes[0] == -1
2200 result = new_levels[0].take(new_codes[0])
2201 if mask.any():
2202 result = result.putmask(mask, np.nan)
2203
2204 result._name = new_names[0]
2205
2206 return result
2207 else:
2208 from pandas.core.indexes.multi import MultiIndex
2209
2210 return MultiIndex(
2211 levels=new_levels,
2212 codes=new_codes,
2213 names=new_names,
2214 verify_integrity=False,
2215 )
2216
2217 # --------------------------------------------------------------------
2218 # Introspection Methods
2219
2220 @cache_readonly
2221 @final
2222 def _can_hold_na(self) -> bool:
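        # e.g. int64/uint64/bool -> False; float64, object, and datetime64
        # -> True; extension dtypes defer to dtype._can_hold_na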
2223 if isinstance(self.dtype, ExtensionDtype):
2224 return self.dtype._can_hold_na
2225 if self.dtype.kind in "iub":
2226 return False
2227 return True
2228
2229 @property
2230 def is_monotonic_increasing(self) -> bool:
2231 """
2232 Return a boolean if the values are equal or increasing.
2233
2234 Returns
2235 -------
2236 bool
2237
2238 See Also
2239 --------
2240 Index.is_monotonic_decreasing : Check if the values are equal or decreasing.
2241
2242 Examples
2243 --------
2244 >>> pd.Index([1, 2, 3]).is_monotonic_increasing
2245 True
2246 >>> pd.Index([1, 2, 2]).is_monotonic_increasing
2247 True
2248 >>> pd.Index([1, 3, 2]).is_monotonic_increasing
2249 False
2250 """
2251 return self._engine.is_monotonic_increasing
2252
2253 @property
2254 def is_monotonic_decreasing(self) -> bool:
2255 """
2256 Return a boolean if the values are equal or decreasing.
2257
2258 Returns
2259 -------
2260 bool
2261
2262 See Also
2263 --------
2264 Index.is_monotonic_increasing : Check if the values are equal or increasing.
2265
2266 Examples
2267 --------
2268 >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
2269 True
2270 >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
2271 True
2272 >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
2273 False
2274 """
2275 return self._engine.is_monotonic_decreasing
2276
2277 @final
2278 @property
2279 def _is_strictly_monotonic_increasing(self) -> bool:
2280 """
2281 Return if the index is strictly monotonic increasing
2282 (only increasing) values.
2283
2284 Examples
2285 --------
2286 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
2287 True
2288 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
2289 False
2290 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
2291 False
2292 """
2293 return self.is_unique and self.is_monotonic_increasing
2294
2295 @final
2296 @property
2297 def _is_strictly_monotonic_decreasing(self) -> bool:
2298 """
2299 Return if the index is strictly monotonic decreasing
2300 (only decreasing) values.
2301
2302 Examples
2303 --------
2304 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
2305 True
2306 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
2307 False
2308 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
2309 False
2310 """
2311 return self.is_unique and self.is_monotonic_decreasing
2312
2313 @cache_readonly
2314 def is_unique(self) -> bool:
2315 """
2316 Return if the index has unique values.
2317
2318 Returns
2319 -------
2320 bool
2321
2322 See Also
2323 --------
2324 Index.has_duplicates : Inverse method that checks if it has duplicate values.
2325
2326 Examples
2327 --------
2328 >>> idx = pd.Index([1, 5, 7, 7])
2329 >>> idx.is_unique
2330 False
2331
2332 >>> idx = pd.Index([1, 5, 7])
2333 >>> idx.is_unique
2334 True
2335
2336 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2337 ... "Watermelon"]).astype("category")
2338 >>> idx.is_unique
2339 False
2340
2341 >>> idx = pd.Index(["Orange", "Apple",
2342 ... "Watermelon"]).astype("category")
2343 >>> idx.is_unique
2344 True
2345 """
2346 return self._engine.is_unique
2347
2348 @final
2349 @property
2350 def has_duplicates(self) -> bool:
2351 """
2352 Check if the Index has duplicate values.
2353
2354 Returns
2355 -------
2356 bool
2357 Whether or not the Index has duplicate values.
2358
2359 See Also
2360 --------
2361 Index.is_unique : Inverse method that checks if it has unique values.
2362
2363 Examples
2364 --------
2365 >>> idx = pd.Index([1, 5, 7, 7])
2366 >>> idx.has_duplicates
2367 True
2368
2369 >>> idx = pd.Index([1, 5, 7])
2370 >>> idx.has_duplicates
2371 False
2372
2373 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2374 ... "Watermelon"]).astype("category")
2375 >>> idx.has_duplicates
2376 True
2377
2378 >>> idx = pd.Index(["Orange", "Apple",
2379 ... "Watermelon"]).astype("category")
2380 >>> idx.has_duplicates
2381 False
2382 """
2383 return not self.is_unique
2384
2385 @final
2386 def is_boolean(self) -> bool:
2387 """
2388 Check if the Index only consists of booleans.
2389
2390 .. deprecated:: 2.0.0
2391 Use `pandas.api.types.is_bool_dtype` instead.
2392
2393 Returns
2394 -------
2395 bool
2396 Whether or not the Index only consists of booleans.
2397
2398 See Also
2399 --------
2400 is_integer : Check if the Index only consists of integers (deprecated).
2401 is_floating : Check if the Index is a floating type (deprecated).
2402 is_numeric : Check if the Index only consists of numeric data (deprecated).
2403 is_object : Check if the Index is of the object dtype (deprecated).
        is_categorical : Check if the Index holds categorical data (deprecated).
2405 is_interval : Check if the Index holds Interval objects (deprecated).
2406
2407 Examples
2408 --------
2409 >>> idx = pd.Index([True, False, True])
2410 >>> idx.is_boolean() # doctest: +SKIP
2411 True
2412
2413 >>> idx = pd.Index(["True", "False", "True"])
2414 >>> idx.is_boolean() # doctest: +SKIP
2415 False
2416
2417 >>> idx = pd.Index([True, False, "True"])
2418 >>> idx.is_boolean() # doctest: +SKIP
2419 False
2420 """
2421 warnings.warn(
2422 f"{type(self).__name__}.is_boolean is deprecated. "
            "Use pandas.api.types.is_bool_dtype instead.",
2424 FutureWarning,
2425 stacklevel=find_stack_level(),
2426 )
2427 return self.inferred_type in ["boolean"]
2428
2429 @final
2430 def is_integer(self) -> bool:
2431 """
2432 Check if the Index only consists of integers.
2433
2434 .. deprecated:: 2.0.0
2435 Use `pandas.api.types.is_integer_dtype` instead.
2436
2437 Returns
2438 -------
2439 bool
2440 Whether or not the Index only consists of integers.
2441
2442 See Also
2443 --------
2444 is_boolean : Check if the Index only consists of booleans (deprecated).
2445 is_floating : Check if the Index is a floating type (deprecated).
2446 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2448 is_categorical : Check if the Index holds categorical data (deprecated).
2449 is_interval : Check if the Index holds Interval objects (deprecated).
2450
2451 Examples
2452 --------
2453 >>> idx = pd.Index([1, 2, 3, 4])
2454 >>> idx.is_integer() # doctest: +SKIP
2455 True
2456
2457 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2458 >>> idx.is_integer() # doctest: +SKIP
2459 False
2460
2461 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2462 >>> idx.is_integer() # doctest: +SKIP
2463 False
2464 """
2465 warnings.warn(
2466 f"{type(self).__name__}.is_integer is deprecated. "
2467 "Use pandas.api.types.is_integer_dtype instead.",
2468 FutureWarning,
2469 stacklevel=find_stack_level(),
2470 )
2471 return self.inferred_type in ["integer"]
2472
2473 @final
2474 def is_floating(self) -> bool:
2475 """
2476 Check if the Index is a floating type.
2477
2478 .. deprecated:: 2.0.0
            Use `pandas.api.types.is_float_dtype` instead.
2480
2481 The Index may consist of only floats, NaNs, or a mix of floats,
2482 integers, or NaNs.
2483
2484 Returns
2485 -------
2486 bool
            Whether or not the Index only consists of floats, NaNs, or
            a mix of floats, integers, or NaNs.
2489
2490 See Also
2491 --------
2492 is_boolean : Check if the Index only consists of booleans (deprecated).
2493 is_integer : Check if the Index only consists of integers (deprecated).
2494 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2496 is_categorical : Check if the Index holds categorical data (deprecated).
2497 is_interval : Check if the Index holds Interval objects (deprecated).
2498
2499 Examples
2500 --------
2501 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2502 >>> idx.is_floating() # doctest: +SKIP
2503 True
2504
2505 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2506 >>> idx.is_floating() # doctest: +SKIP
2507 True
2508
2509 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2510 >>> idx.is_floating() # doctest: +SKIP
2511 True
2512
2513 >>> idx = pd.Index([1, 2, 3, 4])
2514 >>> idx.is_floating() # doctest: +SKIP
2515 False
2516 """
2517 warnings.warn(
2518 f"{type(self).__name__}.is_floating is deprecated. "
2519 "Use pandas.api.types.is_float_dtype instead.",
2520 FutureWarning,
2521 stacklevel=find_stack_level(),
2522 )
2523 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2524
2525 @final
2526 def is_numeric(self) -> bool:
2527 """
2528 Check if the Index only consists of numeric data.
2529
2530 .. deprecated:: 2.0.0
            Use `pandas.api.types.is_any_real_numeric_dtype` instead.
2532
2533 Returns
2534 -------
2535 bool
2536 Whether or not the Index only consists of numeric data.
2537
2538 See Also
2539 --------
2540 is_boolean : Check if the Index only consists of booleans (deprecated).
2541 is_integer : Check if the Index only consists of integers (deprecated).
2542 is_floating : Check if the Index is a floating type (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2544 is_categorical : Check if the Index holds categorical data (deprecated).
2545 is_interval : Check if the Index holds Interval objects (deprecated).
2546
2547 Examples
2548 --------
2549 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2550 >>> idx.is_numeric() # doctest: +SKIP
2551 True
2552
2553 >>> idx = pd.Index([1, 2, 3, 4.0])
2554 >>> idx.is_numeric() # doctest: +SKIP
2555 True
2556
2557 >>> idx = pd.Index([1, 2, 3, 4])
2558 >>> idx.is_numeric() # doctest: +SKIP
2559 True
2560
2561 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2562 >>> idx.is_numeric() # doctest: +SKIP
2563 True
2564
2565 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2566 >>> idx.is_numeric() # doctest: +SKIP
2567 False
2568 """
2569 warnings.warn(
2570 f"{type(self).__name__}.is_numeric is deprecated. "
            "Use pandas.api.types.is_any_real_numeric_dtype instead.",
2572 FutureWarning,
2573 stacklevel=find_stack_level(),
2574 )
2575 return self.inferred_type in ["integer", "floating"]
2576
2577 @final
2578 def is_object(self) -> bool:
2579 """
2580 Check if the Index is of the object dtype.
2581
2582 .. deprecated:: 2.0.0
2583 Use `pandas.api.types.is_object_dtype` instead.
2584
2585 Returns
2586 -------
2587 bool
2588 Whether or not the Index is of the object dtype.
2589
2590 See Also
2591 --------
2592 is_boolean : Check if the Index only consists of booleans (deprecated).
2593 is_integer : Check if the Index only consists of integers (deprecated).
2594 is_floating : Check if the Index is a floating type (deprecated).
2595 is_numeric : Check if the Index only consists of numeric data (deprecated).
2596 is_categorical : Check if the Index holds categorical data (deprecated).
2597 is_interval : Check if the Index holds Interval objects (deprecated).
2598
2599 Examples
2600 --------
2601 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2602 >>> idx.is_object() # doctest: +SKIP
2603 True
2604
2605 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2606 >>> idx.is_object() # doctest: +SKIP
2607 True
2608
2609 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2610 ... "Watermelon"]).astype("category")
2611 >>> idx.is_object() # doctest: +SKIP
2612 False
2613
2614 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2615 >>> idx.is_object() # doctest: +SKIP
2616 False
2617 """
2618 warnings.warn(
            f"{type(self).__name__}.is_object is deprecated. "
            "Use pandas.api.types.is_object_dtype instead.",
2621 FutureWarning,
2622 stacklevel=find_stack_level(),
2623 )
2624 return is_object_dtype(self.dtype)
2625
2626 @final
2627 def is_categorical(self) -> bool:
2628 """
2629 Check if the Index holds categorical data.
2630
2631 .. deprecated:: 2.0.0
2632 Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.
2633
2634 Returns
2635 -------
2636 bool
2637 True if the Index is categorical.
2638
2639 See Also
2640 --------
2641 CategoricalIndex : Index for categorical data.
2642 is_boolean : Check if the Index only consists of booleans (deprecated).
2643 is_integer : Check if the Index only consists of integers (deprecated).
2644 is_floating : Check if the Index is a floating type (deprecated).
2645 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2647 is_interval : Check if the Index holds Interval objects (deprecated).
2648
2649 Examples
2650 --------
2651 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2652 ... "Watermelon"]).astype("category")
2653 >>> idx.is_categorical() # doctest: +SKIP
2654 True
2655
2656 >>> idx = pd.Index([1, 3, 5, 7])
2657 >>> idx.is_categorical() # doctest: +SKIP
2658 False
2659
2660 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2661 >>> s
2662 0 Peter
2663 1 Victor
2664 2 Elisabeth
2665 3 Mar
2666 dtype: object
2667 >>> s.index.is_categorical() # doctest: +SKIP
2668 False
2669 """
2670 warnings.warn(
            f"{type(self).__name__}.is_categorical is deprecated. "
            "Use pandas.api.types.is_categorical_dtype instead.",
2673 FutureWarning,
2674 stacklevel=find_stack_level(),
2675 )
2676
2677 return self.inferred_type in ["categorical"]
2678
2679 @final
2680 def is_interval(self) -> bool:
2681 """
2682 Check if the Index holds Interval objects.
2683
2684 .. deprecated:: 2.0.0
2685 Use `isinstance(index.dtype, pd.IntervalDtype)` instead.
2686
2687 Returns
2688 -------
2689 bool
2690 Whether or not the Index holds Interval objects.
2691
2692 See Also
2693 --------
2694 IntervalIndex : Index for Interval objects.
2695 is_boolean : Check if the Index only consists of booleans (deprecated).
2696 is_integer : Check if the Index only consists of integers (deprecated).
2697 is_floating : Check if the Index is a floating type (deprecated).
2698 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2700 is_categorical : Check if the Index holds categorical data (deprecated).
2701
2702 Examples
2703 --------
2704 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2705 ... pd.Interval(left=5, right=10)])
2706 >>> idx.is_interval() # doctest: +SKIP
2707 True
2708
2709 >>> idx = pd.Index([1, 3, 5, 7])
2710 >>> idx.is_interval() # doctest: +SKIP
2711 False
2712 """
2713 warnings.warn(
            f"{type(self).__name__}.is_interval is deprecated. "
            "Use pandas.api.types.is_interval_dtype instead.",
2716 FutureWarning,
2717 stacklevel=find_stack_level(),
2718 )
2719 return self.inferred_type in ["interval"]
2720
2721 @final
2722 def _holds_integer(self) -> bool:
2723 """
2724 Whether the type is an integer type.
2725 """
2726 return self.inferred_type in ["integer", "mixed-integer"]
2727
2728 @final
2729 def holds_integer(self) -> bool:
2730 """
2731 Whether the type is an integer type.
2732
2733 .. deprecated:: 2.0.0
2734 Use `pandas.api.types.infer_dtype` instead
2735 """
2736 warnings.warn(
2737 f"{type(self).__name__}.holds_integer is deprecated. "
2738 "Use pandas.api.types.infer_dtype instead.",
2739 FutureWarning,
2740 stacklevel=find_stack_level(),
2741 )
2742 return self._holds_integer()
2743
2744 @cache_readonly
2745 def inferred_type(self) -> str_t:
2746 """
2747 Return a string of the type inferred from the values.
2748
2749 Examples
2750 --------
2751 >>> idx = pd.Index([1, 2, 3])
2752 >>> idx
2753 Index([1, 2, 3], dtype='int64')
2754 >>> idx.inferred_type
2755 'integer'
2756 """
2757 return lib.infer_dtype(self._values, skipna=False)
2758
2759 @cache_readonly
2760 @final
2761 def _is_all_dates(self) -> bool:
2762 """
2763 Whether or not the index values only consist of dates.
2764 """
2765 if needs_i8_conversion(self.dtype):
2766 return True
2767 elif self.dtype != _dtype_obj:
2768 # TODO(ExtensionIndex): 3rd party EA might override?
2769 # Note: this includes IntervalIndex, even when the left/right
2770 # contain datetime-like objects.
2771 return False
2772 elif self._is_multi:
2773 return False
2774 return is_datetime_array(ensure_object(self._values))
2775
2776 @final
2777 @cache_readonly
2778 def _is_multi(self) -> bool:
2779 """
2780 Cached check equivalent to isinstance(self, MultiIndex)
2781 """
2782 return isinstance(self, ABCMultiIndex)
2783
2784 # --------------------------------------------------------------------
2785 # Pickle Methods
2786
2787 def __reduce__(self):
2788 d = {"data": self._data, "name": self.name}
2789 return _new_Index, (type(self), d), None
2790
2791 # --------------------------------------------------------------------
2792 # Null Handling Methods
2793
2794 @cache_readonly
2795 def _na_value(self):
2796 """The expected NA value to use with this index."""
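        # e.g. datetime64/timedelta64 -> NaT; other numpy dtypes -> np.nan;
        # extension dtypes -> dtype.na_value (pd.NA for masked arrays)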
2797 dtype = self.dtype
2798 if isinstance(dtype, np.dtype):
2799 if dtype.kind in "mM":
2800 return NaT
2801 return np.nan
2802 return dtype.na_value
2803
2804 @cache_readonly
2805 def _isnan(self) -> npt.NDArray[np.bool_]:
2806 """
2807 Return if each value is NaN.
2808 """
2809 if self._can_hold_na:
2810 return isna(self)
2811 else:
            # shouldn't reach this condition; callers check hasnans beforehand
2813 values = np.empty(len(self), dtype=np.bool_)
2814 values.fill(False)
2815 return values
2816
2817 @cache_readonly
2818 def hasnans(self) -> bool:
2819 """
2820 Return True if there are any NaNs.
2821
2822 Enables various performance speedups.
2823
2824 Returns
2825 -------
2826 bool
2827
2828 Examples
2829 --------
2830 >>> s = pd.Series([1, 2, 3], index=['a', 'b', None])
2831 >>> s
2832 a 1
2833 b 2
2834 None 3
2835 dtype: int64
2836 >>> s.index.hasnans
2837 True
2838 """
2839 if self._can_hold_na:
2840 return bool(self._isnan.any())
2841 else:
2842 return False
2843
2844 @final
2845 def isna(self) -> npt.NDArray[np.bool_]:
2846 """
2847 Detect missing values.
2848
2849 Return a boolean same-sized object indicating if the values are NA.
2850 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2851 mapped to ``True`` values.
        Everything else gets mapped to ``False`` values. Characters such as
        empty strings ``''`` or :attr:`numpy.inf` are not considered NA values.
2854
2855 Returns
2856 -------
2857 numpy.ndarray[bool]
2858 A boolean array of whether my values are NA.
2859
2860 See Also
2861 --------
2862 Index.notna : Boolean inverse of isna.
2863 Index.dropna : Omit entries with missing values.
2864 isna : Top-level isna.
2865 Series.isna : Detect missing values in Series object.
2866
2867 Examples
2868 --------
2869 Show which entries in a pandas.Index are NA. The result is an
2870 array.
2871
2872 >>> idx = pd.Index([5.2, 6.0, np.nan])
2873 >>> idx
2874 Index([5.2, 6.0, nan], dtype='float64')
2875 >>> idx.isna()
2876 array([False, False, True])
2877
2878 Empty strings are not considered NA values. None is considered an NA
2879 value.
2880
2881 >>> idx = pd.Index(['black', '', 'red', None])
2882 >>> idx
2883 Index(['black', '', 'red', None], dtype='object')
2884 >>> idx.isna()
2885 array([False, False, False, True])
2886
2887 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2888
2889 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2890 ... pd.Timestamp(''), None, pd.NaT])
2891 >>> idx
2892 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2893 dtype='datetime64[ns]', freq=None)
2894 >>> idx.isna()
2895 array([False, True, True, True])
2896 """
2897 return self._isnan
2898
2899 isnull = isna
2900
2901 @final
2902 def notna(self) -> npt.NDArray[np.bool_]:
2903 """
2904 Detect existing (non-missing) values.
2905
2906 Return a boolean same-sized object indicating if the values are not NA.
2907 Non-missing values get mapped to ``True``. Characters such as empty
2908 strings ``''`` or :attr:`numpy.inf` are not considered NA values.
2909 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2910 values.
2911
2912 Returns
2913 -------
2914 numpy.ndarray[bool]
2915 Boolean array to indicate which entries are not NA.
2916
2917 See Also
2918 --------
2919 Index.notnull : Alias of notna.
2920 Index.isna: Inverse of notna.
2921 notna : Top-level notna.
2922
2923 Examples
2924 --------
2925 Show which entries in an Index are not NA. The result is an
2926 array.
2927
2928 >>> idx = pd.Index([5.2, 6.0, np.nan])
2929 >>> idx
2930 Index([5.2, 6.0, nan], dtype='float64')
2931 >>> idx.notna()
2932 array([ True, True, False])
2933
        Empty strings are not considered NA values. None is considered an NA
        value.
2936
2937 >>> idx = pd.Index(['black', '', 'red', None])
2938 >>> idx
2939 Index(['black', '', 'red', None], dtype='object')
2940 >>> idx.notna()
2941 array([ True, True, True, False])
2942 """
2943 return ~self.isna()
2944
2945 notnull = notna
2946
2947 def fillna(self, value=None, downcast=lib.no_default):
2948 """
2949 Fill NA/NaN values with the specified value.
2950
2951 Parameters
2952 ----------
2953 value : scalar
2954 Scalar value to use to fill holes (e.g. 0).
            This value cannot be a list-like.
        downcast : dict, default None
2957 A dict of item->dtype of what to downcast if possible,
2958 or the string 'infer' which will try to downcast to an appropriate
2959 equal type (e.g. float64 to int64 if possible).
2960
2961 .. deprecated:: 2.1.0
2962
2963 Returns
2964 -------
2965 Index
2966
2967 See Also
2968 --------
2969 DataFrame.fillna : Fill NaN values of a DataFrame.
2970 Series.fillna : Fill NaN Values of a Series.
2971
2972 Examples
2973 --------
2974 >>> idx = pd.Index([np.nan, np.nan, 3])
2975 >>> idx.fillna(0)
2976 Index([0.0, 0.0, 3.0], dtype='float64')
2977 """
2978 if not is_scalar(value):
2979 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
2980 if downcast is not lib.no_default:
2981 warnings.warn(
2982 f"The 'downcast' keyword in {type(self).__name__}.fillna is "
2983 "deprecated and will be removed in a future version. "
2984 "It was previously silently ignored.",
2985 FutureWarning,
2986 stacklevel=find_stack_level(),
2987 )
2988 else:
2989 downcast = None
2990
2991 if self.hasnans:
2992 result = self.putmask(self._isnan, value)
2993 if downcast is None:
2994 # no need to care metadata other than name
2995 # because it can't have freq if it has NaTs
2996 # _with_infer needed for test_fillna_categorical
2997 return Index._with_infer(result, name=self.name)
2998 raise NotImplementedError(
2999 f"{type(self).__name__}.fillna does not support 'downcast' "
3000 "argument values other than 'None'."
3001 )
3002 return self._view()
3003
3004 def dropna(self, how: AnyAll = "any") -> Self:
3005 """
3006 Return Index without NA/NaN values.
3007
3008 Parameters
3009 ----------
3010 how : {'any', 'all'}, default 'any'
3011 If the Index is a MultiIndex, drop the value when any or all levels
3012 are NaN.
3013
3014 Returns
3015 -------
3016 Index
3017
3018 Examples
3019 --------
3020 >>> idx = pd.Index([1, np.nan, 3])
3021 >>> idx.dropna()
3022 Index([1.0, 3.0], dtype='float64')
3023 """
3024 if how not in ("any", "all"):
3025 raise ValueError(f"invalid how option: {how}")
3026
3027 if self.hasnans:
3028 res_values = self._values[~self._isnan]
3029 return type(self)._simple_new(res_values, name=self.name)
3030 return self._view()
3031
3032 # --------------------------------------------------------------------
3033 # Uniqueness Methods
3034
3035 def unique(self, level: Hashable | None = None) -> Self:
3036 """
3037 Return unique values in the index.
3038
        Unique values are returned in order of appearance; this does NOT sort.
3040
3041 Parameters
3042 ----------
3043 level : int or hashable, optional
3044 Only return values from specified level (for MultiIndex).
3045 If int, gets the level by integer position, else by level name.
3046
3047 Returns
3048 -------
3049 Index
3050
3051 See Also
3052 --------
        unique : Return unique values from an array-like.
3054 Series.unique : Return unique values of Series object.
3055
3056 Examples
3057 --------
3058 >>> idx = pd.Index([1, 1, 2, 3, 3])
3059 >>> idx.unique()
3060 Index([1, 2, 3], dtype='int64')
3061 """
3062 if level is not None:
3063 self._validate_index_level(level)
3064
3065 if self.is_unique:
3066 return self._view()
3067
3068 result = super().unique()
3069 return self._shallow_copy(result)
3070
3071 def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
3072 """
3073 Return Index with duplicate values removed.
3074
3075 Parameters
3076 ----------
3077 keep : {'first', 'last', ``False``}, default 'first'
3078 - 'first' : Drop duplicates except for the first occurrence.
3079 - 'last' : Drop duplicates except for the last occurrence.
3080 - ``False`` : Drop all duplicates.
3081
3082 Returns
3083 -------
3084 Index
3085
3086 See Also
3087 --------
3088 Series.drop_duplicates : Equivalent method on Series.
3089 DataFrame.drop_duplicates : Equivalent method on DataFrame.
3090 Index.duplicated : Related method on Index, indicating duplicate
3091 Index values.
3092
3093 Examples
3094 --------
        Generate a pandas.Index with duplicate values.
3096
3097 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
3098
3099 The `keep` parameter controls which duplicate values are removed.
3100 The value 'first' keeps the first occurrence for each
3101 set of duplicated entries. The default value of keep is 'first'.
3102
3103 >>> idx.drop_duplicates(keep='first')
3104 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
3105
3106 The value 'last' keeps the last occurrence for each set of duplicated
3107 entries.
3108
3109 >>> idx.drop_duplicates(keep='last')
3110 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
3111
3112 The value ``False`` discards all sets of duplicated entries.
3113
3114 >>> idx.drop_duplicates(keep=False)
3115 Index(['cow', 'beetle', 'hippo'], dtype='object')
3116 """
3117 if self.is_unique:
3118 return self._view()
3119
3120 return super().drop_duplicates(keep=keep)
3121
3122 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
3123 """
3124 Indicate duplicate index values.
3125
3126 Duplicated values are indicated as ``True`` values in the resulting
3127 array. Either all duplicates, all except the first, or all except the
3128 last occurrence of duplicates can be indicated.
3129
3130 Parameters
3131 ----------
3132 keep : {'first', 'last', False}, default 'first'
            Which occurrences in a set of duplicates, if any, to leave unmarked.
3134
3135 - 'first' : Mark duplicates as ``True`` except for the first
3136 occurrence.
3137 - 'last' : Mark duplicates as ``True`` except for the last
3138 occurrence.
3139 - ``False`` : Mark all duplicates as ``True``.
3140
3141 Returns
3142 -------
3143 np.ndarray[bool]
3144
3145 See Also
3146 --------
3147 Series.duplicated : Equivalent method on pandas.Series.
3148 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
3149 Index.drop_duplicates : Remove duplicate values from Index.
3150
3151 Examples
3152 --------
3153 By default, for each set of duplicated values, the first occurrence is
3154 set to False and all others to True:
3155
3156 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3157 >>> idx.duplicated()
3158 array([False, False, True, False, True])
3159
3160 which is equivalent to
3161
3162 >>> idx.duplicated(keep='first')
3163 array([False, False, True, False, True])
3164
        By using 'last', the last occurrence of each set of duplicated values
        is set to False and all others to True:
3167
3168 >>> idx.duplicated(keep='last')
3169 array([ True, False, True, False, False])
3170
        By setting keep to ``False``, all duplicates are True:
3172
3173 >>> idx.duplicated(keep=False)
3174 array([ True, False, True, False, True])
3175 """
3176 if self.is_unique:
3177 # fastpath available bc we are immutable
3178 return np.zeros(len(self), dtype=bool)
3179 return self._duplicated(keep=keep)
3180
3181 # --------------------------------------------------------------------
3182 # Arithmetic & Logical Methods
3183
3184 def __iadd__(self, other):
3185 # alias for __add__
3186 return self + other
3187
3188 @final
3189 def __nonzero__(self) -> NoReturn:
3190 raise ValueError(
3191 f"The truth value of a {type(self).__name__} is ambiguous. "
3192 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3193 )
3194
3195 __bool__ = __nonzero__
3196
3197 # --------------------------------------------------------------------
3198 # Set Operation Methods
3199
3200 def _get_reconciled_name_object(self, other):
3201 """
3202 If the result of a set operation will be self,
3203 return self, unless the name changes, in which
3204 case make a shallow copy of self.
3205 """
3206 name = get_op_result_name(self, other)
3207 if self.name is not name:
3208 return self.rename(name)
3209 return self
3210
3211 @final
3212 def _validate_sort_keyword(self, sort):
3213 if sort not in [None, False, True]:
3214 raise ValueError(
3215 "The 'sort' keyword only takes the values of "
3216 f"None, True, or False; {sort} was passed."
3217 )
3218
3219 @final
3220 def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
3221 """
3222 With mismatched timezones, cast both to UTC.
3223 """
        # Caller is responsible for checking
        # `self.dtype != other.dtype`
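        # Illustrative sketch: a tz="US/Eastern" operand and a tz="Asia/Tokyo"
        # operand are both converted to UTC so the setop runs on one common
        # dtype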
3226 if (
3227 isinstance(self, ABCDatetimeIndex)
3228 and isinstance(other, ABCDatetimeIndex)
3229 and self.tz is not None
3230 and other.tz is not None
3231 ):
3232 # GH#39328, GH#45357
3233 left = self.tz_convert("UTC")
3234 right = other.tz_convert("UTC")
3235 return left, right
3236 return self, other
3237
3238 @final
3239 def union(self, other, sort=None):
3240 """
3241 Form the union of two Index objects.
3242
3243 If the Index objects are incompatible, both Index objects will be
3244 cast to dtype('object') first.
3245
3246 Parameters
3247 ----------
3248 other : Index or array-like
3249 sort : bool or None, default None
3250 Whether to sort the resulting Index.
3251
3252 * None : Sort the result, except when
3253
3254 1. `self` and `other` are equal.
3255 2. `self` or `other` has length 0.
3256 3. Some values in `self` or `other` cannot be compared.
3257 A RuntimeWarning is issued in this case.
3258
3259 * False : do not sort the result.
3260 * True : Sort the result (which may raise TypeError).
3261
3262 Returns
3263 -------
3264 Index
3265
3266 Examples
3267 --------
3268 Union matching dtypes
3269
3270 >>> idx1 = pd.Index([1, 2, 3, 4])
3271 >>> idx2 = pd.Index([3, 4, 5, 6])
3272 >>> idx1.union(idx2)
3273 Index([1, 2, 3, 4, 5, 6], dtype='int64')
3274
3275 Union mismatched dtypes
3276
3277 >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
3278 >>> idx2 = pd.Index([1, 2, 3, 4])
3279 >>> idx1.union(idx2)
3280 Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
3281
3282 MultiIndex case
3283
3284 >>> idx1 = pd.MultiIndex.from_arrays(
3285 ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
3286 ... )
3287 >>> idx1
3288 MultiIndex([(1, 'Red'),
3289 (1, 'Blue'),
3290 (2, 'Red'),
3291 (2, 'Blue')],
3292 )
3293 >>> idx2 = pd.MultiIndex.from_arrays(
3294 ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
3295 ... )
3296 >>> idx2
3297 MultiIndex([(3, 'Red'),
3298 (3, 'Green'),
3299 (2, 'Red'),
3300 (2, 'Green')],
3301 )
3302 >>> idx1.union(idx2)
3303 MultiIndex([(1, 'Blue'),
3304 (1, 'Red'),
3305 (2, 'Blue'),
3306 (2, 'Green'),
3307 (2, 'Red'),
3308 (3, 'Green'),
3309 (3, 'Red')],
3310 )
3311 >>> idx1.union(idx2, sort=False)
3312 MultiIndex([(1, 'Red'),
3313 (1, 'Blue'),
3314 (2, 'Red'),
3315 (2, 'Blue'),
3316 (3, 'Red'),
3317 (3, 'Green'),
3318 (2, 'Green')],
3319 )
3320 """
3321 self._validate_sort_keyword(sort)
3322 self._assert_can_do_setop(other)
3323 other, result_name = self._convert_can_do_setop(other)
3324
3325 if self.dtype != other.dtype:
3326 if (
3327 isinstance(self, ABCMultiIndex)
3328 and not is_object_dtype(_unpack_nested_dtype(other))
3329 and len(other) > 0
3330 ):
3331 raise NotImplementedError(
3332 "Can only union MultiIndex with MultiIndex or Index of tuples, "
3333 "try mi.to_flat_index().union(other) instead."
3334 )
3335 self, other = self._dti_setop_align_tzs(other, "union")
3336
3337 dtype = self._find_common_type_compat(other)
3338 left = self.astype(dtype, copy=False)
3339 right = other.astype(dtype, copy=False)
3340 return left.union(right, sort=sort)
3341
3342 elif not len(other) or self.equals(other):
3343 # NB: whether this (and the `if not len(self)` check below) come before
3344 # or after the dtype equality check above affects the returned dtype
3345 result = self._get_reconciled_name_object(other)
3346 if sort is True:
3347 return result.sort_values()
3348 return result
3349
3350 elif not len(self):
3351 result = other._get_reconciled_name_object(self)
3352 if sort is True:
3353 return result.sort_values()
3354 return result
3355
3356 result = self._union(other, sort=sort)
3357
3358 return self._wrap_setop_result(other, result)
3359
3360 def _union(self, other: Index, sort: bool | None):
3361 """
        Specific union logic should go here. In subclasses, union behavior
        should be overridden here rather than in `self.union`.
3364
3365 Parameters
3366 ----------
3367 other : Index or array-like
        sort : bool or None
3369 Whether to sort the resulting index.
3370
3371 * True : sort the result
3372 * False : do not sort the result.
3373 * None : sort the result, except when `self` and `other` are equal
3374 or when the values cannot be compared.
3375
3376 Returns
3377 -------
3378 Index
3379 """
3380 lvals = self._values
3381 rvals = other._values
3382
3383 if (
3384 sort in (None, True)
3385 and self.is_monotonic_increasing
3386 and other.is_monotonic_increasing
3387 and not (self.has_duplicates and other.has_duplicates)
3388 and self._can_use_libjoin
3389 and other._can_use_libjoin
3390 ):
3391 # Both are monotonic and at least one is unique, so can use outer join
3392 # (actually don't need either unique, but without this restriction
3393 # test_union_same_value_duplicated_in_both fails)
3394 try:
3395 return self._outer_indexer(other)[0]
3396 except (TypeError, IncompatibleFrequency):
3397 # incomparable objects; should only be for object dtype
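                # e.g. lvals=[1, "a"] and rvals=["a", 2] (hypothetical,
                # unorderable mix) give value_list=[1, "a", 2] below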
3398 value_list = list(lvals)
3399
3400 # worth making this faster? a very unusual case
3401 value_set = set(lvals)
3402 value_list.extend([x for x in rvals if x not in value_set])
3403 # If objects are unorderable, we must have object dtype.
3404 return np.array(value_list, dtype=object)
3405
3406 elif not other.is_unique:
3407 # other has duplicates
3408 result_dups = algos.union_with_duplicates(self, other)
3409 return _maybe_try_sort(result_dups, sort)
3410
3411 # The rest of this method is analogous to Index._intersection_via_get_indexer
3412
3413 # Self may have duplicates; other already checked as unique
3414 # find indexes of things in "other" that are not in "self"
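        # Illustrative values: self=[1, 2, 3], other=[2, 3, 4] give
        # indexer=[1, 2, -1], so missing=[2] and other.take(missing)=[4]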
3415 if self._index_as_unique:
3416 indexer = self.get_indexer(other)
3417 missing = (indexer == -1).nonzero()[0]
3418 else:
3419 missing = algos.unique1d(self.get_indexer_non_unique(other)[1])
3420
3421 result: Index | MultiIndex | ArrayLike
3422 if self._is_multi:
3423 # Preserve MultiIndex to avoid losing dtypes
3424 result = self.append(other.take(missing))
3425
3426 else:
3427 if len(missing) > 0:
3428 other_diff = rvals.take(missing)
3429 result = concat_compat((lvals, other_diff))
3430 else:
3431 result = lvals
3432
3433 if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
3434 # if both are monotonic then result should already be sorted
3435 result = _maybe_try_sort(result, sort)
3436
3437 return result
3438
3439 @final
3440 def _wrap_setop_result(self, other: Index, result) -> Index:
3441 name = get_op_result_name(self, other)
3442 if isinstance(result, Index):
3443 if result.name != name:
3444 result = result.rename(name)
3445 else:
3446 result = self._shallow_copy(result, name=name)
3447 return result
3448
3449 @final
3450 def intersection(self, other, sort: bool = False):
3451 # default sort keyword is different here from other setops intentionally
3452 # done in GH#25063
3453 """
3454 Form the intersection of two Index objects.
3455
3456 This returns a new Index with elements common to the index and `other`.
3457
3458 Parameters
3459 ----------
3460 other : Index or array-like
3461 sort : True, False or None, default False
3462 Whether to sort the resulting index.
3463
3464 * None : sort the result, except when `self` and `other` are equal
3465 or when the values cannot be compared.
3466 * False : do not sort the result.
3467 * True : Sort the result (which may raise TypeError).
3468
3469 Returns
3470 -------
3471 Index
3472
3473 Examples
3474 --------
3475 >>> idx1 = pd.Index([1, 2, 3, 4])
3476 >>> idx2 = pd.Index([3, 4, 5, 6])
3477 >>> idx1.intersection(idx2)
3478 Index([3, 4], dtype='int64')
3479 """
3480 self._validate_sort_keyword(sort)
3481 self._assert_can_do_setop(other)
3482 other, result_name = self._convert_can_do_setop(other)
3483
3484 if self.dtype != other.dtype:
3485 self, other = self._dti_setop_align_tzs(other, "intersection")
3486
3487 if self.equals(other):
3488 if not self.is_unique:
3489 result = self.unique()._get_reconciled_name_object(other)
3490 else:
3491 result = self._get_reconciled_name_object(other)
3492 if sort is True:
3493 result = result.sort_values()
3494 return result
3495
3496 if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful to preserve commutativity
3498
3499 if self._is_multi or other._is_multi:
3500 # _convert_can_do_setop ensures that we have both or neither
3501 # We retain self.levels
3502 return self[:0].rename(result_name)
3503
3504 dtype = self._find_common_type_compat(other)
3505 if self.dtype == dtype:
3506 # Slicing allows us to retain DTI/TDI.freq, RangeIndex
3507
3508 # Note: self[:0] vs other[:0] affects
3509 # 1) which index's `freq` we get in DTI/TDI cases
3510 # This may be a historical artifact, i.e. no documented
3511 # reason for this choice.
3512 # 2) The `step` we get in RangeIndex cases
3513 if len(self) == 0:
3514 return self[:0].rename(result_name)
3515 else:
3516 return other[:0].rename(result_name)
3517
3518 return Index([], dtype=dtype, name=result_name)
3519
3520 elif not self._should_compare(other):
3521 # We can infer that the intersection is empty.
3522 if isinstance(self, ABCMultiIndex):
3523 return self[:0].rename(result_name)
3524 return Index([], name=result_name)
3525
3526 elif self.dtype != other.dtype:
3527 dtype = self._find_common_type_compat(other)
3528 this = self.astype(dtype, copy=False)
3529 other = other.astype(dtype, copy=False)
3530 return this.intersection(other, sort=sort)
3531
3532 result = self._intersection(other, sort=sort)
3533 return self._wrap_intersection_result(other, result)
3534
3535 def _intersection(self, other: Index, sort: bool = False):
3536 """
3537 intersection specialized to the case with matching dtypes.
3538 """
3539 if (
3540 self.is_monotonic_increasing
3541 and other.is_monotonic_increasing
3542 and self._can_use_libjoin
3543 and other._can_use_libjoin
3544 ):
3545 try:
3546 res_indexer, indexer, _ = self._inner_indexer(other)
3547 except TypeError:
3548 # non-comparable; should only be for object dtype
3549 pass
3550 else:
3551 # TODO: algos.unique1d should preserve DTA/TDA
3552 if is_numeric_dtype(self.dtype):
3553 # This is faster, because Index.unique() checks for uniqueness
3554 # before calculating the unique values.
3555 res = algos.unique1d(res_indexer)
3556 else:
3557 result = self.take(indexer)
3558 res = result.drop_duplicates()
3559 return ensure_wrapped_if_datetimelike(res)
3560
3561 res_values = self._intersection_via_get_indexer(other, sort=sort)
3562 res_values = _maybe_try_sort(res_values, sort)
3563 return res_values
3564
3565 def _wrap_intersection_result(self, other, result):
3566 # We will override for MultiIndex to handle empty results
3567 return self._wrap_setop_result(other, result)
3568
3569 @final
3570 def _intersection_via_get_indexer(
3571 self, other: Index | MultiIndex, sort
3572 ) -> ArrayLike | MultiIndex:
3573 """
3574 Find the intersection of two Indexes using get_indexer.
3575
3576 Returns
3577 -------
3578 np.ndarray or ExtensionArray or MultiIndex
3579 The returned array will be unique.
3580 """
3581 left_unique = self.unique()
3582 right_unique = other.unique()
3583
3584 # even though we are unique, we need get_indexer_for for IntervalIndex
3585 indexer = left_unique.get_indexer_for(right_unique)
3586
3587 mask = indexer != -1
3588
3589 taker = indexer.take(mask.nonzero()[0])
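        # Illustrative values: left_unique=['a', 'b', 'c'] and
        # right_unique=['b', 'c', 'd'] give indexer=[1, 2, -1], taker=[1, 2]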
3590 if sort is False:
3591 # sort bc we want the elements in the same order they are in self
3592 # unnecessary in the case with sort=None bc we will sort later
3593 taker = np.sort(taker)
3594
3595 result: MultiIndex | ExtensionArray | np.ndarray
3596 if isinstance(left_unique, ABCMultiIndex):
3597 result = left_unique.take(taker)
3598 else:
3599 result = left_unique.take(taker)._values
3600 return result
3601
3602 @final
3603 def difference(self, other, sort=None):
3604 """
3605 Return a new Index with elements of index not in `other`.
3606
3607 This is the set difference of two Index objects.
3608
3609 Parameters
3610 ----------
3611 other : Index or array-like
3612 sort : bool or None, default None
3613 Whether to sort the resulting index. By default, the
3614 values are attempted to be sorted, but any TypeError from
3615 incomparable elements is caught by pandas.
3616
3617 * None : Attempt to sort the result, but catch any TypeErrors
3618 from comparing incomparable elements.
3619 * False : Do not sort the result.
3620 * True : Sort the result (which may raise TypeError).
3621
3622 Returns
3623 -------
3624 Index
3625
3626 Examples
3627 --------
3628 >>> idx1 = pd.Index([2, 1, 3, 4])
3629 >>> idx2 = pd.Index([3, 4, 5, 6])
3630 >>> idx1.difference(idx2)
3631 Index([1, 2], dtype='int64')
3632 >>> idx1.difference(idx2, sort=False)
3633 Index([2, 1], dtype='int64')
3634 """
3635 self._validate_sort_keyword(sort)
3636 self._assert_can_do_setop(other)
3637 other, result_name = self._convert_can_do_setop(other)
3638
3639 # Note: we do NOT call _dti_setop_align_tzs here, as there
3640 # is no requirement that .difference be commutative, so it does
3641 # not cast to object.
3642
3643 if self.equals(other):
3644 # Note: we do not (yet) sort even if sort=None GH#24959
3645 return self[:0].rename(result_name)
3646
3647 if len(other) == 0:
3648 # Note: we do not (yet) sort even if sort=None GH#24959
3649 result = self.unique().rename(result_name)
3650 if sort is True:
3651 return result.sort_values()
3652 return result
3653
3654 if not self._should_compare(other):
3655 # Nothing matches -> difference is everything
3656 result = self.unique().rename(result_name)
3657 if sort is True:
3658 return result.sort_values()
3659 return result
3660
3661 result = self._difference(other, sort=sort)
3662 return self._wrap_difference_result(other, result)
3663
3664 def _difference(self, other, sort):
3665 # overridden by RangeIndex
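        # Illustrative values: this=[2, 1, 3, 4], other=[3, 4, 5, 6] give
        # other.get_indexer_for(this)=[-1, -1, 0, 1], so the_diff=[2, 1]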
3666 this = self
3667 if isinstance(self, ABCCategoricalIndex) and self.hasnans and other.hasnans:
3668 this = this.dropna()
3669 other = other.unique()
3670 the_diff = this[other.get_indexer_for(this) == -1]
3671 the_diff = the_diff if this.is_unique else the_diff.unique()
3672 the_diff = _maybe_try_sort(the_diff, sort)
3673 return the_diff
3674
3675 def _wrap_difference_result(self, other, result):
3676 # We will override for MultiIndex to handle empty results
3677 return self._wrap_setop_result(other, result)
3678
3679 def symmetric_difference(self, other, result_name=None, sort=None):
3680 """
3681 Compute the symmetric difference of two Index objects.
3682
3683 Parameters
3684 ----------
3685 other : Index or array-like
3686 result_name : str
3687 sort : bool or None, default None
3688 Whether to sort the resulting index. By default, the
3689 values are attempted to be sorted, but any TypeError from
3690 incomparable elements is caught by pandas.
3691
3692 * None : Attempt to sort the result, but catch any TypeErrors
3693 from comparing incomparable elements.
3694 * False : Do not sort the result.
3695 * True : Sort the result (which may raise TypeError).
3696
3697 Returns
3698 -------
3699 Index
3700
3701 Notes
3702 -----
3703 ``symmetric_difference`` contains elements that appear in either
3704 ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
3705 ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
3706 dropped.
3707
3708 Examples
3709 --------
3710 >>> idx1 = pd.Index([1, 2, 3, 4])
3711 >>> idx2 = pd.Index([2, 3, 4, 5])
3712 >>> idx1.symmetric_difference(idx2)
3713 Index([1, 5], dtype='int64')
3714 """
3715 self._validate_sort_keyword(sort)
3716 self._assert_can_do_setop(other)
3717 other, result_name_update = self._convert_can_do_setop(other)
3718 if result_name is None:
3719 result_name = result_name_update
3720
3721 if self.dtype != other.dtype:
3722 self, other = self._dti_setop_align_tzs(other, "symmetric_difference")
3723
3724 if not self._should_compare(other):
3725 return self.union(other, sort=sort).rename(result_name)
3726
3727 elif self.dtype != other.dtype:
3728 dtype = self._find_common_type_compat(other)
3729 this = self.astype(dtype, copy=False)
3730 that = other.astype(dtype, copy=False)
3731 return this.symmetric_difference(that, sort=sort).rename(result_name)
3732
3733 this = self.unique()
3734 other = other.unique()
3735 indexer = this.get_indexer_for(other)
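        # Illustrative values: this=[1, 2, 3, 4], other=[2, 3, 4, 5] give
        # indexer=[1, 2, 3, -1]; below, left_diff=[1] and right_diff=[5]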
3736
3737 # {this} minus {other}
3738 common_indexer = indexer.take((indexer != -1).nonzero()[0])
3739 left_indexer = np.setdiff1d(
3740 np.arange(this.size), common_indexer, assume_unique=True
3741 )
3742 left_diff = this.take(left_indexer)
3743
3744 # {other} minus {this}
3745 right_indexer = (indexer == -1).nonzero()[0]
3746 right_diff = other.take(right_indexer)
3747
3748 res_values = left_diff.append(right_diff)
3749 result = _maybe_try_sort(res_values, sort)
3750
3751 if not self._is_multi:
3752 return Index(result, name=result_name, dtype=res_values.dtype)
3753 else:
3754 left_diff = cast("MultiIndex", left_diff)
3755 if len(result) == 0:
3756 # result might be an Index, if other was an Index
3757 return left_diff.remove_unused_levels().set_names(result_name)
3758 return result.set_names(result_name)
3759
3760 @final
3761 def _assert_can_do_setop(self, other) -> bool:
3762 if not is_list_like(other):
3763 raise TypeError("Input must be Index or array-like")
3764 return True
3765
3766 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3767 if not isinstance(other, Index):
3768 other = Index(other, name=self.name)
3769 result_name = self.name
3770 else:
3771 result_name = get_op_result_name(self, other)
3772 return other, result_name
3773
3774 # --------------------------------------------------------------------
3775 # Indexing Methods
3776
3777 def get_loc(self, key):
3778 """
3779 Get integer location, slice or boolean mask for requested label.
3780
3781 Parameters
3782 ----------
3783 key : label
3784
3785 Returns
3786 -------
3787 int if unique index, slice if monotonic index, else mask
3788
3789 Examples
3790 --------
3791 >>> unique_index = pd.Index(list('abc'))
3792 >>> unique_index.get_loc('b')
3793 1
3794
3795 >>> monotonic_index = pd.Index(list('abbc'))
3796 >>> monotonic_index.get_loc('b')
3797 slice(1, 3, None)
3798
3799 >>> non_monotonic_index = pd.Index(list('abcb'))
3800 >>> non_monotonic_index.get_loc('b')
3801 array([False, True, False, True])
3802 """
3803 casted_key = self._maybe_cast_indexer(key)
3804 try:
3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
3818 raise
3819
3820 @final
3821 def get_indexer(
3822 self,
3823 target,
3824 method: ReindexMethod | None = None,
3825 limit: int | None = None,
3826 tolerance=None,
3827 ) -> npt.NDArray[np.intp]:
3828 """
3829 Compute indexer and mask for new index given the current index.
3830
3831 The indexer should be then used as an input to ndarray.take to align the
3832 current data to the new index.
3833
3834 Parameters
3835 ----------
3836 target : Index
3837 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
3838 * default: exact matches only.
3839 * pad / ffill: find the PREVIOUS index value if no exact match.
3840 * backfill / bfill: use NEXT index value if no exact match
3841 * nearest: use the NEAREST index value if no exact match. Tied
3842 distances are broken by preferring the larger index value.
3843 limit : int, optional
3844 Maximum number of consecutive labels in ``target`` to match for
3845 inexact matches.
3846 tolerance : optional
3847 Maximum distance between original and new labels for inexact
3848 matches. The values of the index at the matching locations must
3849 satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
3850
3851 Tolerance may be a scalar value, which applies the same tolerance
3852 to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, and Series; it
            must be the same size as the index, and its dtype must exactly
            match the index's type.
3856
3857 Returns
3858 -------
3859 np.ndarray[np.intp]
3860 Integers from 0 to n - 1 indicating that the index at these
3861 positions matches the corresponding target values. Missing values
3862 in the target are marked by -1.
3863
3864 Notes
3865 -----
3866 Returns -1 for unmatched values, for further explanation see the
3867 example below.
3868
3869 Examples
3870 --------
3871 >>> index = pd.Index(['c', 'a', 'b'])
3872 >>> index.get_indexer(['a', 'b', 'x'])
3873 array([ 1, 2, -1])
3874
3875 Notice that the return value is an array of locations in ``index``
3876 and ``x`` is marked by -1, as it is not in ``index``.
3877 """
3878 method = clean_reindex_fill_method(method)
3879 orig_target = target
3880 target = self._maybe_cast_listlike_indexer(target)
3881
3882 self._check_indexing_method(method, limit, tolerance)
3883
3884 if not self._index_as_unique:
3885 raise InvalidIndexError(self._requires_unique_msg)
3886
3887 if len(target) == 0:
3888 return np.array([], dtype=np.intp)
3889
3890 if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex gets special treatment because numeric scalars
            # can be matched to Interval scalars
3893 return self._get_indexer_non_comparable(target, method=method, unique=True)
3894
3895 if isinstance(self.dtype, CategoricalDtype):
3896 # _maybe_cast_listlike_indexer ensures target has our dtype
3897 # (could improve perf by doing _should_compare check earlier?)
3898 assert self.dtype == target.dtype
3899
3900 indexer = self._engine.get_indexer(target.codes)
3901 if self.hasnans and target.hasnans:
3902 # After _maybe_cast_listlike_indexer, target elements which do not
3903 # belong to some category are changed to NaNs
3904 # Mask to track actual NaN values compared to inserted NaN values
3905 # GH#45361
3906 target_nans = isna(orig_target)
3907 loc = self.get_loc(np.nan)
3908 mask = target.isna()
3909 indexer[target_nans] = loc
3910 indexer[mask & ~target_nans] = -1
3911 return indexer
3912
3913 if isinstance(target.dtype, CategoricalDtype):
3914 # potential fastpath
3915 # get an indexer for unique categories then propagate to codes via take_nd
3916 # get_indexer instead of _get_indexer needed for MultiIndex cases
3917 # e.g. test_append_different_columns_types
3918 categories_indexer = self.get_indexer(target.categories)
3919
3920 indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
3921
3922 if (not self._is_multi and self.hasnans) and target.hasnans:
3923 # Exclude MultiIndex because hasnans raises NotImplementedError
3924 # we should only get here if we are unique, so loc is an integer
3925 # GH#41934
3926 loc = self.get_loc(np.nan)
3927 mask = target.isna()
3928 indexer[mask] = loc
3929
3930 return ensure_platform_int(indexer)
3931
3932 pself, ptarget = self._maybe_downcast_for_indexing(target)
3933 if pself is not self or ptarget is not target:
3934 return pself.get_indexer(
3935 ptarget, method=method, limit=limit, tolerance=tolerance
3936 )
3937
3938 if self.dtype == target.dtype and self.equals(target):
3939 # Only call equals if we have same dtype to avoid inference/casting
3940 return np.arange(len(target), dtype=np.intp)
3941
3942 if self.dtype != target.dtype and not self._should_partial_index(target):
3943 # _should_partial_index e.g. IntervalIndex with numeric scalars
3944 # that can be matched to Interval scalars.
3945 dtype = self._find_common_type_compat(target)
3946
3947 this = self.astype(dtype, copy=False)
3948 target = target.astype(dtype, copy=False)
3949 return this._get_indexer(
3950 target, method=method, limit=limit, tolerance=tolerance
3951 )
3952
3953 return self._get_indexer(target, method, limit, tolerance)
3954
3955 def _get_indexer(
3956 self,
3957 target: Index,
3958 method: str_t | None = None,
3959 limit: int | None = None,
3960 tolerance=None,
3961 ) -> npt.NDArray[np.intp]:
3962 if tolerance is not None:
3963 tolerance = self._convert_tolerance(tolerance, target)
3964
3965 if method in ["pad", "backfill"]:
3966 indexer = self._get_fill_indexer(target, method, limit, tolerance)
3967 elif method == "nearest":
3968 indexer = self._get_nearest_indexer(target, limit, tolerance)
3969 else:
3970 if target._is_multi and self._is_multi:
3971 engine = self._engine
3972 # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
3973 # has no attribute "_extract_level_codes"
3974 tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
3975 target
3976 )
3977 else:
3978 tgt_values = target._get_engine_target()
3979
3980 indexer = self._engine.get_indexer(tgt_values)
3981
3982 return ensure_platform_int(indexer)
3983
3984 @final
3985 def _should_partial_index(self, target: Index) -> bool:
3986 """
3987 Should we attempt partial-matching indexing?
3988 """
3989 if isinstance(self.dtype, IntervalDtype):
3990 if isinstance(target.dtype, IntervalDtype):
3991 return False
3992 # "Index" has no attribute "left"
3993 return self.left._should_compare(target) # type: ignore[attr-defined]
3994 return False
3995
3996 @final
3997 def _check_indexing_method(
3998 self,
3999 method: str_t | None,
4000 limit: int | None = None,
4001 tolerance=None,
4002 ) -> None:
4003 """
4004 Raise if we have a get_indexer `method` that is not supported or valid.
4005 """
4006 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
4007 # in practice the clean_reindex_fill_method call would raise
4008 # before we get here
4009 raise ValueError("Invalid fill method") # pragma: no cover
4010
4011 if self._is_multi:
4012 if method == "nearest":
4013 raise NotImplementedError(
4014 "method='nearest' not implemented yet "
4015 "for MultiIndex; see GitHub issue 9365"
4016 )
4017 if method in ("pad", "backfill"):
4018 if tolerance is not None:
4019 raise NotImplementedError(
4020 "tolerance not implemented yet for MultiIndex"
4021 )
4022
4023 if isinstance(self.dtype, (IntervalDtype, CategoricalDtype)):
4024 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
4025 if method is not None:
4026 raise NotImplementedError(
4027 f"method {method} not yet implemented for {type(self).__name__}"
4028 )
4029
4030 if method is None:
4031 if tolerance is not None:
4032 raise ValueError(
4033 "tolerance argument only valid if doing pad, "
4034 "backfill or nearest reindexing"
4035 )
4036 if limit is not None:
4037 raise ValueError(
4038 "limit argument only valid if doing pad, "
4039 "backfill or nearest reindexing"
4040 )
4041
4042 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
4043 # override this method on subclasses
4044 tolerance = np.asarray(tolerance)
4045 if target.size != tolerance.size and tolerance.size > 1:
4046 raise ValueError("list-like tolerance size must match target index size")
4047 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
4048 if tolerance.ndim > 0:
4049 raise ValueError(
4050 f"tolerance argument for {type(self).__name__} with dtype "
4051 f"{self.dtype} must contain numeric elements if it is list type"
4052 )
4053
4054 raise ValueError(
4055 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
4056 f"must be numeric if it is a scalar: {repr(tolerance)}"
4057 )
4058 return tolerance
4059
4060 @final
4061 def _get_fill_indexer(
4062 self, target: Index, method: str_t, limit: int | None = None, tolerance=None
4063 ) -> npt.NDArray[np.intp]:
4064 if self._is_multi:
4065 if not (self.is_monotonic_increasing or self.is_monotonic_decreasing):
4066 raise ValueError("index must be monotonic increasing or decreasing")
4067 encoded = self.append(target)._engine.values # type: ignore[union-attr]
4068 self_encoded = Index(encoded[: len(self)])
4069 target_encoded = Index(encoded[len(self) :])
4070 return self_encoded._get_fill_indexer(
4071 target_encoded, method, limit, tolerance
4072 )
4073
4074 if self.is_monotonic_increasing and target.is_monotonic_increasing:
4075 target_values = target._get_engine_target()
4076 own_values = self._get_engine_target()
4077 if not isinstance(target_values, np.ndarray) or not isinstance(
4078 own_values, np.ndarray
4079 ):
4080 raise NotImplementedError
4081
4082 if method == "pad":
4083 indexer = libalgos.pad(own_values, target_values, limit=limit)
4084 else:
4085 # i.e. "backfill"
4086 indexer = libalgos.backfill(own_values, target_values, limit=limit)
4087 else:
4088 indexer = self._get_fill_indexer_searchsorted(target, method, limit)
4089 if tolerance is not None and len(self):
4090 indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
4091 return indexer
4092
4093 @final
4094 def _get_fill_indexer_searchsorted(
4095 self, target: Index, method: str_t, limit: int | None = None
4096 ) -> npt.NDArray[np.intp]:
4097 """
4098 Fallback pad/backfill get_indexer that works for monotonic decreasing
4099 indexes and non-monotonic targets.
4100 """
4101 if limit is not None:
4102 raise ValueError(
4103 f"limit argument for {repr(method)} method only well-defined "
4104 "if index and target are monotonic"
4105 )
4106
4107 side: Literal["left", "right"] = "left" if method == "pad" else "right"
4108
4109 # find exact matches first (this simplifies the algorithm)
4110 indexer = self.get_indexer(target)
4111 nonexact = indexer == -1
4112 indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
4113 if side == "left":
4114 # searchsorted returns "indices into a sorted array such that,
4115 # if the corresponding elements in v were inserted before the
4116 # indices, the order of a would be preserved".
4117 # Thus, we need to subtract 1 to find values to the left.
4118 indexer[nonexact] -= 1
            # This also maps not-found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values.
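            # Illustrative example: with self = [10, 20, 30] and method="pad",
            #  a target value of 5 gives searchsorted 0, minus 1 -> -1 (no
            #  previous label), while 25 gives 2, minus 1 -> 1 (pad from 20).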
4122 else:
4123 # Mark indices to the right of the largest value as not found
4124 indexer[indexer == len(self)] = -1
4125 return indexer
4126
4127 @final
4128 def _get_nearest_indexer(
4129 self, target: Index, limit: int | None, tolerance
4130 ) -> npt.NDArray[np.intp]:
4131 """
4132 Get the indexer for the nearest index labels; requires an index with
4133 values that can be subtracted from each other (e.g., not strings or
4134 tuples).
4135 """
4136 if not len(self):
4137 return self._get_fill_indexer(target, "pad")
4138
4139 left_indexer = self.get_indexer(target, "pad", limit=limit)
4140 right_indexer = self.get_indexer(target, "backfill", limit=limit)
4141
4142 left_distances = self._difference_compat(target, left_indexer)
4143 right_distances = self._difference_compat(target, right_indexer)
4144
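        # Tie-breaking: on a monotonic increasing index, operator.lt sends
        #  equal distances to the backfill (right) side, i.e. the larger
        #  index value, matching the documented behavior of method="nearest".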
4145 op = operator.lt if self.is_monotonic_increasing else operator.le
4146 indexer = np.where(
4147 # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
4148 # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
4149 # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
4150 op(left_distances, right_distances) # type: ignore[arg-type]
4151 | (right_indexer == -1),
4152 left_indexer,
4153 right_indexer,
4154 )
4155 if tolerance is not None:
4156 indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
4157 return indexer
4158
4159 @final
4160 def _filter_indexer_tolerance(
4161 self,
4162 target: Index,
4163 indexer: npt.NDArray[np.intp],
4164 tolerance,
4165 ) -> npt.NDArray[np.intp]:
4166 distance = self._difference_compat(target, indexer)
4167
4168 return np.where(distance <= tolerance, indexer, -1)
4169
4170 @final
4171 def _difference_compat(
4172 self, target: Index, indexer: npt.NDArray[np.intp]
4173 ) -> ArrayLike:
4174 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
4175 # of DateOffset objects, which do not support __abs__ (and would be slow
4176 # if they did)
4177
4178 if isinstance(self.dtype, PeriodDtype):
4179 # Note: we only get here with matching dtypes
4180 own_values = cast("PeriodArray", self._data)._ndarray
4181 target_values = cast("PeriodArray", target._data)._ndarray
4182 diff = own_values[indexer] - target_values
4183 else:
4184 # error: Unsupported left operand type for - ("ExtensionArray")
4185 diff = self._values[indexer] - target._values # type: ignore[operator]
4186 return abs(diff)
4187
4188 # --------------------------------------------------------------------
4189 # Indexer Conversion Methods
4190
4191 @final
4192 def _validate_positional_slice(self, key: slice) -> None:
4193 """
4194 For positional indexing, a slice must have either int or None
4195 for each of start, stop, and step.
4196 """
4197 self._validate_indexer("positional", key.start, "iloc")
4198 self._validate_indexer("positional", key.stop, "iloc")
4199 self._validate_indexer("positional", key.step, "iloc")
4200
4201 def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
4202 """
4203 Convert a slice indexer.
4204
        By definition, these are labels unless the slice is found to be
        positional. Floats are not allowed as the start, stop, or step of
        the slice.

        Parameters
        ----------
        key : slice
            The slice to convert.
        kind : {'loc', 'getitem'}
        """
4213
4214 # potentially cast the bounds to integers
4215 start, stop, step = key.start, key.stop, key.step
4216
4217 # figure out if this is a positional indexer
4218 is_index_slice = is_valid_positional_slice(key)
4219
4220 # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
4221 # to simplify this.
4222 if lib.is_np_dtype(self.dtype, "f"):
4223 # We always treat __getitem__ slicing as label-based
4224 # translate to locations
4225 if kind == "getitem" and is_index_slice and not start == stop and step != 0:
                # exclude step=0 from the warning because it will raise anyway
                # exclude start == stop since the result will be empty either
                #  way, or the slice is [:] / [::-1] (start and stop both
                #  None), which won't change under positional treatment
4230 warnings.warn(
4231 # GH#49612
4232 "The behavior of obj[i:j] with a float-dtype index is "
4233 "deprecated. In a future version, this will be treated as "
4234 "positional instead of label-based. For label-based slicing, "
4235 "use obj.loc[i:j] instead",
4236 FutureWarning,
4237 stacklevel=find_stack_level(),
4238 )
4239 return self.slice_indexer(start, stop, step)
4240
4241 if kind == "getitem":
4242 # called from the getitem slicers, validate that we are in fact integers
4243 if is_index_slice:
4244 # In this case the _validate_indexer checks below are redundant
4245 return key
4246 elif self.dtype.kind in "iu":
4247 # Note: these checks are redundant if we know is_index_slice
4248 self._validate_indexer("slice", key.start, "getitem")
4249 self._validate_indexer("slice", key.stop, "getitem")
4250 self._validate_indexer("slice", key.step, "getitem")
4251 return key
4252
4253 # convert the slice to an indexer here; checking that the user didn't
4254 # pass a positional slice to loc
4255 is_positional = is_index_slice and self._should_fallback_to_positional
4256
4257 # if we are mixed and have integers
4258 if is_positional:
4259 try:
4260 # Validate start & stop
4261 if start is not None:
4262 self.get_loc(start)
4263 if stop is not None:
4264 self.get_loc(stop)
4265 is_positional = False
4266 except KeyError:
4267 pass
4268
4269 if com.is_null_slice(key):
4270 # It doesn't matter if we are positional or label based
4271 indexer = key
4272 elif is_positional:
4273 if kind == "loc":
4274 # GH#16121, GH#24612, GH#31810
4275 raise TypeError(
                    "Slicing a positional slice with .loc is not allowed. "
                    "Use .loc with labels or .iloc with positions instead.",
4278 )
4279 indexer = key
4280 else:
4281 indexer = self.slice_indexer(start, stop, step)
4282
4283 return indexer
4284
4285 @final
4286 def _raise_invalid_indexer(
4287 self,
4288 form: Literal["slice", "positional"],
4289 key,
4290 reraise: lib.NoDefault | None | Exception = lib.no_default,
4291 ) -> None:
4292 """
4293 Raise consistent invalid indexer message.
4294 """
4295 msg = (
4296 f"cannot do {form} indexing on {type(self).__name__} with these "
4297 f"indexers [{key}] of type {type(key).__name__}"
4298 )
4299 if reraise is not lib.no_default:
4300 raise TypeError(msg) from reraise
4301 raise TypeError(msg)
4302
4303 # --------------------------------------------------------------------
4304 # Reindex Methods
4305
4306 @final
4307 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4308 """
4309 Check if we are allowing reindexing with this particular indexer.
4310
4311 Parameters
4312 ----------
4313 indexer : an integer ndarray
4314
4315 Raises
4316 ------
        ValueError
            If the axis has duplicate labels.
4318 """
4319 # trying to reindex on an axis with duplicates
4320 if not self._index_as_unique and len(indexer):
4321 raise ValueError("cannot reindex on an axis with duplicate labels")
4322
4323 def reindex(
4324 self,
4325 target,
4326 method: ReindexMethod | None = None,
4327 level=None,
4328 limit: int | None = None,
4329 tolerance: float | None = None,
4330 ) -> tuple[Index, npt.NDArray[np.intp] | None]:
4331 """
4332 Create index with target's values.
4333
4334 Parameters
4335 ----------
4336 target : an iterable
4337 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
4338 * default: exact matches only.
4339 * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match.
4341 * nearest: use the NEAREST index value if no exact match. Tied
4342 distances are broken by preferring the larger index value.
4343 level : int, optional
4344 Level of multiindex.
4345 limit : int, optional
4346 Maximum number of consecutive labels in ``target`` to match for
4347 inexact matches.
4348 tolerance : int or float, optional
4349 Maximum distance between original and new labels for inexact
4350 matches. The values of the index at the matching locations must
4351 satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
4352
4353 Tolerance may be a scalar value, which applies the same tolerance
4354 to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, and Series; it
            must be the same size as the index, and its dtype must exactly
            match the index's dtype.
4358
4359 Returns
4360 -------
4361 new_index : pd.Index
4362 Resulting index.
4363 indexer : np.ndarray[np.intp] or None
4364 Indices of output values in original index.
4365
4366 Raises
4367 ------
4368 TypeError
4369 If ``method`` passed along with ``level``.
4370 ValueError
            If non-unique multi-index.
4372 ValueError
4373 If non-unique index and ``method`` or ``limit`` passed.
4374
4375 See Also
4376 --------
4377 Series.reindex : Conform Series to new index with optional filling logic.
4378 DataFrame.reindex : Conform DataFrame to new index with optional filling logic.
4379
4380 Examples
4381 --------
4382 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
4383 >>> idx
4384 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
4385 >>> idx.reindex(['car', 'bike'])
4386 (Index(['car', 'bike'], dtype='object'), array([0, 1]))
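
        Labels that are not present in the index are assigned ``-1`` in the
        returned indexer:

        >>> idx.reindex(['car', 'plane'])
        (Index(['car', 'plane'], dtype='object'), array([ 0, -1]))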
4387 """
4388 # GH6552: preserve names when reindexing to non-named target
4389 # (i.e. neither Index nor Series).
4390 preserve_names = not hasattr(target, "name")
4391
4392 # GH7774: preserve dtype/tz if target is empty and not an Index.
4393 target = ensure_has_len(target) # target may be an iterator
4394
4395 if not isinstance(target, Index) and len(target) == 0:
4396 if level is not None and self._is_multi:
4397 # "Index" has no attribute "levels"; maybe "nlevels"?
4398 idx = self.levels[level] # type: ignore[attr-defined]
4399 else:
4400 idx = self
4401 target = idx[:0]
4402 else:
4403 target = ensure_index(target)
4404
4405 if level is not None and (
4406 isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
4407 ):
4408 if method is not None:
4409 raise TypeError("Fill method not supported if level passed")
4410
4411 # TODO: tests where passing `keep_order=not self._is_multi`
4412 # makes a difference for non-MultiIndex case
4413 target, indexer, _ = self._join_level(
4414 target, level, how="right", keep_order=not self._is_multi
4415 )
4416
4417 else:
4418 if self.equals(target):
4419 indexer = None
4420 else:
4421 if self._index_as_unique:
4422 indexer = self.get_indexer(
4423 target, method=method, limit=limit, tolerance=tolerance
4424 )
4425 elif self._is_multi:
4426 raise ValueError("cannot handle a non-unique multi-index!")
4427 elif not self.is_unique:
4428 # GH#42568
4429 raise ValueError("cannot reindex on an axis with duplicate labels")
4430 else:
4431 indexer, _ = self.get_indexer_non_unique(target)
4432
4433 target = self._wrap_reindex_result(target, indexer, preserve_names)
4434 return target, indexer
4435
4436 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4437 target = self._maybe_preserve_names(target, preserve_names)
4438 return target
4439
4440 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4441 if preserve_names and target.nlevels == 1 and target.name != self.name:
4442 target = target.copy(deep=False)
4443 target.name = self.name
4444 return target
4445
4446 @final
4447 def _reindex_non_unique(
4448 self, target: Index
4449 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
4450 """
        Create a new index with target's values (move/add/delete values as
        necessary); for use with a non-unique Index and a possibly
        non-unique target.
4453
4454 Parameters
4455 ----------
4456 target : an iterable
4457
4458 Returns
4459 -------
4460 new_index : pd.Index
4461 Resulting index.
4462 indexer : np.ndarray[np.intp]
4463 Indices of output values in original index.
4464 new_indexer : np.ndarray[np.intp] or None
4466 """
4467 target = ensure_index(target)
4468 if len(target) == 0:
4469 # GH#13691
4470 return self[:0], np.array([], dtype=np.intp), None
4471
4472 indexer, missing = self.get_indexer_non_unique(target)
4473 check = indexer != -1
4474 new_labels: Index | np.ndarray = self.take(indexer[check])
4475 new_indexer = None
4476
4477 if len(missing):
4478 length = np.arange(len(indexer), dtype=np.intp)
4479
4480 missing = ensure_platform_int(missing)
4481 missing_labels = target.take(missing)
4482 missing_indexer = length[~check]
4483 cur_labels = self.take(indexer[check]).values
4484 cur_indexer = length[check]
4485
4486 # Index constructor below will do inference
4487 new_labels = np.empty((len(indexer),), dtype=object)
4488 new_labels[cur_indexer] = cur_labels
4489 new_labels[missing_indexer] = missing_labels
4490
4491 # GH#38906
4492 if not len(self):
4493 new_indexer = np.arange(0, dtype=np.intp)
4494
4495 # a unique indexer
4496 elif target.is_unique:
4497 # see GH5553, make sure we use the right indexer
4498 new_indexer = np.arange(len(indexer), dtype=np.intp)
4499 new_indexer[cur_indexer] = np.arange(len(cur_labels))
4500 new_indexer[missing_indexer] = -1
4501
4502 # we have a non_unique selector, need to use the original
4503 # indexer here
4504 else:
4505 # need to retake to have the same size as the indexer
4506 indexer[~check] = -1
4507
4508 # reset the new indexer to account for the new size
4509 new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
4510 new_indexer[~check] = -1
4511
4512 if not isinstance(self, ABCMultiIndex):
4513 new_index = Index(new_labels, name=self.name)
4514 else:
4515 new_index = type(self).from_tuples(new_labels, names=self.names)
4516 return new_index, indexer, new_indexer
4517
4518 # --------------------------------------------------------------------
4519 # Join Methods
4520
4521 @overload
4522 def join(
4523 self,
4524 other: Index,
4525 *,
4526 how: JoinHow = ...,
4527 level: Level = ...,
4528 return_indexers: Literal[True],
4529 sort: bool = ...,
4530 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4531 ...
4532
4533 @overload
4534 def join(
4535 self,
4536 other: Index,
4537 *,
4538 how: JoinHow = ...,
4539 level: Level = ...,
4540 return_indexers: Literal[False] = ...,
4541 sort: bool = ...,
4542 ) -> Index:
4543 ...
4544
4545 @overload
4546 def join(
4547 self,
4548 other: Index,
4549 *,
4550 how: JoinHow = ...,
4551 level: Level = ...,
4552 return_indexers: bool = ...,
4553 sort: bool = ...,
4554 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4555 ...
4556
4557 @final
4558 @_maybe_return_indexers
4559 def join(
4560 self,
4561 other: Index,
4562 *,
4563 how: JoinHow = "left",
4564 level: Level | None = None,
4565 return_indexers: bool = False,
4566 sort: bool = False,
4567 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4568 """
4569 Compute join_index and indexers to conform data structures to the new index.
4570
4571 Parameters
4572 ----------
4573 other : Index
4574 how : {'left', 'right', 'inner', 'outer'}
4575 level : int or level name, default None
4576 return_indexers : bool, default False
4577 sort : bool, default False
4578 Sort the join keys lexicographically in the result Index. If False,
4579 the order of the join keys depends on the join type (how keyword).
4580
4581 Returns
4582 -------
4583 join_index, (left_indexer, right_indexer)
4584
4585 Examples
4586 --------
4587 >>> idx1 = pd.Index([1, 2, 3])
4588 >>> idx2 = pd.Index([4, 5, 6])
4589 >>> idx1.join(idx2, how='outer')
4590 Index([1, 2, 3, 4, 5, 6], dtype='int64')
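
        An inner join keeps only the labels present in both indexes; with
        ``return_indexers=True``, the matching positions in each input are
        returned as well:

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.join(idx2, how='inner')
        Index([3, 4], dtype='int64')
        >>> idx1.join(idx2, how='inner', return_indexers=True)
        (Index([3, 4], dtype='int64'), array([2, 3]), array([0, 1]))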
4591 """
4592 other = ensure_index(other)
4593 sort = sort or how == "outer"
4594
4595 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
4596 if (self.tz is None) ^ (other.tz is None):
4597 # Raise instead of casting to object below.
4598 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
4599
4600 if not self._is_multi and not other._is_multi:
4601 # We have specific handling for MultiIndex below
4602 pself, pother = self._maybe_downcast_for_indexing(other)
4603 if pself is not self or pother is not other:
4604 return pself.join(
4605 pother, how=how, level=level, return_indexers=True, sort=sort
4606 )
4607
4608 # try to figure out the join level
4609 # GH3662
4610 if level is None and (self._is_multi or other._is_multi):
4611 # have the same levels/names so a simple join
4612 if self.names == other.names:
4613 pass
4614 else:
4615 return self._join_multi(other, how=how)
4616
4617 # join on the level
4618 if level is not None and (self._is_multi or other._is_multi):
4619 return self._join_level(other, level, how=how)
4620
4621 if len(self) == 0 or len(other) == 0:
4622 try:
4623 return self._join_empty(other, how, sort)
4624 except TypeError:
4625 # object dtype; non-comparable objects
4626 pass
4627
4628 if self.dtype != other.dtype:
4629 dtype = self._find_common_type_compat(other)
4630 this = self.astype(dtype, copy=False)
4631 other = other.astype(dtype, copy=False)
4632 return this.join(other, how=how, return_indexers=True)
4633 elif (
4634 isinstance(self, ABCCategoricalIndex)
4635 and isinstance(other, ABCCategoricalIndex)
4636 and not self.ordered
4637 and not self.categories.equals(other.categories)
4638 ):
4639 # dtypes are "equal" but categories are in different order
4640 other = Index(other._values.reorder_categories(self.categories))
4641
4642 _validate_join_method(how)
4643
4644 if (
4645 self.is_monotonic_increasing
4646 and other.is_monotonic_increasing
4647 and self._can_use_libjoin
4648 and other._can_use_libjoin
4649 and (self.is_unique or other.is_unique)
4650 ):
4651 try:
4652 return self._join_monotonic(other, how=how)
4653 except TypeError:
4654 # object dtype; non-comparable objects
4655 pass
4656 elif not self.is_unique or not other.is_unique:
4657 return self._join_non_unique(other, how=how, sort=sort)
4658
4659 return self._join_via_get_indexer(other, how, sort)
4660
4661 @final
4662 def _join_empty(
4663 self, other: Index, how: JoinHow, sort: bool
4664 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4665 assert len(self) == 0 or len(other) == 0
4666 _validate_join_method(how)
4667
4668 lidx: np.ndarray | None
4669 ridx: np.ndarray | None
4670
4671 if len(other):
4672 how = cast(JoinHow, {"left": "right", "right": "left"}.get(how, how))
4673 join_index, ridx, lidx = other._join_empty(self, how, sort)
4674 elif how in ["left", "outer"]:
4675 if sort and not self.is_monotonic_increasing:
4676 lidx = self.argsort()
4677 join_index = self.take(lidx)
4678 else:
4679 lidx = None
4680 join_index = self._view()
4681 ridx = np.broadcast_to(np.intp(-1), len(join_index))
4682 else:
4683 join_index = other._view()
4684 lidx = np.array([], dtype=np.intp)
4685 ridx = None
4686 return join_index, lidx, ridx
4687
4688 @final
4689 def _join_via_get_indexer(
4690 self, other: Index, how: JoinHow, sort: bool
4691 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4692 # Fallback if we do not have any fastpaths available based on
4693 # uniqueness/monotonicity
4694
4695 # Note: at this point we have checked matching dtypes
4696
4697 if how == "left":
4698 join_index = self.sort_values() if sort else self
4699 elif how == "right":
4700 join_index = other.sort_values() if sort else other
4701 elif how == "inner":
4702 join_index = self.intersection(other, sort=sort)
4703 elif how == "outer":
4704 try:
4705 join_index = self.union(other, sort=sort)
4706 except TypeError:
4707 join_index = self.union(other)
4708 try:
4709 join_index = _maybe_try_sort(join_index, sort)
4710 except TypeError:
4711 pass
4712
4713 if join_index is self:
4714 lindexer = None
4715 else:
4716 lindexer = self.get_indexer_for(join_index)
4717 if join_index is other:
4718 rindexer = None
4719 else:
4720 rindexer = other.get_indexer_for(join_index)
4721 return join_index, lindexer, rindexer
4722
4723 @final
4724 def _join_multi(self, other: Index, how: JoinHow):
4725 from pandas.core.indexes.multi import MultiIndex
4726 from pandas.core.reshape.merge import restore_dropped_levels_multijoin
4727
4728 # figure out join names
4729 self_names_list = list(com.not_none(*self.names))
4730 other_names_list = list(com.not_none(*other.names))
4731 self_names_order = self_names_list.index
4732 other_names_order = other_names_list.index
4733 self_names = set(self_names_list)
4734 other_names = set(other_names_list)
4735 overlap = self_names & other_names
4736
4737 # need at least 1 in common
4738 if not overlap:
4739 raise ValueError("cannot join with no overlapping index names")
4740
4741 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
4742 # Drop the non-matching levels from left and right respectively
4743 ldrop_names = sorted(self_names - overlap, key=self_names_order)
4744 rdrop_names = sorted(other_names - overlap, key=other_names_order)
4745
4746 # if only the order differs
4747 if not len(ldrop_names + rdrop_names):
4748 self_jnlevels = self
4749 other_jnlevels = other.reorder_levels(self.names)
4750 else:
4751 self_jnlevels = self.droplevel(ldrop_names)
4752 other_jnlevels = other.droplevel(rdrop_names)
4753
4754 # Join left and right
            # Joining on identically-leveled MultiIndexes is supported
4756 join_idx, lidx, ridx = self_jnlevels.join(
4757 other_jnlevels, how=how, return_indexers=True
4758 )
4759
4760 # Restore the dropped levels
4761 # Returned index level order is
4762 # common levels, ldrop_names, rdrop_names
4763 dropped_names = ldrop_names + rdrop_names
4764
4765 # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
4766 # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
4767 # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
4768 levels, codes, names = restore_dropped_levels_multijoin(
4769 self,
4770 other,
4771 dropped_names,
4772 join_idx,
4773 lidx, # type: ignore[arg-type]
4774 ridx, # type: ignore[arg-type]
4775 )
4776
4777 # Re-create the multi-index
4778 multi_join_idx = MultiIndex(
4779 levels=levels, codes=codes, names=names, verify_integrity=False
4780 )
4781
4782 multi_join_idx = multi_join_idx.remove_unused_levels()
4783
4784 # maintain the order of the index levels
4785 if how == "right":
4786 level_order = other_names_list + ldrop_names
4787 else:
4788 level_order = self_names_list + rdrop_names
4789 multi_join_idx = multi_join_idx.reorder_levels(level_order)
4790
4791 return multi_join_idx, lidx, ridx
4792
4793 jl = next(iter(overlap))
4794
        # Case where only one index is a MultiIndex:
        #  make the indexes into MultiIndexes that match
4797 flip_order = False
4798 if isinstance(self, MultiIndex):
4799 self, other = other, self
4800 flip_order = True
4801 # flip if join method is right or left
4802 flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
4803 how = flip.get(how, how)
4804
4805 level = other.names.index(jl)
4806 result = self._join_level(other, level, how=how)
4807
4808 if flip_order:
4809 return result[0], result[2], result[1]
4810 return result
4811
4812 @final
4813 def _join_non_unique(
4814 self, other: Index, how: JoinHow = "left", sort: bool = False
4815 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
4816 from pandas.core.reshape.merge import get_join_indexers_non_unique
4817
4818 # We only get here if dtypes match
4819 assert self.dtype == other.dtype
4820
4821 left_idx, right_idx = get_join_indexers_non_unique(
4822 self._values, other._values, how=how, sort=sort
4823 )
4824 mask = left_idx == -1
4825
4826 join_idx = self.take(left_idx)
4827 right = other.take(right_idx)
4828 join_index = join_idx.putmask(mask, right)
4829 if isinstance(join_index, ABCMultiIndex) and how == "outer":
4830 # test_join_index_levels
4831 join_index = join_index._sort_levels_monotonic()
4832 return join_index, left_idx, right_idx
4833
4834 @final
4835 def _join_level(
4836 self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
4837 ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4838 """
4839 The join method *only* affects the level of the resulting
4840 MultiIndex. Otherwise it just exactly aligns the Index data to the
4841 labels of the level in the MultiIndex.
4842
        If ``keep_order == True``, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with ``other``.
4846 """
4847 from pandas.core.indexes.multi import MultiIndex
4848
4849 def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
4850 """
            Returns sorter for the innermost level while preserving the
4852 order of higher levels.
4853
4854 Parameters
4855 ----------
4856 labels : list[np.ndarray]
4857 Each ndarray has signed integer dtype, not necessarily identical.
4858
4859 Returns
4860 -------
4861 np.ndarray[np.intp]
4862 """
4863 if labels[0].size == 0:
4864 return np.empty(0, dtype=np.intp)
4865
4866 if len(labels) == 1:
4867 return get_group_index_sorter(ensure_platform_int(labels[0]))
4868
            # find indexers marking the beginning of each run of equal
            #  labels w.r.t. all but the last level
4871 tic = labels[0][:-1] != labels[0][1:]
4872 for lab in labels[1:-1]:
4873 tic |= lab[:-1] != lab[1:]
4874
4875 starts = np.hstack(([True], tic, [True])).nonzero()[0]
4876 lab = ensure_int64(labels[-1])
4877 return lib.get_level_sorter(lab, ensure_platform_int(starts))
4878
4879 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
4880 raise TypeError("Join on level between two MultiIndex objects is ambiguous")
4881
4882 left, right = self, other
4883
4884 flip_order = not isinstance(self, MultiIndex)
4885 if flip_order:
4886 left, right = right, left
4887 flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
4888 how = flip.get(how, how)
4889
4890 assert isinstance(left, MultiIndex)
4891
4892 level = left._get_level_number(level)
4893 old_level = left.levels[level]
4894
4895 if not right.is_unique:
4896 raise NotImplementedError(
4897 "Index._join_level on non-unique index is not implemented"
4898 )
4899
4900 new_level, left_lev_indexer, right_lev_indexer = old_level.join(
4901 right, how=how, return_indexers=True
4902 )
4903
4904 if left_lev_indexer is None:
4905 if keep_order or len(left) == 0:
4906 left_indexer = None
4907 join_index = left
4908 else: # sort the leaves
4909 left_indexer = _get_leaf_sorter(left.codes[: level + 1])
4910 join_index = left[left_indexer]
4911
4912 else:
4913 left_lev_indexer = ensure_platform_int(left_lev_indexer)
4914 rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
4915 old_codes = left.codes[level]
4916
4917 taker = old_codes[old_codes != -1]
4918 new_lev_codes = rev_indexer.take(taker)
4919
4920 new_codes = list(left.codes)
4921 new_codes[level] = new_lev_codes
4922
4923 new_levels = list(left.levels)
4924 new_levels[level] = new_level
4925
            if keep_order:  # just drop missing values, preserving the order
4927 left_indexer = np.arange(len(left), dtype=np.intp)
4928 left_indexer = cast(np.ndarray, left_indexer)
4929 mask = new_lev_codes != -1
4930 if not mask.all():
4931 new_codes = [lab[mask] for lab in new_codes]
4932 left_indexer = left_indexer[mask]
4933
4934 else: # tie out the order with other
                if level == 0:  # outermost level, take the fast route
4936 max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
4937 ngroups = 1 + max_new_lev
4938 left_indexer, counts = libalgos.groupsort_indexer(
4939 new_lev_codes, ngroups
4940 )
4941
4942 # missing values are placed first; drop them!
4943 left_indexer = left_indexer[counts[0] :]
4944 new_codes = [lab[left_indexer] for lab in new_codes]
4945
4946 else: # sort the leaves
4947 mask = new_lev_codes != -1
4948 mask_all = mask.all()
4949 if not mask_all:
4950 new_codes = [lab[mask] for lab in new_codes]
4951
4952 left_indexer = _get_leaf_sorter(new_codes[: level + 1])
4953 new_codes = [lab[left_indexer] for lab in new_codes]
4954
4955 # left_indexers are w.r.t masked frame.
4956 # reverse to original frame!
4957 if not mask_all:
4958 left_indexer = mask.nonzero()[0][left_indexer]
4959
4960 join_index = MultiIndex(
4961 levels=new_levels,
4962 codes=new_codes,
4963 names=left.names,
4964 verify_integrity=False,
4965 )
4966
4967 if right_lev_indexer is not None:
4968 right_indexer = right_lev_indexer.take(join_index.codes[level])
4969 else:
4970 right_indexer = join_index.codes[level]
4971
4972 if flip_order:
4973 left_indexer, right_indexer = right_indexer, left_indexer
4974
4975 left_indexer = (
4976 None if left_indexer is None else ensure_platform_int(left_indexer)
4977 )
4978 right_indexer = (
4979 None if right_indexer is None else ensure_platform_int(right_indexer)
4980 )
4981 return join_index, left_indexer, right_indexer
4982
4983 @final
4984 def _join_monotonic(
4985 self, other: Index, how: JoinHow = "left"
4986 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4987 # We only get here with matching dtypes and both monotonic increasing
4988 assert other.dtype == self.dtype
4989 assert self._can_use_libjoin and other._can_use_libjoin
4990
4991 if self.equals(other):
4992 # This is a convenient place for this check, but its correctness
4993 # does not depend on monotonicity, so it could go earlier
4994 # in the calling method.
4995 ret_index = other if how == "right" else self
4996 return ret_index, None, None
4997
4998 ridx: npt.NDArray[np.intp] | None
4999 lidx: npt.NDArray[np.intp] | None
5000
5001 if self.is_unique and other.is_unique:
5002 # We can perform much better than the general case
5003 if how == "left":
5004 join_index = self
5005 lidx = None
5006 ridx = self._left_indexer_unique(other)
5007 elif how == "right":
5008 join_index = other
5009 lidx = other._left_indexer_unique(self)
5010 ridx = None
5011 elif how == "inner":
5012 join_array, lidx, ridx = self._inner_indexer(other)
5013 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5014 elif how == "outer":
5015 join_array, lidx, ridx = self._outer_indexer(other)
5016 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5017 else:
5018 if how == "left":
5019 join_array, lidx, ridx = self._left_indexer(other)
5020 elif how == "right":
5021 join_array, ridx, lidx = other._left_indexer(self)
5022 elif how == "inner":
5023 join_array, lidx, ridx = self._inner_indexer(other)
5024 elif how == "outer":
5025 join_array, lidx, ridx = self._outer_indexer(other)
5026
5027 assert lidx is not None
5028 assert ridx is not None
5029
5030 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5031
5032 lidx = None if lidx is None else ensure_platform_int(lidx)
5033 ridx = None if ridx is None else ensure_platform_int(ridx)
5034 return join_index, lidx, ridx
5035
5036 def _wrap_joined_index(
5037 self,
5038 joined: ArrayLike,
5039 other: Self,
5040 lidx: npt.NDArray[np.intp],
5041 ridx: npt.NDArray[np.intp],
5042 ) -> Self:
5043 assert other.dtype == self.dtype
5044
5045 if isinstance(self, ABCMultiIndex):
5046 name = self.names if self.names == other.names else None
5047 # error: Incompatible return value type (got "MultiIndex",
5048 # expected "Self")
5049 mask = lidx == -1
5050 join_idx = self.take(lidx)
5051 right = cast("MultiIndex", other.take(ridx))
5052 join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
5053 return join_index.set_names(name) # type: ignore[return-value]
5054 else:
5055 name = get_op_result_name(self, other)
5056 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
5057
5058 @final
5059 @cache_readonly
5060 def _can_use_libjoin(self) -> bool:
5061 """
5062 Whether we can use the fastpaths implemented in _libs.join.
5063
5064 This is driven by whether (in monotonic increasing cases that are
5065 guaranteed not to have NAs) we can convert to a np.ndarray without
5066 making a copy. If we cannot, this negates the performance benefit
5067 of using libjoin.
5068 """
5069 if type(self) is Index:
            # excludes most EAs, but includes masked and pyarrow arrays; we
            # get here with monotonic values only, meaning no NAs
5072 return (
5073 isinstance(self.dtype, np.dtype)
5074 or isinstance(self._values, (ArrowExtensionArray, BaseMaskedArray))
5075 or self.dtype == "string[python]"
5076 )
5077 # Exclude index types where the conversion to numpy converts to object dtype,
5078 # which negates the performance benefit of libjoin
5079 # Subclasses should override to return False if _get_join_target is
5080 # not zero-copy.
5081 # TODO: exclude RangeIndex (which allocates memory)?
5082 # Doing so seems to break test_concat_datetime_timezone
5083 return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
5084
5085 # --------------------------------------------------------------------
5086 # Uncategorized Methods
5087
5088 @property
5089 def values(self) -> ArrayLike:
5090 """
5091 Return an array representing the data in the Index.
5092
5093 .. warning::
5094
5095 We recommend using :attr:`Index.array` or
5096 :meth:`Index.to_numpy`, depending on whether you need
5097 a reference to the underlying data or a NumPy array.
5098
5099 Returns
5100 -------
        array : numpy.ndarray or ExtensionArray
5102
5103 See Also
5104 --------
5105 Index.array : Reference to the underlying data.
5106 Index.to_numpy : A NumPy array representing the underlying data.
5107
5108 Examples
5109 --------
5110 For :class:`pandas.Index`:
5111
5112 >>> idx = pd.Index([1, 2, 3])
5113 >>> idx
5114 Index([1, 2, 3], dtype='int64')
5115 >>> idx.values
5116 array([1, 2, 3])
5117
5118 For :class:`pandas.IntervalIndex`:
5119
5120 >>> idx = pd.interval_range(start=0, end=5)
5121 >>> idx.values
5122 <IntervalArray>
5123 [(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
5124 Length: 5, dtype: interval[int64, right]
5125 """
5126 if using_copy_on_write():
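            # Under Copy-on-Write, ndarray-backed data is exposed as a
            #  read-only view so that mutating the returned array cannot
            #  silently mutate the Index.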
5127 data = self._data
5128 if isinstance(data, np.ndarray):
5129 data = data.view()
5130 data.flags.writeable = False
5131 return data
5132 return self._data
5133
5134 @cache_readonly
5135 @doc(IndexOpsMixin.array)
5136 def array(self) -> ExtensionArray:
5137 array = self._data
5138 if isinstance(array, np.ndarray):
5139 from pandas.core.arrays.numpy_ import NumpyExtensionArray
5140
5141 array = NumpyExtensionArray(array)
5142 return array
5143
5144 @property
5145 def _values(self) -> ExtensionArray | np.ndarray:
5146 """
5147 The best array representation.
5148
5149 This is an ndarray or ExtensionArray.
5150
5151 ``_values`` are consistent between ``Series`` and ``Index``.
5152
        It may differ from the public ``.values`` attribute.
5154
5155 index | values | _values |
5156 ----------------- | --------------- | ------------- |
5157 Index | ndarray | ndarray |
5158 CategoricalIndex | Categorical | Categorical |
5159 DatetimeIndex | ndarray[M8ns] | DatetimeArray |
5160 DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
5161 PeriodIndex | ndarray[object] | PeriodArray |
5162 IntervalIndex | IntervalArray | IntervalArray |
5163
5164 See Also
5165 --------
        values : The public values attribute.
5167 """
5168 return self._data
5169
5170 def _get_engine_target(self) -> ArrayLike:
5171 """
5172 Get the ndarray or ExtensionArray that we can pass to the IndexEngine
5173 constructor.
5174 """
5175 vals = self._values
5176 if isinstance(vals, StringArray):
5177 # GH#45652 much more performant than ExtensionEngine
5178 return vals._ndarray
5179 if isinstance(vals, ArrowExtensionArray) and self.dtype.kind in "Mm":
5180 import pyarrow as pa
5181
5182 pa_type = vals._pa_array.type
5183 if pa.types.is_timestamp(pa_type):
5184 vals = vals._to_datetimearray()
5185 return vals._ndarray.view("i8")
5186 elif pa.types.is_duration(pa_type):
5187 vals = vals._to_timedeltaarray()
5188 return vals._ndarray.view("i8")
5189 if (
5190 type(self) is Index
5191 and isinstance(self._values, ExtensionArray)
5192 and not isinstance(self._values, BaseMaskedArray)
5193 and not (
5194 isinstance(self._values, ArrowExtensionArray)
5195 and is_numeric_dtype(self.dtype)
5196 # Exclude decimal
5197 and self.dtype.kind != "O"
5198 )
5199 ):
5200 # TODO(ExtensionIndex): remove special-case, just use self._values
5201 return self._values.astype(object)
5202 return vals
5203
5204 @final
5205 def _get_join_target(self) -> np.ndarray:
5206 """
        Get the np.ndarray that we can pass to the join functions.
5209 """
5210 if isinstance(self._values, BaseMaskedArray):
5211 # This is only used if our array is monotonic, so no NAs present
5212 return self._values._data
5213 elif isinstance(self._values, ArrowExtensionArray):
5214 # This is only used if our array is monotonic, so no missing values
5215 # present
5216 return self._values.to_numpy()
5217
5218 # TODO: exclude ABCRangeIndex case here as it copies
5219 target = self._get_engine_target()
5220 if not isinstance(target, np.ndarray):
5221 raise ValueError("_can_use_libjoin should return False.")
5222 return target
5223
5224 def _from_join_target(self, result: np.ndarray) -> ArrayLike:
5225 """
5226 Cast the ndarray returned from one of the libjoin.foo_indexer functions
5227 back to type(self._data).
5228 """
5229 if isinstance(self.values, BaseMaskedArray):
5230 return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
5231 elif isinstance(self.values, (ArrowExtensionArray, StringArray)):
5232 return type(self.values)._from_sequence(result, dtype=self.dtype)
5233 return result
5234
5235 @doc(IndexOpsMixin._memory_usage)
5236 def memory_usage(self, deep: bool = False) -> int:
5237 result = self._memory_usage(deep=deep)
5238
5239 # include our engine hashtable
5240 result += self._engine.sizeof(deep=deep)
5241 return result
5242
5243 @final
5244 def where(self, cond, other=None) -> Index:
5245 """
5246 Replace values where the condition is False.
5247
5248 The replacement is taken from other.
5249
5250 Parameters
5251 ----------
5252 cond : bool array-like with the same length as self
5253 Condition to select the values on.
5254 other : scalar, or array-like, default None
5255 Replacement if the condition is False.
5256
5257 Returns
5258 -------
5259 pandas.Index
5260 A copy of self with values replaced from other
5261 where the condition is False.
5262
5263 See Also
5264 --------
5265 Series.where : Same method for Series.
5266 DataFrame.where : Same method for DataFrame.
5267
5268 Examples
5269 --------
5270 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5271 >>> idx
5272 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5273 >>> idx.where(idx.isin(['car', 'train']), 'other')
5274 Index(['car', 'other', 'train', 'other'], dtype='object')
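
        ``other`` may also be array-like, in which case replacements are
        taken elementwise:

        >>> idx.where(idx.isin(['car', 'train']), ['a', 'b', 'c', 'd'])
        Index(['car', 'b', 'train', 'd'], dtype='object')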
5275 """
5276 if isinstance(self, ABCMultiIndex):
5277 raise NotImplementedError(
5278 ".where is not supported for MultiIndex operations"
5279 )
5280 cond = np.asarray(cond, dtype=bool)
5281 return self.putmask(~cond, other)
5282
5283 # construction helpers
5284 @final
5285 @classmethod
5286 def _raise_scalar_data_error(cls, data):
5287 # We return the TypeError so that we can raise it from the constructor
5288 # in order to keep mypy happy
5289 raise TypeError(
5290 f"{cls.__name__}(...) must be called with a collection of some "
5291 f"kind, {repr(data) if not isinstance(data, np.generic) else str(data)} "
5292 "was passed"
5293 )
5294
5295 def _validate_fill_value(self, value):
5296 """
5297 Check if the value can be inserted into our array without casting,
5298 and convert it to an appropriate native type if necessary.
5299
5300 Raises
5301 ------
5302 TypeError
5303 If the value cannot be inserted into an array of this dtype.
5304 """
5305 dtype = self.dtype
5306 if isinstance(dtype, np.dtype) and dtype.kind not in "mM":
5308 try:
5309 return np_can_hold_element(dtype, value)
5310 except LossySetitemError as err:
5311 # re-raise as TypeError for consistency
5312 raise TypeError from err
5313 elif not can_hold_element(self._values, value):
5314 raise TypeError
5315 return value
5316
5317 def _is_memory_usage_qualified(self) -> bool:
5318 """
        Return True if we need a qualified .info display.
5320 """
5321 return is_object_dtype(self.dtype)
5322
5323 def __contains__(self, key: Any) -> bool:
5324 """
5325 Return a boolean indicating whether the provided key is in the index.
5326
5327 Parameters
5328 ----------
5329 key : label
5330 The key to check if it is present in the index.
5331
5332 Returns
5333 -------
        bool
            True if the key is in the index, False otherwise.
5336
5337 Raises
5338 ------
5339 TypeError
5340 If the key is not hashable.
5341
5342 See Also
5343 --------
5344 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5345 list-like key is in the index.
5346
5347 Examples
5348 --------
5349 >>> idx = pd.Index([1, 2, 3, 4])
5350 >>> idx
5351 Index([1, 2, 3, 4], dtype='int64')
5352
5353 >>> 2 in idx
5354 True
5355 >>> 6 in idx
5356 False
5357 """
5358 hash(key)
5359 try:
5360 return key in self._engine
5361 except (OverflowError, TypeError, ValueError):
5362 return False
5363
5364 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
5365 # Incompatible types in assignment (expression has type "None", base class
5366 # "object" defined the type as "Callable[[object], int]")
5367 __hash__: ClassVar[None] # type: ignore[assignment]
5368
5369 @final
5370 def __setitem__(self, key, value) -> None:
5371 raise TypeError("Index does not support mutable operations")
5372
5373 def __getitem__(self, key):
5374 """
5375 Override numpy.ndarray's __getitem__ method to work as desired.
5376
        This function adds lists and Series as valid boolean indexers
        (ndarray only supports ndarray with dtype=bool).

        If the resulting ndim != 1, a plain ndarray is returned instead of
        the corresponding `Index` subclass.
5382
5383 """
5384 getitem = self._data.__getitem__
5385
5386 if is_integer(key) or is_float(key):
5387 # GH#44051 exclude bool, which would return a 2d ndarray
5388 key = com.cast_scalar_indexer(key)
5389 return getitem(key)
5390
5391 if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # the pessimization of com.is_bool_indexer and ndim checks.
5394 return self._getitem_slice(key)
5395
5396 if com.is_bool_indexer(key):
5397 # if we have list[bools, length=1e5] then doing this check+convert
5398 # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
5399 # time below from 3.8 ms to 496 µs
5400 # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
5401 if isinstance(getattr(key, "dtype", None), ExtensionDtype):
5402 key = key.to_numpy(dtype=bool, na_value=False)
5403 else:
5404 key = np.asarray(key, dtype=bool)
5405
5406 if not isinstance(self.dtype, ExtensionDtype):
5407 if len(key) == 0 and len(key) != len(self):
5408 warnings.warn(
5409 "Using a boolean indexer with length 0 on an Index with "
5410 "length greater than 0 is deprecated and will raise in a "
5411 "future version.",
5412 FutureWarning,
5413 stacklevel=find_stack_level(),
5414 )
5415
5416 result = getitem(key)
5417 # Because we ruled out integer above, we always get an arraylike here
5418 if result.ndim > 1:
5419 disallow_ndim_indexing(result)
5420
5421 # NB: Using _constructor._simple_new would break if MultiIndex
5422 # didn't override __getitem__
5423 return self._constructor._simple_new(result, name=self._name)
5424
5425 def _getitem_slice(self, slobj: slice) -> Self:
5426 """
5427 Fastpath for __getitem__ when we know we have a slice.
5428 """
5429 res = self._data[slobj]
5430 result = type(self)._simple_new(res, name=self._name, refs=self._references)
5431 if "_engine" in self._cache:
5432 reverse = slobj.step is not None and slobj.step < 0
5433 result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr]
5434
5435 return result
5436
5437 @final
5438 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5439 """
5440 Faster check for ``name in self`` when we know `name` is a Python
5441 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5442 . key lookup). For indexes that can't hold identifiers (everything
        but object, string & categorical) we just return False.
5444
5445 https://github.com/pandas-dev/pandas/issues/19764
5446 """
5447 if (
5448 is_object_dtype(self.dtype)
5449 or is_string_dtype(self.dtype)
5450 or isinstance(self.dtype, CategoricalDtype)
5451 ):
5452 return name in self
5453 return False
5454
5455 def append(self, other: Index | Sequence[Index]) -> Index:
5456 """
        Append a collection of Index objects together.
5458
5459 Parameters
5460 ----------
5461 other : Index or list/tuple of indices
5462
5463 Returns
5464 -------
5465 Index
5466
5467 Examples
5468 --------
5469 >>> idx = pd.Index([1, 2, 3])
5470 >>> idx.append(pd.Index([4]))
5471 Index([1, 2, 3, 4], dtype='int64')
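
        A list or tuple of Index objects can be appended in a single call:

        >>> idx.append([pd.Index([4]), pd.Index([5, 6])])
        Index([1, 2, 3, 4, 5, 6], dtype='int64')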
5472 """
5473 to_concat = [self]
5474
5475 if isinstance(other, (list, tuple)):
5476 to_concat += list(other)
5477 else:
5478 # error: Argument 1 to "append" of "list" has incompatible type
5479 # "Union[Index, Sequence[Index]]"; expected "Index"
5480 to_concat.append(other) # type: ignore[arg-type]
5481
5482 for obj in to_concat:
5483 if not isinstance(obj, Index):
5484 raise TypeError("all inputs must be Index")
5485
5486 names = {obj.name for obj in to_concat}
5487 name = None if len(names) > 1 else self.name
5488
5489 return self._concat(to_concat, name)
5490
5491 def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
5492 """
5493 Concatenate multiple Index objects.
5494 """
5495 to_concat_vals = [x._values for x in to_concat]
5496
5497 result = concat_compat(to_concat_vals)
5498
5499 return Index._with_infer(result, name=name)
5500
5501 def putmask(self, mask, value) -> Index:
5502 """
        Return a new Index with values replaced where the mask is True.
5504
5505 Returns
5506 -------
5507 Index
5508
5509 See Also
5510 --------
5511 numpy.ndarray.putmask : Changes elements of an array
5512 based on conditional and input values.
5513
5514 Examples
5515 --------
5516 >>> idx1 = pd.Index([1, 2, 3])
5517 >>> idx2 = pd.Index([5, 6, 7])
5518 >>> idx1.putmask([True, False, False], idx2)
5519 Index([5, 2, 3], dtype='int64')
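
        A scalar value is broadcast to all positions selected by the mask:

        >>> idx1.putmask([True, False, False], 10)
        Index([10, 2, 3], dtype='int64')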
5520 """
5521 mask, noop = validate_putmask(self._values, mask)
5522 if noop:
5523 return self.copy()
5524
5525 if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
5526 # e.g. None -> np.nan, see also Block._standardize_fill_value
5527 value = self._na_value
5528
5529 try:
5530 converted = self._validate_fill_value(value)
5531 except (LossySetitemError, ValueError, TypeError) as err:
5532 if is_object_dtype(self.dtype): # pragma: no cover
5533 raise err
5534
5535 # See also: Block.coerce_to_target_dtype
5536 dtype = self._find_common_type_compat(value)
5537 return self.astype(dtype).putmask(mask, value)
5538
5539 values = self._values.copy()
5540
5541 if isinstance(values, np.ndarray):
5542 converted = setitem_datetimelike_compat(values, mask.sum(), converted)
5543 np.putmask(values, mask, converted)
5544
5545 else:
5546 # Note: we use the original value here, not converted, as
5547 # _validate_fill_value is not idempotent
5548 values._putmask(mask, value)
5549
5550 return self._shallow_copy(values)
5551
5552 def equals(self, other: Any) -> bool:
5553 """
        Determine if two Index objects are equal.
5555
5556 The things that are being compared are:
5557
5558 * The elements inside the Index object.
5559 * The order of the elements inside the Index object.
5560
5561 Parameters
5562 ----------
5563 other : Any
5564 The other object to compare against.
5565
5566 Returns
5567 -------
5568 bool
5569 True if "other" is an Index and it has the same elements and order
5570 as the calling index; False otherwise.
5571
5572 Examples
5573 --------
5574 >>> idx1 = pd.Index([1, 2, 3])
5575 >>> idx1
5576 Index([1, 2, 3], dtype='int64')
5577 >>> idx1.equals(pd.Index([1, 2, 3]))
5578 True
5579
5580 The elements inside are compared
5581
5582 >>> idx2 = pd.Index(["1", "2", "3"])
5583 >>> idx2
5584 Index(['1', '2', '3'], dtype='object')
5585
5586 >>> idx1.equals(idx2)
5587 False
5588
5589 The order is compared
5590
5591 >>> ascending_idx = pd.Index([1, 2, 3])
5592 >>> ascending_idx
5593 Index([1, 2, 3], dtype='int64')
5594 >>> descending_idx = pd.Index([3, 2, 1])
5595 >>> descending_idx
5596 Index([3, 2, 1], dtype='int64')
5597 >>> ascending_idx.equals(descending_idx)
5598 False
5599
5600 The dtype is *not* compared
5601
5602 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
5603 >>> int64_idx
5604 Index([1, 2, 3], dtype='int64')
5605 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
5606 >>> uint64_idx
5607 Index([1, 2, 3], dtype='uint64')
5608 >>> int64_idx.equals(uint64_idx)
5609 True
5610 """
5611 if self.is_(other):
5612 return True
5613
5614 if not isinstance(other, Index):
5615 return False
5616
5617 if len(self) != len(other):
5618 # quickly return if the lengths are different
5619 return False
5620
5621 if (
5622 isinstance(self.dtype, StringDtype)
5623 and self.dtype.storage == "pyarrow_numpy"
5624 and other.dtype != self.dtype
5625 ):
5626 # special case for object behavior
5627 return other.equals(self.astype(object))
5628
5629 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
5630 # if other is not object, use other's logic for coercion
5631 return other.equals(self)
5632
5633 if isinstance(other, ABCMultiIndex):
5634 # d-level MultiIndex can equal d-tuple Index
5635 return other.equals(self)
5636
5637 if isinstance(self._values, ExtensionArray):
5638 # Dispatch to the ExtensionArray's .equals method.
5639 if not isinstance(other, type(self)):
5640 return False
5641
5642 earr = cast(ExtensionArray, self._data)
5643 return earr.equals(other._data)
5644
5645 if isinstance(other.dtype, ExtensionDtype):
5646 # All EA-backed Index subclasses override equals
5647 return other.equals(self)
5648
5649 return array_equivalent(self._values, other._values)
5650
5651 @final
5652 def identical(self, other) -> bool:
5653 """
5654 Similar to equals, but checks that object attributes and types are also equal.
5655
5656 Returns
5657 -------
5658 bool
            True if the two Index objects have equal elements and the same
            type, False otherwise.
5661
5662 Examples
5663 --------
5664 >>> idx1 = pd.Index(['1', '2', '3'])
5665 >>> idx2 = pd.Index(['1', '2', '3'])
5666 >>> idx2.identical(idx1)
5667 True
5668
5669 >>> idx1 = pd.Index(['1', '2', '3'], name="A")
5670 >>> idx2 = pd.Index(['1', '2', '3'], name="B")
5671 >>> idx2.identical(idx1)
5672 False
5673 """
5674 return (
5675 self.equals(other)
5676 and all(
5677 getattr(self, c, None) == getattr(other, c, None)
5678 for c in self._comparables
5679 )
5680 and type(self) == type(other)
5681 and self.dtype == other.dtype
5682 )
5683
5684 @final
5685 def asof(self, label):
5686 """
5687 Return the label from the index, or, if not present, the previous one.
5688
5689 Assuming that the index is sorted, return the passed index label if it
5690 is in the index, or return the previous index label if the passed one
5691 is not in the index.
5692
5693 Parameters
5694 ----------
5695 label : object
            The label to look up. If it is not present, the latest index
            label up to this one is returned.
5697
5698 Returns
5699 -------
5700 object
5701 The passed label if it is in the index. The previous label if the
5702 passed label is not in the sorted index or `NaN` if there is no
5703 such label.
5704
5705 See Also
5706 --------
5707 Series.asof : Return the latest value in a Series up to the
5708 passed index.
5709 merge_asof : Perform an asof merge (similar to left join but it
5710 matches on nearest key rather than equal key).
        Index.get_indexer : `asof` is effectively a thin wrapper around
            `get_indexer` with method='pad'.
5713
5714 Examples
5715 --------
5716 `Index.asof` returns the latest index label up to the passed label.
5717
5718 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
5719 >>> idx.asof('2014-01-01')
5720 '2013-12-31'
5721
5722 If the label is in the index, the method returns the passed label.
5723
5724 >>> idx.asof('2014-01-02')
5725 '2014-01-02'
5726
5727 If all of the labels in the index are later than the passed label,
5728 NaN is returned.
5729
5730 >>> idx.asof('1999-01-02')
5731 nan
5732
5733 If the index is not sorted, an error is raised.
5734
5735 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
5736 ... '2014-01-03'])
5737 >>> idx_not_sorted.asof('2013-12-31')
5738 Traceback (most recent call last):
5739 ValueError: index must be monotonic increasing or decreasing
5740 """
5741 self._searchsorted_monotonic(label) # validate sortedness
5742 try:
5743 loc = self.get_loc(label)
5744 except (KeyError, TypeError):
5745 # KeyError -> No exact match, try for padded
5746 # TypeError -> passed e.g. non-hashable, fall through to get
5747 # the tested exception message
5748 indexer = self.get_indexer([label], method="pad")
5749 if indexer.ndim > 1 or indexer.size > 1:
5750 raise TypeError("asof requires scalar valued input")
5751 loc = indexer.item()
5752 if loc == -1:
5753 return self._na_value
5754 else:
5755 if isinstance(loc, slice):
5756 loc = loc.indices(len(self))[-1]
5757
5758 return self[loc]
5759
5760 def asof_locs(
5761 self, where: Index, mask: npt.NDArray[np.bool_]
5762 ) -> npt.NDArray[np.intp]:
5763 """
5764 Return the locations (indices) of labels in the index.
5765
        As in :meth:`pandas.Index.asof`, if the label (a particular entry in
5767 ``where``) is not in the index, the latest index label up to the
5768 passed label is chosen and its index returned.
5769
5770 If all of the labels in the index are later than a label in ``where``,
5771 -1 is returned.
5772
5773 ``mask`` is used to ignore ``NA`` values in the index during calculation.
5774
5775 Parameters
5776 ----------
5777 where : Index
5778 An Index consisting of an array of timestamps.
5779 mask : np.ndarray[bool]
5780 Array of booleans denoting where values in the original
5781 data are not ``NA``.
5782
5783 Returns
5784 -------
5785 np.ndarray[np.intp]
5786 An array of locations (indices) of the labels from the index
5787 which correspond to the return values of :meth:`pandas.Index.asof`
5788 for every element in ``where``.
5789
5790 See Also
5791 --------
5792 Index.asof : Return the label from the index, or, if not present, the
5793 previous one.
5794
5795 Examples
5796 --------
5797 >>> idx = pd.date_range('2023-06-01', periods=3, freq='D')
5798 >>> where = pd.DatetimeIndex(['2023-05-30 00:12:00', '2023-06-01 00:00:00',
5799 ... '2023-06-02 23:59:59'])
5800 >>> mask = np.ones(3, dtype=bool)
5801 >>> idx.asof_locs(where, mask)
5802 array([-1, 0, 1])
5803
5804 We can use ``mask`` to ignore certain values in the index during calculation.
5805
5806 >>> mask[1] = False
5807 >>> idx.asof_locs(where, mask)
5808 array([-1, 0, 0])
5809 """
5810 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5811 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5812 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5813 locs = self._values[mask].searchsorted(
5814 where._values, side="right" # type: ignore[call-overload]
5815 )
5816 locs = np.where(locs > 0, locs - 1, 0)
5817
5818 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5819
5820 first_value = self._values[mask.argmax()]
5821 result[(locs == 0) & (where._values < first_value)] = -1
5822
5823 return result
5824
5825 @overload
5826 def sort_values(
5827 self,
5828 *,
5829 return_indexer: Literal[False] = ...,
5830 ascending: bool = ...,
5831 na_position: NaPosition = ...,
5832 key: Callable | None = ...,
5833 ) -> Self:
5834 ...
5835
5836 @overload
5837 def sort_values(
5838 self,
5839 *,
5840 return_indexer: Literal[True],
5841 ascending: bool = ...,
5842 na_position: NaPosition = ...,
5843 key: Callable | None = ...,
5844 ) -> tuple[Self, np.ndarray]:
5845 ...
5846
5847 @overload
5848 def sort_values(
5849 self,
5850 *,
5851 return_indexer: bool = ...,
5852 ascending: bool = ...,
5853 na_position: NaPosition = ...,
5854 key: Callable | None = ...,
5855 ) -> Self | tuple[Self, np.ndarray]:
5856 ...
5857
5858 @deprecate_nonkeyword_arguments(
5859 version="3.0", allowed_args=["self"], name="sort_values"
5860 )
5861 def sort_values(
5862 self,
5863 return_indexer: bool = False,
5864 ascending: bool = True,
5865 na_position: NaPosition = "last",
5866 key: Callable | None = None,
5867 ) -> Self | tuple[Self, np.ndarray]:
5868 """
5869 Return a sorted copy of the index.
5870
5871 Return a sorted copy of the index, and optionally return the indices
5872 that sorted the index itself.
5873
5874 Parameters
5875 ----------
        return_indexer : bool, default False
            Whether the indices that would sort the index should be returned.
        ascending : bool, default True
            Whether the index values should be sorted in ascending order.
        na_position : {'first', 'last'}, default 'last'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.
5883 key : callable, optional
5884 If not None, apply the key function to the index values
5885 before sorting. This is similar to the `key` argument in the
5886 builtin :meth:`sorted` function, with the notable difference that
5887 this `key` function should be *vectorized*. It should expect an
5888 ``Index`` and return an ``Index`` of the same shape.
5889
5890 Returns
5891 -------
5892 sorted_index : pandas.Index
5893 Sorted copy of the index.
5894 indexer : numpy.ndarray, optional
5895 The indices that the index itself was sorted by.
5896
5897 See Also
5898 --------
5899 Series.sort_values : Sort values of a Series.
5900 DataFrame.sort_values : Sort values in a DataFrame.
5901
5902 Examples
5903 --------
5904 >>> idx = pd.Index([10, 100, 1, 1000])
5905 >>> idx
5906 Index([10, 100, 1, 1000], dtype='int64')
5907
5908 Sort values in ascending order (default behavior).
5909
5910 >>> idx.sort_values()
5911 Index([1, 10, 100, 1000], dtype='int64')
5912
5913 Sort values in descending order, and also get the indices `idx` was
5914 sorted by.
5915
5916 >>> idx.sort_values(ascending=False, return_indexer=True)
5917 (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5918 """
5919 if key is None and (
5920 (ascending and self.is_monotonic_increasing)
5921 or (not ascending and self.is_monotonic_decreasing)
5922 ):
5923 if return_indexer:
5924 indexer = np.arange(len(self), dtype=np.intp)
5925 return self.copy(), indexer
5926 else:
5927 return self.copy()
5928
5929 # GH 35584. Sort missing values according to na_position kwarg
5930 # ignore na_position for MultiIndex
5931 if not isinstance(self, ABCMultiIndex):
5932 _as = nargsort(
5933 items=self, ascending=ascending, na_position=na_position, key=key
5934 )
5935 else:
5936 idx = cast(Index, ensure_key_mapped(self, key))
5937 _as = idx.argsort(na_position=na_position)
5938 if not ascending:
5939 _as = _as[::-1]
5940
5941 sorted_index = self.take(_as)
5942
5943 if return_indexer:
5944 return sorted_index, _as
5945 else:
5946 return sorted_index
5947
5948 @final
5949 def sort(self, *args, **kwargs):
5950 """
5951 Use sort_values instead.
5952 """
5953 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5954
5955 def shift(self, periods: int = 1, freq=None):
5956 """
5957 Shift index by desired number of time frequency increments.
5958
5959 This method is for shifting the values of datetime-like indexes
5960 by a specified time increment a given number of times.
5961
5962 Parameters
5963 ----------
5964 periods : int, default 1
5965 Number of periods (or increments) to shift by,
5966 can be positive or negative.
5967 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5968 Frequency increment to shift by.
5969 If None, the index is shifted by its own `freq` attribute.
5970 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5971
5972 Returns
5973 -------
5974 pandas.Index
5975 Shifted index.
5976
5977 See Also
5978 --------
5979 Series.shift : Shift values of Series.
5980
5981 Notes
5982 -----
5983 This method is only implemented for datetime-like index classes,
5984 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5985
5986 Examples
5987 --------
5988 Put the first 5 month starts of 2011 into an index.
5989
5990 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5991 >>> month_starts
5992 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5993 '2011-05-01'],
5994 dtype='datetime64[ns]', freq='MS')
5995
5996 Shift the index by 10 days.
5997
5998 >>> month_starts.shift(10, freq='D')
5999 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
6000 '2011-05-11'],
6001 dtype='datetime64[ns]', freq=None)
6002
6003 The default value of `freq` is the `freq` attribute of the index,
6004 which is 'MS' (month start) in this example.
6005
6006 >>> month_starts.shift(10)
6007 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
6008 '2012-03-01'],
6009 dtype='datetime64[ns]', freq='MS')
6010 """
6011 raise NotImplementedError(
6012 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
6013 f"TimedeltaIndex; Got type {type(self).__name__}"
6014 )
6015
6016 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
6017 """
6018 Return the integer indices that would sort the index.
6019
6020 Parameters
6021 ----------
6022 *args
6023 Passed to `numpy.ndarray.argsort`.
6024 **kwargs
6025 Passed to `numpy.ndarray.argsort`.
6026
6027 Returns
6028 -------
6029 np.ndarray[np.intp]
6030 Integer indices that would sort the index if used as
6031 an indexer.
6032
6033 See Also
6034 --------
6035 numpy.argsort : Similar method for NumPy arrays.
6036 Index.sort_values : Return sorted copy of Index.
6037
6038 Examples
6039 --------
6040 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
6041 >>> idx
6042 Index(['b', 'a', 'd', 'c'], dtype='object')
6043
6044 >>> order = idx.argsort()
6045 >>> order
6046 array([1, 0, 3, 2])
6047
6048 >>> idx[order]
6049 Index(['a', 'b', 'c', 'd'], dtype='object')
6050 """
        # This works for either ndarray or EA and is overridden
        # by RangeIndex and MultiIndex
6053 return self._data.argsort(*args, **kwargs)
6054
6055 def _check_indexing_error(self, key):
6056 if not is_scalar(key):
6057 # if key is not a scalar, directly raise an error (the code below
6058 # would convert to numpy arrays and raise later any way) - GH29926
6059 raise InvalidIndexError(key)
6060
6061 @cache_readonly
6062 def _should_fallback_to_positional(self) -> bool:
6063 """
6064 Should an integer key be treated as positional?
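
        For example, a string index lets an integer key in ``series[key]``
        fall back to positional lookup, while an integer- or float-dtype
        index treats it as a label.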
6065 """
6066 return self.inferred_type not in {
6067 "integer",
6068 "mixed-integer",
6069 "floating",
6070 "complex",
6071 }
6072
6073 _index_shared_docs[
6074 "get_indexer_non_unique"
6075 ] = """
6076 Compute indexer and mask for new index given the current index.
6077
    The indexer should then be used as an input to ndarray.take to align the
6079 current data to the new index.
6080
6081 Parameters
6082 ----------
6083 target : %(target_klass)s
6084
6085 Returns
6086 -------
6087 indexer : np.ndarray[np.intp]
6088 Integers from 0 to n - 1 indicating that the index at these
6089 positions matches the corresponding target values. Missing values
6090 in the target are marked by -1.
6091 missing : np.ndarray[np.intp]
6092 An indexer into the target of the values not found.
6093 These correspond to the -1 in the indexer array.
6094
6095 Examples
6096 --------
6097 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6098 >>> index.get_indexer_non_unique(['b', 'b'])
6099 (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))
6100
6101 In the example below there are no matched values.
6102
6103 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6104 >>> index.get_indexer_non_unique(['q', 'r', 't'])
6105 (array([-1, -1, -1]), array([0, 1, 2]))
6106
    Here the returned ``indexer`` contains only integers equal to -1,
    indicating that none of the ``target`` values were found in the index.
    The second array, [0, 1, 2], shows that the first, second, and third
    target elements are missing.

    Notice that the return value is a tuple containing two items. In the
    example below, the first item is an array of locations in ``index``; the
    second is a mask showing that the first and third target elements are
    missing.
6115
6116 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6117 >>> index.get_indexer_non_unique(['f', 'b', 's'])
6118 (array([-1, 1, 3, 4, -1]), array([0, 2]))
6119 """
6120
6121 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
6122 def get_indexer_non_unique(
6123 self, target
6124 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6125 target = ensure_index(target)
6126 target = self._maybe_cast_listlike_indexer(target)
6127
6128 if not self._should_compare(target) and not self._should_partial_index(target):
6129 # _should_partial_index e.g. IntervalIndex with numeric scalars
6130 # that can be matched to Interval scalars.
6131 return self._get_indexer_non_comparable(target, method=None, unique=False)
6132
6133 pself, ptarget = self._maybe_downcast_for_indexing(target)
6134 if pself is not self or ptarget is not target:
6135 return pself.get_indexer_non_unique(ptarget)
6136
6137 if self.dtype != target.dtype:
6138 # TODO: if object, could use infer_dtype to preempt costly
6139 # conversion if still non-comparable?
6140 dtype = self._find_common_type_compat(target)
6141
6142 this = self.astype(dtype, copy=False)
6143 that = target.astype(dtype, copy=False)
6144 return this.get_indexer_non_unique(that)
6145
6146 # TODO: get_indexer has fastpaths for both Categorical-self and
6147 # Categorical-target. Can we do something similar here?
6148
6149 # Note: _maybe_downcast_for_indexing ensures we never get here
6150 # with MultiIndex self and non-Multi target
6151 if self._is_multi and target._is_multi:
6152 engine = self._engine
6153 # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
6154 # no attribute "_extract_level_codes"
6155 tgt_values = engine._extract_level_codes(target) # type: ignore[union-attr]
6156 else:
6157 tgt_values = target._get_engine_target()
6158
6159 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
6160 return ensure_platform_int(indexer), ensure_platform_int(missing)
6161
6162 @final
6163 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
6164 """
6165 Guaranteed return of an indexer even when non-unique.
6166
6167 This dispatches to get_indexer or get_indexer_non_unique
6168 as appropriate.
6169
6170 Returns
6171 -------
6172 np.ndarray[np.intp]
6173 List of indices.
6174
6175 Examples
6176 --------
6177 >>> idx = pd.Index([np.nan, 'var1', np.nan])
6178 >>> idx.get_indexer_for([np.nan])
6179 array([0, 2])
6180 """
6181 if self._index_as_unique:
6182 return self.get_indexer(target)
6183 indexer, _ = self.get_indexer_non_unique(target)
6184 return indexer
6185
6186 def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
6187 """
6188 Analogue to get_indexer that raises if any elements are missing.
6189 """
6190 keyarr = key
6191 if not isinstance(keyarr, Index):
6192 keyarr = com.asarray_tuplesafe(keyarr)
6193
6194 if self._index_as_unique:
6195 indexer = self.get_indexer_for(keyarr)
6196 keyarr = self.reindex(keyarr)[0]
6197 else:
6198 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
6199
6200 self._raise_if_missing(keyarr, indexer, axis_name)
6201
6202 keyarr = self.take(indexer)
6203 if isinstance(key, Index):
6204 # GH 42790 - Preserve name from an Index
6205 keyarr.name = key.name
6206 if lib.is_np_dtype(keyarr.dtype, "mM") or isinstance(
6207 keyarr.dtype, DatetimeTZDtype
6208 ):
            # DTI/TDI.take can infer a freq in some cases when we don't want one
6210 if isinstance(key, list) or (
6211 isinstance(key, type(self))
6212 # "Index" has no attribute "freq"
6213 and key.freq is None # type: ignore[attr-defined]
6214 ):
6215 keyarr = keyarr._with_freq(None)
6216
6217 return keyarr, indexer
6218
6219 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
6220 """
6221 Check that indexer can be used to return a result.
6222
6223 e.g. at least one element was found,
6224 unless the list of keys was actually empty.
6225
6226 Parameters
6227 ----------
6228 key : list-like
6229 Targeted labels (only used to show correct error message).
        indexer : array-like of integers
            Indices corresponding to the key
            (with -1 indicating not found).
6233 axis_name : str
6234
6235 Raises
6236 ------
6237 KeyError
6238 If at least one key was requested but none was found.
6239 """
6240 if len(key) == 0:
6241 return
6242
6243 # Count missing values
6244 missing_mask = indexer < 0
6245 nmissing = missing_mask.sum()
6246
6247 if nmissing:
6248 if nmissing == len(indexer):
6249 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6250
6251 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6252 raise KeyError(f"{not_found} not in index")
6253
6254 @overload
6255 def _get_indexer_non_comparable(
6256 self, target: Index, method, unique: Literal[True] = ...
6257 ) -> npt.NDArray[np.intp]:
6258 ...
6259
6260 @overload
6261 def _get_indexer_non_comparable(
6262 self, target: Index, method, unique: Literal[False]
6263 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6264 ...
6265
6266 @overload
6267 def _get_indexer_non_comparable(
6268 self, target: Index, method, unique: bool = True
6269 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6270 ...
6271
6272 @final
6273 def _get_indexer_non_comparable(
6274 self, target: Index, method, unique: bool = True
6275 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6276 """
6277 Called from get_indexer or get_indexer_non_unique when the target
6278 is of a non-comparable dtype.
6279
6280 For get_indexer lookups with method=None, get_indexer is an _equality_
6281 check, so non-comparable dtypes mean we will always have no matches.
6282
6283 For get_indexer lookups with a method, get_indexer is an _inequality_
6284 check, so non-comparable dtypes mean we will always raise TypeError.
6285
6286 Parameters
6287 ----------
6288 target : Index
6289 method : str or None
6290 unique : bool, default True
6291 * True if called from get_indexer.
6292 * False if called from get_indexer_non_unique.
6293
6294 Raises
6295 ------
6296 TypeError
6297 If doing an inequality check, i.e. method is not None.
6298 """
6299 if method is not None:
6300 other_dtype = _unpack_nested_dtype(target)
6301 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")
6302
6303 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
6304 if unique:
6305 # This is for get_indexer
6306 return no_matches
6307 else:
6308 # This is for get_indexer_non_unique
6309 missing = np.arange(len(target), dtype=np.intp)
6310 return no_matches, missing
6311
6312 @property
6313 def _index_as_unique(self) -> bool:
6314 """
6315 Whether we should treat this as unique for the sake of
6316 get_indexer vs get_indexer_non_unique.
6317
6318 For IntervalIndex compat.
6319 """
6320 return self.is_unique
6321
6322 _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
6323
6324 @final
6325 def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
6326 """
6327 When dealing with an object-dtype Index and a non-object Index, see
6328 if we can upcast the object-dtype one to improve performance.
6329 """
6330
6331 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
6332 if (
6333 self.tz is not None
6334 and other.tz is not None
6335 and not tz_compare(self.tz, other.tz)
6336 ):
6337 # standardize on UTC
6338 return self.tz_convert("UTC"), other.tz_convert("UTC")
6339
6340 elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
6341 try:
6342 return type(other)(self), other
6343 except OutOfBoundsDatetime:
6344 return self, other
6345 elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we don't have tests that get here
6347 return type(other)(self), other
6348
6349 elif self.dtype.kind == "u" and other.dtype.kind == "i":
6350 # GH#41873
6351 if other.min() >= 0:
6352 # lookup min as it may be cached
6353 # TODO: may need itemsize check if we have non-64-bit Indexes
6354 return self, other.astype(self.dtype)
6355
6356 elif self._is_multi and not other._is_multi:
6357 try:
6358 # "Type[Index]" has no attribute "from_tuples"
6359 other = type(self).from_tuples(other) # type: ignore[attr-defined]
6360 except (TypeError, ValueError):
6361 # let's instead try with a straight Index
6362 self = Index(self._values)
6363
6364 if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we don't need to re-implement on the subclasses
6366 other, self = other._maybe_downcast_for_indexing(self)
6367
6368 return self, other
6369
6370 @final
6371 def _find_common_type_compat(self, target) -> DtypeObj:
6372 """
6373 Implementation of find_common_type that adjusts for Index-specific
6374 special cases.
6375 """
6376 target_dtype, _ = infer_dtype_from(target)
6377
6378 # special case: if one dtype is uint64 and the other a signed int, return object
6379 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
6380 # Now it's:
6381 # * float | [u]int -> float
6382 # * uint64 | signed int -> object
6383 # We may change union(float | [u]int) to go to object.
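        # A sketch of the resulting rules (illustrative, not exhaustive):
        #   uint64 index vs [-1] (signed int) -> object
        #   float64 index vs [1] ([u]int)     -> float64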
6384 if self.dtype == "uint64" or target_dtype == "uint64":
6385 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
6386 target_dtype
6387 ):
6388 return _dtype_obj
6389
6390 dtype = find_result_type(self.dtype, target)
6391 dtype = common_dtype_categorical_compat([self, target], dtype)
6392 return dtype
6393
6394 @final
6395 def _should_compare(self, other: Index) -> bool:
6396 """
6397 Check if `self == other` can ever have non-False entries.
6398 """
6399
6400 # NB: we use inferred_type rather than is_bool_dtype to catch
6401 # object_dtype_of_bool and categorical[object_dtype_of_bool] cases
6402 if (
6403 other.inferred_type == "boolean" and is_any_real_numeric_dtype(self.dtype)
6404 ) or (
6405 self.inferred_type == "boolean" and is_any_real_numeric_dtype(other.dtype)
6406 ):
6407 # GH#16877 Treat boolean labels passed to a numeric index as not
6408 # found. Without this fix False and True would be treated as 0 and 1
6409 # respectively.
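            # e.g. Index([0, 1]).get_indexer([False, True]) returns
            # array([-1, -1]) rather than matching positions 0 and 1.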
6410 return False
6411
6412 dtype = _unpack_nested_dtype(other)
6413 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6414
6415 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6416 """
6417 Can we compare values of the given dtype to our own?
6418 """
6419 if self.dtype.kind == "b":
6420 return dtype.kind == "b"
6421 elif is_numeric_dtype(self.dtype):
6422 return is_numeric_dtype(dtype)
6423 # TODO: this was written assuming we only get here with object-dtype,
6424 # which is no longer correct. Can we specialize for EA?
6425 return True
6426
6427 @final
6428 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6429 """
6430 Group the index labels by a given array of values.
6431
6432 Parameters
6433 ----------
6434 values : array
6435 Values used to determine the groups.
6436
6437 Returns
6438 -------
6439 dict
6440 {group name -> group labels}
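
        Examples
        --------
        A small sketch, grouping index labels by an external key array
        (values chosen for illustration):

        >>> idx = pd.Index([1, 2, 3, 4])
        >>> groups = idx.groupby(np.array(["a", "b", "a", "b"]))
        >>> groups["a"]
        Index([1, 3], dtype='int64')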
6441 """
6442 # TODO: if we are a MultiIndex, we can do better
        # than converting to tuples
6444 if isinstance(values, ABCMultiIndex):
6445 values = values._values
6446 values = Categorical(values)
6447 result = values._reverse_indexer()
6448
6449 # map to the label
6450 result = {k: self.take(v) for k, v in result.items()}
6451
6452 return PrettyDict(result)
6453
6454 def map(self, mapper, na_action: Literal["ignore"] | None = None):
6455 """
6456 Map values using an input mapping or function.
6457
6458 Parameters
6459 ----------
6460 mapper : function, dict, or Series
6461 Mapping correspondence.
6462 na_action : {None, 'ignore'}
6463 If 'ignore', propagate NA values, without passing them to the
6464 mapping correspondence.
6465
6466 Returns
6467 -------
6468 Union[Index, MultiIndex]
6469 The output of the mapping function applied to the index.
6470 If the function returns a tuple with more than one element
6471 a MultiIndex will be returned.
6472
6473 Examples
6474 --------
6475 >>> idx = pd.Index([1, 2, 3])
6476 >>> idx.map({1: 'a', 2: 'b', 3: 'c'})
6477 Index(['a', 'b', 'c'], dtype='object')
6478
6479 Using `map` with a function:
6480
6481 >>> idx = pd.Index([1, 2, 3])
6482 >>> idx.map('I am a {}'.format)
6483 Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='object')
6484
6485 >>> idx = pd.Index(['a', 'b', 'c'])
6486 >>> idx.map(lambda x: x.upper())
6487 Index(['A', 'B', 'C'], dtype='object')
6488 """
6489 from pandas.core.indexes.multi import MultiIndex
6490
6491 new_values = self._map_values(mapper, na_action=na_action)
6492
6493 # we can return a MultiIndex
6494 if new_values.size and isinstance(new_values[0], tuple):
6495 if isinstance(self, MultiIndex):
6496 names = self.names
6497 elif self.name:
6498 names = [self.name] * len(new_values[0])
6499 else:
6500 names = None
6501 return MultiIndex.from_tuples(new_values, names=names)
6502
6503 dtype = None
6504 if not new_values.size:
6505 # empty
6506 dtype = self.dtype
6507
6508 # e.g. if we are floating and new_values is all ints, then we
6509 # don't want to cast back to floating. But if we are UInt64
6510 # and new_values is all ints, we want to try.
6511 same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
6512 if same_dtype:
6513 new_values = maybe_cast_pointwise_result(
6514 new_values, self.dtype, same_dtype=same_dtype
6515 )
6516
6517 return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6518
6519 # TODO: De-duplicate with map, xref GH#32349
6520 @final
6521 def _transform_index(self, func, *, level=None) -> Index:
6522 """
6523 Apply function to all values found in index.
6524
6525 This includes transforming multiindex entries separately.
6526 Only apply function to one level of the MultiIndex if level is specified.
6527 """
6528 if isinstance(self, ABCMultiIndex):
6529 values = [
6530 self.get_level_values(i).map(func)
6531 if i == level or level is None
6532 else self.get_level_values(i)
6533 for i in range(self.nlevels)
6534 ]
6535 return type(self).from_arrays(values)
6536 else:
6537 items = [func(x) for x in self]
6538 return Index(items, name=self.name, tupleize_cols=False)
6539
6540 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
6541 """
6542 Return a boolean array where the index values are in `values`.
6543
6544 Compute boolean array of whether each index value is found in the
6545 passed set of values. The length of the returned boolean array matches
6546 the length of the index.
6547
6548 Parameters
6549 ----------
6550 values : set or list-like
6551 Sought values.
6552 level : str or int, optional
6553 Name or position of the index level to use (if the index is a
6554 `MultiIndex`).
6555
6556 Returns
6557 -------
6558 np.ndarray[bool]
6559 NumPy array of boolean values.
6560
6561 See Also
6562 --------
6563 Series.isin : Same for Series.
6564 DataFrame.isin : Same method for DataFrames.
6565
6566 Notes
6567 -----
6568 In the case of `MultiIndex` you must either specify `values` as a
6569 list-like object containing tuples that are the same length as the
6570 number of levels, or specify `level`. Otherwise it will raise a
6571 ``ValueError``.
6572
6573 If `level` is specified:
6574
6575 - if it is the name of one *and only one* index level, use that level;
6576 - otherwise it should be a number indicating level position.
6577
6578 Examples
6579 --------
        >>> idx = pd.Index([1, 2, 3])
6581 >>> idx
6582 Index([1, 2, 3], dtype='int64')
6583
        Check whether each index value is in a list of values.
6585
6586 >>> idx.isin([1, 4])
6587 array([ True, False, False])
6588
        >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3],
6590 ... ['red', 'blue', 'green']],
6591 ... names=('number', 'color'))
6592 >>> midx
6593 MultiIndex([(1, 'red'),
6594 (2, 'blue'),
6595 (3, 'green')],
6596 names=['number', 'color'])
6597
6598 Check whether the strings in the 'color' level of the MultiIndex
6599 are in a list of colors.
6600
6601 >>> midx.isin(['red', 'orange', 'yellow'], level='color')
6602 array([ True, False, False])
6603
6604 To check across the levels of a MultiIndex, pass a list of tuples:
6605
6606 >>> midx.isin([(1, 'red'), (3, 'red')])
6607 array([ True, False, False])
6608 """
6609 if level is not None:
6610 self._validate_index_level(level)
6611 return algos.isin(self._values, values)
6612
6613 def _get_string_slice(self, key: str_t):
6614 # this is for partial string indexing,
6615 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
6616 raise NotImplementedError
6617
6618 def slice_indexer(
6619 self,
6620 start: Hashable | None = None,
6621 end: Hashable | None = None,
6622 step: int | None = None,
6623 ) -> slice:
6624 """
6625 Compute the slice indexer for input labels and step.
6626
6627 Index needs to be ordered and unique.
6628
6629 Parameters
6630 ----------
6631 start : label, default None
6632 If None, defaults to the beginning.
6633 end : label, default None
6634 If None, defaults to the end.
6635 step : int, default None
6636
6637 Returns
6638 -------
6639 slice
6640
6641 Raises
6642 ------
6643 KeyError : If key does not exist, or key is not unique and index is
6644 not ordered.
6645
6646 Notes
6647 -----
        This function assumes that the data is sorted, so use it at your own
        peril.
6649
6650 Examples
6651 --------
6652 This is a method on all index types. For example you can do:
6653
6654 >>> idx = pd.Index(list('abcd'))
6655 >>> idx.slice_indexer(start='b', end='c')
6656 slice(1, 3, None)
6657
6658 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6659 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6660 slice(1, 3, None)
6661 """
6662 start_slice, end_slice = self.slice_locs(start, end, step=step)
6663
6664 # return a slice
6665 if not is_scalar(start_slice):
6666 raise AssertionError("Start slice bound is non-scalar")
6667 if not is_scalar(end_slice):
6668 raise AssertionError("End slice bound is non-scalar")
6669
6670 return slice(start_slice, end_slice, step)
6671
6672 def _maybe_cast_indexer(self, key):
6673 """
6674 If we have a float key and are not a floating index, then try to cast
6675 to an int if equivalent.
6676 """
6677 return key
6678
6679 def _maybe_cast_listlike_indexer(self, target) -> Index:
6680 """
6681 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6682 """
6683 return ensure_index(target)
6684
6685 @final
6686 def _validate_indexer(
6687 self,
6688 form: Literal["positional", "slice"],
6689 key,
6690 kind: Literal["getitem", "iloc"],
6691 ) -> None:
6692 """
        If we are a positional indexer, validate that we have an
        appropriately typed bound (must be an integer).
6695 """
6696 if not lib.is_int_or_none(key):
6697 self._raise_invalid_indexer(form, key)
6698
6699 def _maybe_cast_slice_bound(self, label, side: str_t):
6700 """
6701 This function should be overloaded in subclasses that allow non-trivial
6702 casting on label-slice bounds, e.g. datetime-like indices allowing
6703 strings containing formatted datetimes.
6704
6705 Parameters
6706 ----------
6707 label : object
6708 side : {'left', 'right'}
6709
6710 Returns
6711 -------
6712 label : object
6713
6714 Notes
6715 -----
6716 Value of `side` parameter should be validated in caller.
6717 """
6718
6719 # We are a plain index here (sub-class override this method if they
6720 # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
6721
6722 if is_numeric_dtype(self.dtype):
6723 return self._maybe_cast_indexer(label)
6724
6725 # reject them, if index does not contain label
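        # e.g. Index(["a", "b"]).slice_locs(0, 1) raises rather than silently
        # treating the integers as positions.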
6726 if (is_float(label) or is_integer(label)) and label not in self:
6727 self._raise_invalid_indexer("slice", label)
6728
6729 return label
6730
6731 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6732 if self.is_monotonic_increasing:
6733 return self.searchsorted(label, side=side)
6734 elif self.is_monotonic_decreasing:
6735 # np.searchsorted expects ascending sort order, have to reverse
6736 # everything for it to work (element ordering, search side and
6737 # resulting value).
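            # e.g. self = Index([3, 2, 1]), label = 2, side = "left":
            #   self[::-1].searchsorted(2, side="right") -> 2, and
            #   len(self) - 2 -> 1, the leftmost position of 2 in self.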
6738 pos = self[::-1].searchsorted(
6739 label, side="right" if side == "left" else "left"
6740 )
6741 return len(self) - pos
6742
6743 raise ValueError("index must be monotonic increasing or decreasing")
6744
6745 def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
6746 """
6747 Calculate slice bound that corresponds to given label.
6748
6749 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
6750 of given label.
6751
6752 Parameters
6753 ----------
6754 label : object
6755 side : {'left', 'right'}
6756
6757 Returns
6758 -------
6759 int
6760 Index of label.
6761
6762 See Also
6763 --------
6764 Index.get_loc : Get integer location, slice or boolean mask for requested
6765 label.
6766
6767 Examples
6768 --------
6769 >>> idx = pd.RangeIndex(5)
6770 >>> idx.get_slice_bound(3, 'left')
6771 3
6772
6773 >>> idx.get_slice_bound(3, 'right')
6774 4
6775
6776 If ``label`` is non-unique in the index, an error will be raised.
6777
6778 >>> idx_duplicate = pd.Index(['a', 'b', 'a', 'c', 'd'])
6779 >>> idx_duplicate.get_slice_bound('a', 'left')
6780 Traceback (most recent call last):
6781 KeyError: Cannot get left slice bound for non-unique label: 'a'
6782 """
6783
6784 if side not in ("left", "right"):
6785 raise ValueError(
6786 "Invalid value for side kwarg, must be either "
6787 f"'left' or 'right': {side}"
6788 )
6789
6790 original_label = label
6791
6792 # For datetime indices label may be a string that has to be converted
6793 # to datetime boundary according to its resolution.
6794 label = self._maybe_cast_slice_bound(label, side)
6795
6796 # we need to look up the label
6797 try:
6798 slc = self.get_loc(label)
6799 except KeyError as err:
6800 try:
6801 return self._searchsorted_monotonic(label, side)
6802 except ValueError:
6803 # raise the original KeyError
6804 raise err
6805
6806 if isinstance(slc, np.ndarray):
6807 # get_loc may return a boolean array, which
6808 # is OK as long as they are representable by a slice.
6809 assert is_bool_dtype(slc.dtype)
6810 slc = lib.maybe_booleans_to_slice(slc.view("u1"))
6811 if isinstance(slc, np.ndarray):
6812 raise KeyError(
6813 f"Cannot get {side} slice bound for non-unique "
6814 f"label: {repr(original_label)}"
6815 )
6816
6817 if isinstance(slc, slice):
6818 if side == "left":
6819 return slc.start
6820 else:
6821 return slc.stop
6822 else:
6823 if side == "right":
6824 return slc + 1
6825 else:
6826 return slc
6827
6828 def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
6829 """
6830 Compute slice locations for input labels.
6831
6832 Parameters
6833 ----------
6834 start : label, default None
6835 If None, defaults to the beginning.
6836 end : label, default None
6837 If None, defaults to the end.
        step : int, default None
6839 If None, defaults to 1.
6840
6841 Returns
6842 -------
6843 tuple[int, int]
6844
6845 See Also
6846 --------
6847 Index.get_loc : Get location for a single label.
6848
6849 Notes
6850 -----
6851 This method only works if the index is monotonic or unique.
6852
6853 Examples
6854 --------
6855 >>> idx = pd.Index(list('abcd'))
6856 >>> idx.slice_locs(start='b', end='c')
6857 (1, 3)
6858 """
6859 inc = step is None or step >= 0
6860
6861 if not inc:
6862 # If it's a reverse slice, temporarily swap bounds.
6863 start, end = end, start
6864
6865 # GH 16785: If start and end happen to be date strings with UTC offsets
6866 # attempt to parse and check that the offsets are the same
6867 if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
6868 try:
6869 ts_start = Timestamp(start)
6870 ts_end = Timestamp(end)
6871 except (ValueError, TypeError):
6872 pass
6873 else:
6874 if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
6875 raise ValueError("Both dates must have the same UTC offset")
6876
6877 start_slice = None
6878 if start is not None:
6879 start_slice = self.get_slice_bound(start, "left")
6880 if start_slice is None:
6881 start_slice = 0
6882
6883 end_slice = None
6884 if end is not None:
6885 end_slice = self.get_slice_bound(end, "right")
6886 if end_slice is None:
6887 end_slice = len(self)
6888
6889 if not inc:
6890 # Bounds at this moment are swapped, swap them back and shift by 1.
6891 #
6892 # slice_locs('B', 'A', step=-1): s='B', e='A'
6893 #
6894 # s='A' e='B'
6895 # AFTER SWAP: | |
6896 # v ------------------> V
6897 # -----------------------------------
6898 # | | |A|A|A|A| | | | | |B|B| | | | |
6899 # -----------------------------------
6900 # ^ <------------------ ^
6901 # SHOULD BE: | |
6902 # end=s-1 start=e-1
6903 #
6904 end_slice, start_slice = start_slice - 1, end_slice - 1
6905
6906 # i == -1 triggers ``len(self) + i`` selection that points to the
6907 # last element, not before-the-first one, subtracting len(self)
6908 # compensates that.
6909 if end_slice == -1:
6910 end_slice -= len(self)
6911 if start_slice == -1:
6912 start_slice -= len(self)
6913
6914 return start_slice, end_slice
6915
6916 def delete(self, loc) -> Self:
6917 """
6918 Make new Index with passed location(-s) deleted.
6919
6920 Parameters
6921 ----------
6922 loc : int or list of int
6923 Location of item(-s) which will be deleted.
6924 Use a list of locations to delete more than one value at the same time.
6925
6926 Returns
6927 -------
6928 Index
            Will be the same type as self, except for RangeIndex.
6930
6931 See Also
6932 --------
        numpy.delete : Delete rows or columns from a NumPy array (ndarray).
6934
6935 Examples
6936 --------
6937 >>> idx = pd.Index(['a', 'b', 'c'])
6938 >>> idx.delete(1)
6939 Index(['a', 'c'], dtype='object')
6940
6941 >>> idx = pd.Index(['a', 'b', 'c'])
6942 >>> idx.delete([0, 2])
6943 Index(['b'], dtype='object')
6944 """
6945 values = self._values
6946 res_values: ArrayLike
6947 if isinstance(values, np.ndarray):
6948 # TODO(__array_function__): special casing will be unnecessary
6949 res_values = np.delete(values, loc)
6950 else:
6951 res_values = values.delete(loc)
6952
6953 # _constructor so RangeIndex-> Index with an int64 dtype
6954 return self._constructor._simple_new(res_values, name=self.name)
6955
6956 def insert(self, loc: int, item) -> Index:
6957 """
6958 Make new Index inserting new item at location.
6959
        Follows ``numpy.insert`` semantics for negative values.
6961
6962 Parameters
6963 ----------
6964 loc : int
6965 item : object
6966
6967 Returns
6968 -------
6969 Index
6970
6971 Examples
6972 --------
6973 >>> idx = pd.Index(['a', 'b', 'c'])
6974 >>> idx.insert(1, 'x')
6975 Index(['a', 'x', 'b', 'c'], dtype='object')
6976 """
6977 item = lib.item_from_zerodim(item)
6978 if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
6979 item = self._na_value
6980
6981 arr = self._values
6982
6983 try:
6984 if isinstance(arr, ExtensionArray):
6985 res_values = arr.insert(loc, item)
6986 return type(self)._simple_new(res_values, name=self.name)
6987 else:
6988 item = self._validate_fill_value(item)
6989 except (TypeError, ValueError, LossySetitemError):
6990 # e.g. trying to insert an integer into a DatetimeIndex
6991 # We cannot keep the same dtype, so cast to the (often object)
6992 # minimal shared dtype before doing the insert.
6993 dtype = self._find_common_type_compat(item)
6994 return self.astype(dtype).insert(loc, item)
6995
6996 if arr.dtype != object or not isinstance(
6997 item, (tuple, np.datetime64, np.timedelta64)
6998 ):
6999 # with object-dtype we need to worry about numpy incorrectly casting
7000 # dt64/td64 to integer, also about treating tuples as sequences
7001 # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
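            # Casting the scalar to the array's dtype first (e.g. np.int64(5)
            # for an int64 index) lets np.insert preserve that dtype.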
7002 casted = arr.dtype.type(item)
7003 new_values = np.insert(arr, loc, casted)
7004
7005 else:
7006 # error: No overload variant of "insert" matches argument types
7007 # "ndarray[Any, Any]", "int", "None"
7008 new_values = np.insert(arr, loc, None) # type: ignore[call-overload]
7009 loc = loc if loc >= 0 else loc - 1
7010 new_values[loc] = item
7011
7012 out = Index._with_infer(new_values, name=self.name)
7013 if (
7014 using_pyarrow_string_dtype()
7015 and is_string_dtype(out.dtype)
7016 and new_values.dtype == object
7017 ):
7018 out = out.astype(new_values.dtype)
7019 if self.dtype == object and out.dtype != object:
7020 # GH#51363
7021 warnings.warn(
7022 "The behavior of Index.insert with object-dtype is deprecated, "
7023 "in a future version this will return an object-dtype Index "
7024 "instead of inferring a non-object dtype. To retain the old "
7025 "behavior, do `idx.insert(loc, item).infer_objects(copy=False)`",
7026 FutureWarning,
7027 stacklevel=find_stack_level(),
7028 )
7029 return out
7030
7031 def drop(
7032 self,
7033 labels: Index | np.ndarray | Iterable[Hashable],
7034 errors: IgnoreRaise = "raise",
7035 ) -> Index:
7036 """
7037 Make new Index with passed list of labels deleted.
7038
7039 Parameters
7040 ----------
7041 labels : array-like or scalar
7042 errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress the error and drop only the labels that
            are found.
7044
7045 Returns
7046 -------
7047 Index
            Will be the same type as self, except for RangeIndex.
7049
7050 Raises
7051 ------
7052 KeyError
            If not all of the labels are found in the selected axis.
7054
7055 Examples
7056 --------
7057 >>> idx = pd.Index(['a', 'b', 'c'])
7058 >>> idx.drop(['a'])
7059 Index(['b', 'c'], dtype='object')
7060 """
7061 if not isinstance(labels, Index):
7062 # avoid materializing e.g. RangeIndex
7063 arr_dtype = "object" if self.dtype == "object" else None
7064 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
7065
7066 indexer = self.get_indexer_for(labels)
7067 mask = indexer == -1
7068 if mask.any():
7069 if errors != "ignore":
7070 raise KeyError(f"{labels[mask].tolist()} not found in axis")
7071 indexer = indexer[~mask]
7072 return self.delete(indexer)
7073
7074 @final
7075 def infer_objects(self, copy: bool = True) -> Index:
7076 """
7077 If we have an object dtype, try to infer a non-object dtype.
7078
7079 Parameters
7080 ----------
7081 copy : bool, default True
7082 Whether to make a copy in cases where no inference occurs.
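
        Returns
        -------
        Index

        Examples
        --------
        A minimal sketch of object-to-numeric inference:

        >>> idx = pd.Index([1, 2, 3], dtype=object)
        >>> idx.dtype
        dtype('O')
        >>> idx.infer_objects().dtype
        dtype('int64')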
7083 """
7084 if self._is_multi:
7085 raise NotImplementedError(
7086 "infer_objects is not implemented for MultiIndex. "
7087 "Use index.to_frame().infer_objects() instead."
7088 )
7089 if self.dtype != object:
7090 return self.copy() if copy else self
7091
7092 values = self._values
7093 values = cast("npt.NDArray[np.object_]", values)
7094 res_values = lib.maybe_convert_objects(
7095 values,
7096 convert_non_numeric=True,
7097 )
7098 if copy and res_values is values:
7099 return self.copy()
7100 result = Index(res_values, name=self.name)
7101 if not copy and res_values is values and self._references is not None:
7102 result._references = self._references
7103 result._references.add_index_reference(result)
7104 return result
7105
7106 @final
7107 def diff(self, periods: int = 1) -> Index:
7108 """
7109 Computes the difference between consecutive values in the Index object.
7110
7111 If periods is greater than 1, computes the difference between values that
7112 are `periods` number of positions apart.
7113
7114 Parameters
7115 ----------
7116 periods : int, optional
7117 The number of positions between the current and previous
7118 value to compute the difference with. Default is 1.
7119
7120 Returns
7121 -------
7122 Index
7123 A new Index object with the computed differences.
7124
7125 Examples
7126 --------
7127 >>> import pandas as pd
7128 >>> idx = pd.Index([10, 20, 30, 40, 50])
7129 >>> idx.diff()
7130 Index([nan, 10.0, 10.0, 10.0, 10.0], dtype='float64')
7131
7132 """
7133 return Index(self.to_series().diff(periods))
7134
7135 @final
7136 def round(self, decimals: int = 0) -> Self:
7137 """
7138 Round each value in the Index to the given number of decimals.
7139
7140 Parameters
7141 ----------
7142 decimals : int, optional
7143 Number of decimal places to round to. If decimals is negative,
7144 it specifies the number of positions to the left of the decimal point.
7145
7146 Returns
7147 -------
7148 Index
7149 A new Index with the rounded values.
7150
7151 Examples
7152 --------
7153 >>> import pandas as pd
7154 >>> idx = pd.Index([10.1234, 20.5678, 30.9123, 40.4567, 50.7890])
7155 >>> idx.round(decimals=2)
7156 Index([10.12, 20.57, 30.91, 40.46, 50.79], dtype='float64')
7157
7158 """
7159 return self._constructor(self.to_series().round(decimals))
7160
7161 # --------------------------------------------------------------------
7162 # Generated Arithmetic, Comparison, and Unary Methods
7163
7164 def _cmp_method(self, other, op):
7165 """
7166 Wrapper used to dispatch comparison operations.
7167 """
7168 if self.is_(other):
7169 # fastpath
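            # An index compared with itself is elementwise True for ==/<=/>=
            # except at NA positions (NaN never equals NaN), and the inverse
            # for !=.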
7170 if op in {operator.eq, operator.le, operator.ge}:
7171 arr = np.ones(len(self), dtype=bool)
7172 if self._can_hold_na and not isinstance(self, ABCMultiIndex):
7173 # TODO: should set MultiIndex._can_hold_na = False?
7174 arr[self.isna()] = False
7175 return arr
7176 elif op is operator.ne:
7177 arr = np.zeros(len(self), dtype=bool)
7178 if self._can_hold_na and not isinstance(self, ABCMultiIndex):
7179 arr[self.isna()] = True
7180 return arr
7181
7182 if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
7183 self
7184 ) != len(other):
7185 raise ValueError("Lengths must match to compare")
7186
7187 if not isinstance(other, ABCMultiIndex):
7188 other = extract_array(other, extract_numpy=True)
7189 else:
7190 other = np.asarray(other)
7191
7192 if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
7193 # e.g. PeriodArray, Categorical
7194 result = op(self._values, other)
7195
7196 elif isinstance(self._values, ExtensionArray):
7197 result = op(self._values, other)
7198
7199 elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
7200 # don't pass MultiIndex
7201 result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)
7202
7203 else:
7204 result = ops.comparison_op(self._values, other, op)
7205
7206 return result
7207
7208 @final
7209 def _logical_method(self, other, op):
7210 res_name = ops.get_op_result_name(self, other)
7211
7212 lvalues = self._values
7213 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
7214
7215 res_values = ops.logical_op(lvalues, rvalues, op)
7216 return self._construct_result(res_values, name=res_name)
7217
7218 @final
7219 def _construct_result(self, result, name):
7220 if isinstance(result, tuple):
7221 return (
7222 Index(result[0], name=name, dtype=result[0].dtype),
7223 Index(result[1], name=name, dtype=result[1].dtype),
7224 )
7225 return Index(result, name=name, dtype=result.dtype)
7226
7227 def _arith_method(self, other, op):
7228 if (
7229 isinstance(other, Index)
7230 and is_object_dtype(other.dtype)
7231 and type(other) is not Index
7232 ):
7233 # We return NotImplemented for object-dtype index *subclasses* so they have
7234 # a chance to implement ops before we unwrap them.
7235 # See https://github.com/pandas-dev/pandas/issues/31109
7236 return NotImplemented
7237
7238 return super()._arith_method(other, op)
7239
7240 @final
7241 def _unary_method(self, op):
7242 result = op(self._values)
7243 return Index(result, name=self.name)
7244
7245 def __abs__(self) -> Index:
7246 return self._unary_method(operator.abs)
7247
7248 def __neg__(self) -> Index:
7249 return self._unary_method(operator.neg)
7250
7251 def __pos__(self) -> Index:
7252 return self._unary_method(operator.pos)
7253
7254 def __invert__(self) -> Index:
7255 # GH#8875
7256 return self._unary_method(operator.inv)
7257
7258 # --------------------------------------------------------------------
7259 # Reductions
7260
7261 def any(self, *args, **kwargs):
7262 """
        Return whether any element is truthy.
7264
7265 Parameters
7266 ----------
7267 *args
7268 Required for compatibility with numpy.
7269 **kwargs
7270 Required for compatibility with numpy.
7271
7272 Returns
7273 -------
7274 bool or array-like (if axis is specified)
7275 A single element array-like may be converted to bool.
7276
7277 See Also
7278 --------
7279 Index.all : Return whether all elements are True.
7280 Series.all : Return whether all elements are True.
7281
7282 Notes
7283 -----
7284 Not a Number (NaN), positive infinity and negative infinity
7285 evaluate to True because these are not equal to zero.
7286
7287 Examples
7288 --------
7289 >>> index = pd.Index([0, 1, 2])
7290 >>> index.any()
7291 True
7292
7293 >>> index = pd.Index([0, 0, 0])
7294 >>> index.any()
7295 False
7296 """
7297 nv.validate_any(args, kwargs)
7298 self._maybe_disable_logical_methods("any")
7299 vals = self._values
7300 if not isinstance(vals, np.ndarray):
7301 # i.e. EA, call _reduce instead of "any" to get TypeError instead
7302 # of AttributeError
7303 return vals._reduce("any")
7304 return np.any(vals)
7305
7306 def all(self, *args, **kwargs):
7307 """
        Return whether all elements are truthy.
7309
7310 Parameters
7311 ----------
7312 *args
7313 Required for compatibility with numpy.
7314 **kwargs
7315 Required for compatibility with numpy.
7316
7317 Returns
7318 -------
7319 bool or array-like (if axis is specified)
7320 A single element array-like may be converted to bool.
7321
7322 See Also
7323 --------
7324 Index.any : Return whether any element in an Index is True.
7325 Series.any : Return whether any element in a Series is True.
7326 Series.all : Return whether all elements in a Series are True.
7327
7328 Notes
7329 -----
7330 Not a Number (NaN), positive infinity and negative infinity
7331 evaluate to True because these are not equal to zero.
7332
7333 Examples
7334 --------
7335 True, because nonzero integers are considered True.
7336
7337 >>> pd.Index([1, 2, 3]).all()
7338 True
7339
7340 False, because ``0`` is considered False.
7341
7342 >>> pd.Index([0, 1, 2]).all()
7343 False
7344 """
7345 nv.validate_all(args, kwargs)
7346 self._maybe_disable_logical_methods("all")
7347 vals = self._values
7348 if not isinstance(vals, np.ndarray):
7349 # i.e. EA, call _reduce instead of "all" to get TypeError instead
7350 # of AttributeError
7351 return vals._reduce("all")
7352 return np.all(vals)
7353
7354 @final
7355 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
7356 """
        Raise if this Index subclass does not support any or all.
7358 """
7359 if (
7360 isinstance(self, ABCMultiIndex)
7361 # TODO(3.0): PeriodArray and DatetimeArray any/all will raise,
7362 # so checking needs_i8_conversion will be unnecessary
7363 or (needs_i8_conversion(self.dtype) and self.dtype.kind != "m")
7364 ):
7365 # This call will raise
7366 make_invalid_op(opname)(self)
7367
7368 @Appender(IndexOpsMixin.argmin.__doc__)
7369 def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
7370 nv.validate_argmin(args, kwargs)
7371 nv.validate_minmax_axis(axis)
7372
7373 if not self._is_multi and self.hasnans:
7374 # Take advantage of cache
7375 mask = self._isnan
7376 if not skipna or mask.all():
7377 warnings.warn(
7378 f"The behavior of {type(self).__name__}.argmax/argmin "
7379 "with skipna=False and NAs, or with all-NAs is deprecated. "
7380 "In a future version this will raise ValueError.",
7381 FutureWarning,
7382 stacklevel=find_stack_level(),
7383 )
7384 return -1
7385 return super().argmin(skipna=skipna)
7386
7387 @Appender(IndexOpsMixin.argmax.__doc__)
7388 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
7389 nv.validate_argmax(args, kwargs)
7390 nv.validate_minmax_axis(axis)
7391
7392 if not self._is_multi and self.hasnans:
7393 # Take advantage of cache
7394 mask = self._isnan
7395 if not skipna or mask.all():
7396 warnings.warn(
7397 f"The behavior of {type(self).__name__}.argmax/argmin "
7398 "with skipna=False and NAs, or with all-NAs is deprecated. "
7399 "In a future version this will raise ValueError.",
7400 FutureWarning,
7401 stacklevel=find_stack_level(),
7402 )
7403 return -1
7404 return super().argmax(skipna=skipna)
7405
7406 def min(self, axis=None, skipna: bool = True, *args, **kwargs):
7407 """
7408 Return the minimum value of the Index.
7409
7410 Parameters
7411 ----------
7412 axis : {None}
7413 Dummy argument for consistency with Series.
7414 skipna : bool, default True
7415 Exclude NA/null values when showing the result.
7416 *args, **kwargs
7417 Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            first = self[0]
            if not isna(first):
                return first

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="min", skipna=skipna)

        return nanops.nanmin(self._values, skipna=skipna)

    def max(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True
            Exclude NA/null values when computing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            last = self[-1]
            if not isna(last):
                return last

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="max", skipna=skipna)

        return nanops.nanmax(self._values, skipna=skipna)

    # --------------------------------------------------------------------

    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.shape
        (3,)
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        return (len(self),)


def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Multiple sequences return a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) == 1:
        if names is not None:
            names = names[0]
        return Index(sequences[0], name=names)
    else:
        return MultiIndex.from_arrays(sequences, names=names)


def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like

    if isinstance(index_like, ABCSeries):
        name = index_like.name
        return Index(index_like, name=name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if isinstance(index_like, list):
        if type(index_like) is not list:  # noqa: E721
            # must check for exactly list here because of strict type
            # check in clean_index_list
            index_like = list(index_like)

        if len(index_like) and lib.is_all_arraylike(index_like):
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(index_like)
        else:
            return Index(index_like, copy=copy, tupleize_cols=False)
    else:
        return Index(index_like, copy=copy)


def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
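
    Examples
    --------
    An iterator is materialized into a list; sized sequences pass through.

    >>> ensure_has_len(iter([1, 2]))
    [1, 2]
    >>> ensure_has_len([1, 2])
    [1, 2]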
7655 """
7656 try:
7657 len(seq)
7658 except TypeError:
7659 return list(seq)
7660 else:
7661 return seq
7662
7663
7664def trim_front(strings: list[str]) -> list[str]:
7665 """
    Trims leading spaces evenly among all strings.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings


def _validate_join_method(method: str) -> None:
    if method not in ["left", "right", "inner", "outer"]:
        raise ValueError(f"do not recognize join method {method}")
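
# Illustrative: _validate_join_method("outer") returns silently, while an
# unrecognized value such as "cross" raises
# "ValueError: do not recognize join method cross".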


def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
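
    Examples
    --------
    The name is taken from the data only when one is not passed explicitly,
    and must be hashable either way.

    >>> maybe_extract_name(None, pd.Series([1], name="x"), pd.Index)
    'x'
    >>> maybe_extract_name("y", pd.Series([1], name="x"), pd.Index)
    'y'
    >>> maybe_extract_name(["not", "hashable"], None, pd.Index)
    Traceback (most recent call last):
    ...
    TypeError: Index.name must be a hashable type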
7691 """
7692 if name is None and isinstance(obj, (Index, ABCSeries)):
7693 # Note we don't just check for "name" attribute since that would
7694 # pick up e.g. dtype.name
7695 name = obj.name
7696
7697 # GH#29069
7698 if not is_hashable(name):
7699 raise TypeError(f"{cls.__name__}.name must be a hashable type")
7700
7701 return name
7702
7703
7704def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
7705 """
7706 Return common name if all indices agree, otherwise None (level-by-level).
7707
7708 Parameters
7709 ----------
7710 indexes : list of Index objects
7711
7712 Returns
7713 -------
    tuple
        A tuple of the unanimous 'names' found, one entry per level.
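
    Examples
    --------
    Names that agree are kept; any disagreement yields None for that level.

    >>> idx1 = pd.Index([1, 2], name="a")
    >>> idx2 = pd.Index([3, 4], name="a")
    >>> get_unanimous_names(idx1, idx2)
    ('a',)
    >>> idx3 = pd.Index([5, 6], name="b")
    >>> get_unanimous_names(idx1, idx3)
    (None,)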
7716 """
7717 name_tups = [tuple(i.names) for i in indexes]
7718 name_sets = [{*ns} for ns in zip_longest(*name_tups)]
7719 names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
7720 return names
7721
7722
7723def _unpack_nested_dtype(other: Index) -> DtypeObj:
7724 """
7725 When checking if our dtype is comparable with another, we need
7726 to unpack CategoricalDtype to look at its categories.dtype.
7727
7728 Parameters
7729 ----------
7730 other : Index
7731
7732 Returns
7733 -------
7734 np.dtype or ExtensionDtype
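
    Examples
    --------
    The categories' dtype is unpacked; other dtypes pass through unchanged.

    >>> _unpack_nested_dtype(pd.CategoricalIndex(list("aabb")))
    dtype('O')
    >>> _unpack_nested_dtype(pd.Index([1, 2], dtype="int64"))
    dtype('int64')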
7735 """
7736 dtype = other.dtype
7737 if isinstance(dtype, CategoricalDtype):
7738 # If there is ever a SparseIndex, this could get dispatched
7739 # here too.
7740 return dtype.categories.dtype
7741 elif isinstance(dtype, ArrowDtype):
7742 # GH 53617
7743 import pyarrow as pa
7744
7745 if pa.types.is_dictionary(dtype.pyarrow_dtype):
7746 other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
7747 return other.dtype
7748
7749
7750def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
7751 if sort is not False:
7752 try:
7753 # error: Incompatible types in assignment (expression has type
7754 # "Union[ExtensionArray, ndarray[Any, Any], Index, Series,
7755 # Tuple[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series],
7756 # ndarray[Any, Any]]]", variable has type "Union[Index,
7757 # Union[ExtensionArray, ndarray[Any, Any]]]")
7758 result = algos.safe_sort(result) # type: ignore[assignment]
7759 except TypeError as err:
7760 if sort is True:
7761 raise
7762 warnings.warn(
7763 f"{err}, sort order is undefined for incomparable objects.",
7764 RuntimeWarning,
7765 stacklevel=find_stack_level(),
7766 )
7767 return result
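
# Illustrative: _maybe_try_sort(np.array([3, 1, 2]), sort=None) returns the
# values sorted; sort=False returns the input untouched; incomparable mixed
# objects (e.g. np.array([1, "a"], dtype=object)) fall back to the unsorted
# input with a RuntimeWarning unless sort=True, which re-raises the TypeError.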


def get_values_for_csv(
    values: ArrayLike,
    *,
    date_format,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
) -> npt.NDArray[np.object_]:
    """
    Convert to types which can be consumed by the standard library's
    csv.writer.writerows.
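
    Examples
    --------
    A float array with default formatting (illustrative of the float path
    below):

    >>> get_values_for_csv(np.array([1.5, np.nan]), date_format=None, na_rep="NA")
    array(['1.5', 'NA'], dtype=object)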
7782 """
7783 if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
7784 # GH#40754 Convert categorical datetimes to datetime array
7785 values = algos.take_nd(
7786 values.categories._values,
7787 ensure_platform_int(values._codes),
7788 fill_value=na_rep,
7789 )
7790
7791 values = ensure_wrapped_if_datetimelike(values)
7792
7793 if isinstance(values, (DatetimeArray, TimedeltaArray)):
7794 if values.ndim == 1:
7795 result = values._format_native_types(na_rep=na_rep, date_format=date_format)
7796 result = result.astype(object, copy=False)
7797 return result
7798
7799 # GH#21734 Process every column separately, they might have different formats
7800 results_converted = []
7801 for i in range(len(values)):
7802 result = values[i, :]._format_native_types(
7803 na_rep=na_rep, date_format=date_format
7804 )
7805 results_converted.append(result.astype(object, copy=False))
7806 return np.vstack(results_converted)
7807
7808 elif isinstance(values.dtype, PeriodDtype):
7809 # TODO: tests that get here in column path
7810 values = cast("PeriodArray", values)
7811 res = values._format_native_types(na_rep=na_rep, date_format=date_format)
7812 return res
7813
7814 elif isinstance(values.dtype, IntervalDtype):
7815 # TODO: tests that get here in column path
7816 values = cast("IntervalArray", values)
7817 mask = values.isna()
7818 if not quoting:
7819 result = np.asarray(values).astype(str)
7820 else:
7821 result = np.array(values, dtype=object, copy=True)
7822
7823 result[mask] = na_rep
7824 return result
7825
7826 elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
7827 # see GH#13418: no special formatting is desired at the
7828 # output (important for appropriate 'quoting' behaviour),
7829 # so do not pass it through the FloatArrayFormatter
7830 if float_format is None and decimal == ".":
7831 mask = isna(values)
7832
7833 if not quoting:
7834 values = values.astype(str)
7835 else:
7836 values = np.array(values, dtype="object")
7837
7838 values[mask] = na_rep
7839 values = values.astype(object, copy=False)
7840 return values
7841
7842 from pandas.io.formats.format import FloatArrayFormatter
7843
7844 formatter = FloatArrayFormatter(
7845 values,
7846 na_rep=na_rep,
7847 float_format=float_format,
7848 decimal=decimal,
7849 quoting=quoting,
7850 fixed_width=False,
7851 )
7852 res = formatter.get_result_as_array()
7853 res = res.astype(object, copy=False)
7854 return res
7855
7856 elif isinstance(values, ExtensionArray):
7857 mask = isna(values)
7858
7859 new_values = np.asarray(values.astype(object))
7860 new_values[mask] = na_rep
7861 return new_values
7862
7863 else:
7864 mask = isna(values)
7865 itemsize = writers.word_len(na_rep)
7866
7867 if values.dtype != _dtype_obj and not quoting and itemsize:
7868 values = values.astype(str)
7869 if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
7870 # enlarge for the na_rep
7871 values = values.astype(f"<U{itemsize}")
7872 else:
7873 values = np.array(values, dtype="object")
7874
7875 values[mask] = na_rep
7876 values = values.astype(object, copy=False)
7877 return values