1from __future__ import annotations
2
3from datetime import datetime
4import functools
5from itertools import zip_longest
6import operator
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 ClassVar,
12 Hashable,
13 Iterable,
14 Literal,
15 NoReturn,
16 Sequence,
17 TypeVar,
18 cast,
19 final,
20 overload,
21)
22import warnings
23
24import numpy as np
25
26from pandas._config import get_option
27
28from pandas._libs import (
29 NaT,
30 algos as libalgos,
31 index as libindex,
32 lib,
33)
34from pandas._libs.internals import BlockValuesRefs
35import pandas._libs.join as libjoin
36from pandas._libs.lib import (
37 is_datetime_array,
38 no_default,
39)
40from pandas._libs.missing import is_float_nan
41from pandas._libs.tslibs import (
42 IncompatibleFrequency,
43 OutOfBoundsDatetime,
44 Timestamp,
45 tz_compare,
46)
47from pandas._typing import (
48 AnyAll,
49 ArrayLike,
50 Axes,
51 Axis,
52 DropKeep,
53 DtypeObj,
54 F,
55 IgnoreRaise,
56 IndexLabel,
57 JoinHow,
58 Level,
59 Shape,
60 npt,
61)
62from pandas.compat.numpy import function as nv
63from pandas.errors import (
64 DuplicateLabelError,
65 InvalidIndexError,
66)
67from pandas.util._decorators import (
68 Appender,
69 cache_readonly,
70 doc,
71)
72from pandas.util._exceptions import (
73 find_stack_level,
74 rewrite_exception,
75)
76
77from pandas.core.dtypes.astype import (
78 astype_array,
79 astype_is_view,
80)
81from pandas.core.dtypes.cast import (
82 LossySetitemError,
83 can_hold_element,
84 common_dtype_categorical_compat,
85 find_result_type,
86 infer_dtype_from,
87 maybe_cast_pointwise_result,
88 np_can_hold_element,
89)
90from pandas.core.dtypes.common import (
91 ensure_int64,
92 ensure_object,
93 ensure_platform_int,
94 is_any_real_numeric_dtype,
95 is_bool_dtype,
96 is_categorical_dtype,
97 is_dtype_equal,
98 is_ea_or_datetimelike_dtype,
99 is_extension_array_dtype,
100 is_float,
101 is_float_dtype,
102 is_hashable,
103 is_integer,
104 is_integer_dtype,
105 is_interval_dtype,
106 is_iterator,
107 is_list_like,
108 is_numeric_dtype,
109 is_object_dtype,
110 is_scalar,
111 is_signed_integer_dtype,
112 is_string_dtype,
113 needs_i8_conversion,
114 pandas_dtype,
115 validate_all_hashable,
116)
117from pandas.core.dtypes.concat import concat_compat
118from pandas.core.dtypes.dtypes import (
119 CategoricalDtype,
120 DatetimeTZDtype,
121 ExtensionDtype,
122 IntervalDtype,
123 PeriodDtype,
124)
125from pandas.core.dtypes.generic import (
126 ABCDataFrame,
127 ABCDatetimeIndex,
128 ABCMultiIndex,
129 ABCPeriodIndex,
130 ABCSeries,
131 ABCTimedeltaIndex,
132)
133from pandas.core.dtypes.inference import is_dict_like
134from pandas.core.dtypes.missing import (
135 array_equivalent,
136 is_valid_na_for_dtype,
137 isna,
138)
139
140from pandas.core import (
141 arraylike,
142 ops,
143)
144from pandas.core.accessor import CachedAccessor
145import pandas.core.algorithms as algos
146from pandas.core.array_algos.putmask import (
147 setitem_datetimelike_compat,
148 validate_putmask,
149)
150from pandas.core.arrays import (
151 ArrowExtensionArray,
152 BaseMaskedArray,
153 Categorical,
154 ExtensionArray,
155)
156from pandas.core.arrays.string_ import StringArray
157from pandas.core.base import (
158 IndexOpsMixin,
159 PandasObject,
160)
161import pandas.core.common as com
162from pandas.core.construction import (
163 ensure_wrapped_if_datetimelike,
164 extract_array,
165 sanitize_array,
166)
167from pandas.core.indexers import disallow_ndim_indexing
168from pandas.core.indexes.frozen import FrozenList
169from pandas.core.missing import clean_reindex_fill_method
170from pandas.core.ops import get_op_result_name
171from pandas.core.ops.invalid import make_invalid_op
172from pandas.core.sorting import (
173 ensure_key_mapped,
174 get_group_index_sorter,
175 nargsort,
176)
177from pandas.core.strings.accessor import StringMethods
178
179from pandas.io.formats.printing import (
180 PrettyDict,
181 default_pprint,
182 format_object_summary,
183 pprint_thing,
184)
185
186if TYPE_CHECKING:
187 from pandas import (
188 CategoricalIndex,
189 DataFrame,
190 MultiIndex,
191 Series,
192 )
193 from pandas.core.arrays import PeriodArray
194
195
__all__ = ["Index"]

# lib.infer_dtype results for which sorting is not meaningful.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Default substitutions for the shared docstring templates below; subclasses
# override entries when rendering their own docs via Appender.
_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
# Shared docstring templates, filled in per-class with _index_doc_kwargs.
_index_shared_docs: dict[str, str] = {}
# Alias for the builtin str; the class attribute ``Index.str`` (the string
# accessor) shadows the builtin inside the class body below.
str_t = str


# Cached object dtype, compared against frequently.
_dtype_obj = np.dtype("object")

# Masked/pyarrow dtype name -> dedicated masked index engine. Dtypes not
# listed here fall back to the generic paths in Index._engine.
_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}
242
243
def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.

    Wraps a join method that always returns ``(join_index, lidx, ridx)`` so
    the indexers are only surfaced (converted to platform int) when the
    caller passes ``return_indexers=True``.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        joined, left_indexer, right_indexer = meth(
            self, other, how=how, level=level, sort=sort
        )
        if not return_indexers:
            return joined

        if left_indexer is not None:
            left_indexer = ensure_platform_int(left_indexer)
        if right_indexer is not None:
            right_indexer = ensure_platform_int(right_indexer)
        return joined, left_indexer, right_indexer

    return cast(F, join)
270
271
def _new_Index(cls, d):
    """
    Reconstruct an Index during unpickling.

    This is called instead of the default reconstruction because
    ``Index.__new__`` takes arguments and would otherwise break.
    """
    if issubclass(cls, ABCPeriodIndex):
        # Backward compat: PeriodIndex can't be instantiated with ordinals
        # through __new__ (GH #13277), so use a dedicated helper.
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")
        # This was a valid MultiIndex at pickle-time; skip re-validation
        # when un-pickling.
        d["verify_integrity"] = False
    elif "data" in d and "dtype" not in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)
298
299
# TypeVar bound to Index so subclass methods can be annotated as returning
# the same class they were called on.
_IndexT = TypeVar("_IndexT", bound="Index")
301
302
303class Index(IndexOpsMixin, PandasObject):
304 """
305 Immutable sequence used for indexing and alignment.
306
307 The basic object storing axis labels for all pandas objects.
308
309 .. versionchanged:: 2.0.0
310
311 Index can hold all numpy numeric dtypes (except float16). Previously only
312 int64/uint64/float64 dtypes were accepted.
313
314 Parameters
315 ----------
316 data : array-like (1-dimensional)
317 dtype : NumPy dtype (default: object)
318 If dtype is None, we find the dtype that best fits the data.
319 If an actual dtype is provided, we coerce to that dtype if it's safe.
320 Otherwise, an error will be raised.
321 copy : bool
322 Make a copy of input ndarray.
323 name : object
324 Name to be stored in the index.
325 tupleize_cols : bool (default: True)
326 When True, attempt to create a MultiIndex if possible.
327
328 See Also
329 --------
330 RangeIndex : Index implementing a monotonic integer range.
331 CategoricalIndex : Index of :class:`Categorical` s.
332 MultiIndex : A multi-level, or hierarchical Index.
333 IntervalIndex : An Index of :class:`Interval` s.
334 DatetimeIndex : Index of datetime64 data.
335 TimedeltaIndex : Index of timedelta64 data.
336 PeriodIndex : Index of Period data.
337
338 Notes
339 -----
340 An Index instance can **only** contain hashable objects.
341 An Index instance *can not* hold numpy float16 dtype.
342
343 Examples
344 --------
345 >>> pd.Index([1, 2, 3])
346 Index([1, 2, 3], dtype='int64')
347
348 >>> pd.Index(list('abc'))
349 Index(['a', 'b', 'c'], dtype='object')
350
351 >>> pd.Index([1, 2, 3], dtype="uint8")
352 Index([1, 2, 3], dtype='uint8')
353 """
354
    # To hand over control to subclasses: higher precedence wins the choice
    # of join implementation when two Index types are joined.
    _join_precedence = 1
357
358 # Cython methods; see github.com/cython/cython/issues/2647
359 # for why we need to wrap these instead of making them class attributes
360 # Moreover, cython will choose the appropriate-dtyped sub-function
361 # given the dtypes of the passed arguments
362
363 @final
364 def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
365 # Caller is responsible for ensuring other.dtype == self.dtype
366 sv = self._get_join_target()
367 ov = other._get_join_target()
368 # can_use_libjoin assures sv and ov are ndarrays
369 sv = cast(np.ndarray, sv)
370 ov = cast(np.ndarray, ov)
371 # similar but not identical to ov.searchsorted(sv)
372 return libjoin.left_join_indexer_unique(sv, ov)
373
374 @final
375 def _left_indexer(
376 self: _IndexT, other: _IndexT
377 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
378 # Caller is responsible for ensuring other.dtype == self.dtype
379 sv = self._get_join_target()
380 ov = other._get_join_target()
381 # can_use_libjoin assures sv and ov are ndarrays
382 sv = cast(np.ndarray, sv)
383 ov = cast(np.ndarray, ov)
384 joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
385 joined = self._from_join_target(joined_ndarray)
386 return joined, lidx, ridx
387
388 @final
389 def _inner_indexer(
390 self: _IndexT, other: _IndexT
391 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
392 # Caller is responsible for ensuring other.dtype == self.dtype
393 sv = self._get_join_target()
394 ov = other._get_join_target()
395 # can_use_libjoin assures sv and ov are ndarrays
396 sv = cast(np.ndarray, sv)
397 ov = cast(np.ndarray, ov)
398 joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
399 joined = self._from_join_target(joined_ndarray)
400 return joined, lidx, ridx
401
402 @final
403 def _outer_indexer(
404 self: _IndexT, other: _IndexT
405 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
406 # Caller is responsible for ensuring other.dtype == self.dtype
407 sv = self._get_join_target()
408 ov = other._get_join_target()
409 # can_use_libjoin assures sv and ov are ndarrays
410 sv = cast(np.ndarray, sv)
411 ov = cast(np.ndarray, ov)
412 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
413 joined = self._from_join_target(joined_ndarray)
414 return joined, lidx, ridx
415
    _typ: str = "index"
    # Backing store for the labels; ndarray for plain dtypes, EA otherwise.
    _data: ExtensionArray | np.ndarray
    # Type(s) that _simple_new accepts for ``values``; subclasses narrow this.
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    # Identity sentinel used by Index.is_ (see _reset_identity).
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    # Attributes compared by Index.identical / copied onto results.
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]
430
431 @cache_readonly
432 def _can_hold_strings(self) -> bool:
433 return not is_numeric_dtype(self)
434
    # numpy dtype -> libindex engine class used for hash-based lookups.
    # Dtypes not listed here fall back to ObjectEngine (see _engine_type).
    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }
449
450 @property
451 def _engine_type(
452 self,
453 ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
454 return self._engine_types.get(self.dtype, libindex.ObjectEngine)
455
    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    # Accessor namespaces exposed on this class.
    _accessors = {"str"}

    # String-methods accessor; note this shadows the builtin ``str`` inside
    # the class namespace (hence the module-level ``str_t`` alias).
    str = CachedAccessor("str", StringMethods)

    # BlockValuesRefs for Copy-on-Write tracking; populated in _simple_new.
    _references = None
465
466 # --------------------------------------------------------------------
467 # Constructors
468
    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Index:
        """
        Construct an Index, dispatching to the appropriate subclass based on
        the (possibly inferred) dtype of ``data``.
        """
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        # Propagate Copy-on-Write references when we are not copying.
        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in ["i", "u", "f", "b", "c", "m", "M"]:
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Array-like but not ndarray/Index/Series: convert then retry.
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            # Generic iterable (list, tuple, set, generator, ...)
            if tupleize_cols:
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(data, names=name)
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            # Re-word sanitize_array's Series-centric messages for Index.
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        return klass._simple_new(arr, name, refs=refs)
564
565 @classmethod
566 def _ensure_array(cls, data, dtype, copy: bool):
567 """
568 Ensure we have a valid array to pass to _simple_new.
569 """
570 if data.ndim > 1:
571 # GH#13601, GH#20285, GH#27125
572 raise ValueError("Index data must be 1-dimensional")
573 elif dtype == np.float16:
574 # float16 not supported (no indexing engine)
575 raise NotImplementedError("float16 indexes are not supported")
576
577 if copy:
578 # asarray_tuplesafe does not always copy underlying data,
579 # so need to make sure that this happens
580 data = data.copy()
581 return data
582
583 @final
584 @classmethod
585 def _dtype_to_subclass(cls, dtype: DtypeObj):
586 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
587
588 if isinstance(dtype, ExtensionDtype):
589 if isinstance(dtype, DatetimeTZDtype):
590 from pandas import DatetimeIndex
591
592 return DatetimeIndex
593 elif isinstance(dtype, CategoricalDtype):
594 from pandas import CategoricalIndex
595
596 return CategoricalIndex
597 elif isinstance(dtype, IntervalDtype):
598 from pandas import IntervalIndex
599
600 return IntervalIndex
601 elif isinstance(dtype, PeriodDtype):
602 from pandas import PeriodIndex
603
604 return PeriodIndex
605
606 return Index
607
608 if dtype.kind == "M":
609 from pandas import DatetimeIndex
610
611 return DatetimeIndex
612
613 elif dtype.kind == "m":
614 from pandas import TimedeltaIndex
615
616 return TimedeltaIndex
617
618 elif dtype.kind == "O":
619 # NB: assuming away MultiIndex
620 return Index
621
622 elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
623 return Index
624
625 raise NotImplementedError(dtype)
626
627 # NOTE for new Index creation:
628
629 # - _simple_new: It returns new Index with the same type as the caller.
630 # All metadata (such as name) must be provided by caller's responsibility.
631 # Using _shallow_copy is recommended because it fills these metadata
632 # otherwise specified.
633
634 # - _shallow_copy: It returns new Index with the same type (using
635 # _simple_new), but fills caller's metadata otherwise specified. Passed
636 # kwargs will overwrite corresponding metadata.
637
638 # See each method's docstring.
639
640 @classmethod
641 def _simple_new(
642 cls: type[_IndexT], values: ArrayLike, name: Hashable = None, refs=None
643 ) -> _IndexT:
644 """
645 We require that we have a dtype compat for the values. If we are passed
646 a non-dtype compat, then coerce using the constructor.
647
648 Must be careful not to recurse.
649 """
650 assert isinstance(values, cls._data_cls), type(values)
651
652 result = object.__new__(cls)
653 result._data = values
654 result._name = name
655 result._cache = {}
656 result._reset_identity()
657 if refs is not None:
658 result._references = refs
659 else:
660 result._references = BlockValuesRefs()
661 result._references.add_index_reference(result)
662
663 return result
664
665 @classmethod
666 def _with_infer(cls, *args, **kwargs):
667 """
668 Constructor that uses the 1.0.x behavior inferring numeric dtypes
669 for ndarray[object] inputs.
670 """
671 result = cls(*args, **kwargs)
672
673 if result.dtype == _dtype_obj and not result._is_multi:
674 # error: Argument 1 to "maybe_convert_objects" has incompatible type
675 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
676 # "ndarray[Any, Any]"
677 values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type]
678 if values.dtype.kind in ["i", "u", "f", "b"]:
679 return Index(values, name=result.name)
680
681 return result
682
683 @cache_readonly
684 def _constructor(self: _IndexT) -> type[_IndexT]:
685 return type(self)
686
687 @final
688 def _maybe_check_unique(self) -> None:
689 """
690 Check that an Index has no duplicates.
691
692 This is typically only called via
693 `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
694 True (duplicates aren't allowed).
695
696 Raises
697 ------
698 DuplicateLabelError
699 When the index is not unique.
700 """
701 if not self.is_unique:
702 msg = """Index has duplicates."""
703 duplicates = self._format_duplicate_message()
704 msg += f"\n{duplicates}"
705
706 raise DuplicateLabelError(msg)
707
708 @final
709 def _format_duplicate_message(self) -> DataFrame:
710 """
711 Construct the DataFrame for a DuplicateLabelError.
712
713 This returns a DataFrame indicating the labels and positions
714 of duplicates in an index. This should only be called when it's
715 already known that duplicates are present.
716
717 Examples
718 --------
719 >>> idx = pd.Index(['a', 'b', 'a'])
720 >>> idx._format_duplicate_message()
721 positions
722 label
723 a [0, 2]
724 """
725 from pandas import Series
726
727 duplicates = self[self.duplicated(keep="first")].unique()
728 assert len(duplicates)
729
730 out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
731 if self._is_multi:
732 # test_format_duplicate_labels_message_multi
733 # error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
734 out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]
735
736 if self.nlevels == 1:
737 out = out.rename_axis("label")
738 return out.to_frame(name="positions")
739
740 # --------------------------------------------------------------------
741 # Index Internals Methods
742
743 def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
744 """
745 Create a new Index with the same class as the caller, don't copy the
746 data, use the same object attributes with passed in attributes taking
747 precedence.
748
749 *this is an internal non-public method*
750
751 Parameters
752 ----------
753 values : the values to create the new Index, optional
754 name : Label, defaults to self.name
755 """
756 name = self._name if name is no_default else name
757
758 return self._simple_new(values, name=name, refs=self._references)
759
    def _view(self: _IndexT) -> _IndexT:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        # Share the cache so e.g. an already-built engine is reused by the view.
        result._cache = self._cache
        return result
768
769 @final
770 def _rename(self: _IndexT, name: Hashable) -> _IndexT:
771 """
772 fastpath for rename if new name is already validated.
773 """
774 result = self._view()
775 result._name = name
776 return result
777
778 @final
779 def is_(self, other) -> bool:
780 """
781 More flexible, faster check like ``is`` but that works through views.
782
783 Note: this is *not* the same as ``Index.identical()``, which checks
784 that metadata is also the same.
785
786 Parameters
787 ----------
788 other : object
789 Other object to compare against.
790
791 Returns
792 -------
793 bool
794 True if both have same underlying data, False otherwise.
795
796 See Also
797 --------
798 Index.identical : Works like ``Index.is_`` but also checks metadata.
799 """
800 if self is other:
801 return True
802 elif not hasattr(other, "_id"):
803 return False
804 elif self._id is None or other._id is None:
805 return False
806 else:
807 return self._id is other._id
808
    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Fresh sentinel: two Indexes are `is_`-equal only if they share it.
        self._id = object()
815
    @final
    def _cleanup(self) -> None:
        # Drop the engine's hash-table mapping to release memory.
        self._engine.clear_mapping()
819
    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        """
        The (cached) lookup engine for this Index's values, chosen by dtype.
        """
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]
856
857 @final
858 @cache_readonly
859 def _dir_additions_for_owner(self) -> set[str_t]:
860 """
861 Add the string-like labels to the owner dataframe/series dir output.
862
863 If this is a MultiIndex, it's first level values are used.
864 """
865 return {
866 c
867 for c in self.unique(level=0)[: get_option("display.max_dir_items")]
868 if isinstance(c, str) and c.isidentifier()
869 }
870
871 # --------------------------------------------------------------------
872 # Array-Like Methods
873
874 # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        # Delegate to the backing array.
        return len(self._data)
880
    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)
886
    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        """
        Handle numpy ufunc calls (NEP 13), re-wrapping results as Index.
        """
        # Defer to Series/DataFrame, which carry their own dispatch logic.
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # Prefer a dunder op (e.g. __add__) when the ufunc maps to one.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Apply the ufunc to the underlying arrays, not the Index wrappers.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        # float16 has no indexing engine; upcast to float32.
        if result.dtype == np.float16:
            result = result.astype(np.float32)

        return self.__array_wrap__(result)
920
921 def __array_wrap__(self, result, context=None):
922 """
923 Gets called after a ufunc and other functions e.g. np.split.
924 """
925 result = lib.item_from_zerodim(result)
926 if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
927 return result
928
929 return Index(result, name=self.name)
930
    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # Cached: the backing array (and hence its dtype) never changes.
        return self._data.dtype
937
    @final
    def ravel(self, order: str_t = "C") -> Index:
        """
        Return a view on self.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        # Index is always 1-D, so "ravelling" is just a full slice (a view);
        # ``order`` is accepted for numpy compatibility but has no effect.
        return self[:]
952
    def view(self, cls=None):
        """
        Return a view of self, optionally reinterpreted as dtype ``cls``.
        """
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is a dtype (or dtype-like string), not an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                # Reinterpret the underlying i8 values under the target dtype.
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name, refs=self._references)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            # Views share identity with the original (see Index.is_).
            result._id = self._id
        return result
982
    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            # Let the EA do the cast; rewrite its error to mention Index.
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            # Copy-on-Write: the cast shares memory with self, so the result
            # must participate in the same reference tracking.
            result._references = self._references
            result._references.add_index_reference(result)
        return result
1039
    # Shared docstring template for ``take``; rendered per-class through
    # Appender with the _index_doc_kwargs substitutions.
    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.
        """
1069
1070 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
1071 def take(
1072 self,
1073 indices,
1074 axis: Axis = 0,
1075 allow_fill: bool = True,
1076 fill_value=None,
1077 **kwargs,
1078 ):
1079 if kwargs:
1080 nv.validate_take((), kwargs)
1081 if is_scalar(indices):
1082 raise TypeError("Expected indices to be array-like")
1083 indices = ensure_platform_int(indices)
1084 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)
1085
1086 # Note: we discard fill_value and use self._na_value, only relevant
1087 # in the case where allow_fill is True and fill_value is not None
1088 values = self._values
1089 if isinstance(values, np.ndarray):
1090 taken = algos.take(
1091 values, indices, allow_fill=allow_fill, fill_value=self._na_value
1092 )
1093 else:
1094 # algos.take passes 'axis' keyword which not all EAs accept
1095 taken = values.take(
1096 indices, allow_fill=allow_fill, fill_value=self._na_value
1097 )
1098 # _constructor so RangeIndex-> Index with an int64 dtype
1099 return self._constructor._simple_new(taken, name=self.name)
1100
1101 @final
1102 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
1103 """
1104 We only use pandas-style take when allow_fill is True _and_
1105 fill_value is not None.
1106 """
1107 if allow_fill and fill_value is not None:
1108 # only fill if we are passing a non-None fill_value
1109 if self._can_hold_na:
1110 if (indices < -1).any():
1111 raise ValueError(
1112 "When allow_fill=True and fill_value is not None, "
1113 "all indices must be >= -1"
1114 )
1115 else:
1116 cls_name = type(self).__name__
1117 raise ValueError(
1118 f"Unable to fill values because {cls_name} cannot contain NA"
1119 )
1120 else:
1121 allow_fill = False
1122 return allow_fill
1123
    # Shared docstring template; each subclass renders it via
    # ``@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)``.
    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """
1162
1163 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
1164 def repeat(self, repeats, axis=None):
1165 repeats = ensure_platform_int(repeats)
1166 nv.validate_repeat((), {"axis": axis})
1167 res_values = self._values.repeat(repeats)
1168
1169 # _constructor so RangeIndex-> Index with an int64 dtype
1170 return self._constructor._simple_new(res_values, name=self.name)
1171
1172 # --------------------------------------------------------------------
1173 # Copying Methods
1174
1175 def copy(
1176 self: _IndexT,
1177 name: Hashable | None = None,
1178 deep: bool = False,
1179 ) -> _IndexT:
1180 """
1181 Make a copy of this object.
1182
1183 Name is set on the new object.
1184
1185 Parameters
1186 ----------
1187 name : Label, optional
1188 Set name for new object.
1189 deep : bool, default False
1190
1191 Returns
1192 -------
1193 Index
1194 Index refer to new object which is a copy of this object.
1195
1196 Notes
1197 -----
1198 In most cases, there should be no functional difference from using
1199 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1200 """
1201
1202 name = self._validate_names(name=name, deep=deep)[0]
1203 if deep:
1204 new_data = self._data.copy()
1205 new_index = type(self)._simple_new(new_data, name=name)
1206 else:
1207 new_index = self._rename(name=name)
1208 return new_index
1209
1210 @final
1211 def __copy__(self: _IndexT, **kwargs) -> _IndexT:
1212 return self.copy(**kwargs)
1213
1214 @final
1215 def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
1216 """
1217 Parameters
1218 ----------
1219 memo, default None
1220 Standard signature. Unused
1221 """
1222 return self.copy(deep=True)
1223
1224 # --------------------------------------------------------------------
1225 # Rendering Methods
1226
1227 @final
1228 def __repr__(self) -> str_t:
1229 """
1230 Return a string representation for this object.
1231 """
1232 klass_name = type(self).__name__
1233 data = self._format_data()
1234 attrs = self._format_attrs()
1235 space = self._format_space()
1236 attrs_str = [f"{k}={v}" for k, v in attrs]
1237 prepr = f",{space}".join(attrs_str)
1238
1239 # no data provided, just attributes
1240 if data is None:
1241 data = ""
1242
1243 return f"{klass_name}({data}{prepr})"
1244
1245 def _format_space(self) -> str_t:
1246 # using space here controls if the attributes
1247 # are line separated or not (the default)
1248
1249 # max_seq_items = get_option('display.max_seq_items')
1250 # if len(self) > max_seq_items:
1251 # space = "\n%s" % (' ' * (len(klass) + 1))
1252 return " "
1253
    @property
    def _formatter_func(self):
        """
        Return the formatter function.

        Used when rendering individual values for display (see _format_data).
        """
        return default_pprint
1260
1261 def _format_data(self, name=None) -> str_t:
1262 """
1263 Return the formatted data as a unicode string.
1264 """
1265 # do we want to justify (only do so for non-objects)
1266 is_justify = True
1267
1268 if self.inferred_type == "string":
1269 is_justify = False
1270 elif self.inferred_type == "categorical":
1271 self = cast("CategoricalIndex", self)
1272 if is_object_dtype(self.categories):
1273 is_justify = False
1274
1275 return format_object_summary(
1276 self,
1277 self._formatter_func,
1278 is_justify=is_justify,
1279 name=name,
1280 line_break_each_value=self._is_multi,
1281 )
1282
1283 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
1284 """
1285 Return a list of tuples of the (attr,formatted_value).
1286 """
1287 attrs: list[tuple[str_t, str_t | int | bool | None]] = []
1288
1289 if not self._is_multi:
1290 attrs.append(("dtype", f"'{self.dtype}'"))
1291
1292 if self.name is not None:
1293 attrs.append(("name", default_pprint(self.name)))
1294 elif self._is_multi and any(x is not None for x in self.names):
1295 attrs.append(("names", default_pprint(self.names)))
1296
1297 max_seq_items = get_option("display.max_seq_items") or len(self)
1298 if len(self) > max_seq_items:
1299 attrs.append(("length", len(self)))
1300 return attrs
1301
1302 @final
1303 def _get_level_names(self) -> Hashable | Sequence[Hashable]:
1304 """
1305 Return a name or list of names with None replaced by the level number.
1306 """
1307 if self._is_multi:
1308 return [
1309 level if name is None else name for level, name in enumerate(self.names)
1310 ]
1311 else:
1312 return 0 if self.name is None else self.name
1313
1314 @final
1315 def _mpl_repr(self) -> np.ndarray:
1316 # how to represent ourselves to matplotlib
1317 if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
1318 return cast(np.ndarray, self.values)
1319 return self.astype(object, copy=False)._values
1320
1321 def format(
1322 self,
1323 name: bool = False,
1324 formatter: Callable | None = None,
1325 na_rep: str_t = "NaN",
1326 ) -> list[str_t]:
1327 """
1328 Render a string representation of the Index.
1329 """
1330 header = []
1331 if name:
1332 header.append(
1333 pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
1334 if self.name is not None
1335 else ""
1336 )
1337
1338 if formatter is not None:
1339 return header + list(self.map(formatter))
1340
1341 return self._format_with_header(header, na_rep=na_rep)
1342
1343 def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
1344 from pandas.io.formats.format import format_array
1345
1346 values = self._values
1347
1348 if is_object_dtype(values.dtype):
1349 values = cast(np.ndarray, values)
1350 values = lib.maybe_convert_objects(values, safe=True)
1351
1352 result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
1353
1354 # could have nans
1355 mask = is_float_nan(values)
1356 if mask.any():
1357 result_arr = np.array(result)
1358 result_arr[mask] = na_rep
1359 result = result_arr.tolist()
1360 else:
1361 result = trim_front(format_array(values, None, justify="left"))
1362 return header + result
1363
1364 def _format_native_types(
1365 self,
1366 *,
1367 na_rep: str_t = "",
1368 decimal: str_t = ".",
1369 float_format=None,
1370 date_format=None,
1371 quoting=None,
1372 ) -> npt.NDArray[np.object_]:
1373 """
1374 Actually format specific types of the index.
1375 """
1376 from pandas.io.formats.format import FloatArrayFormatter
1377
1378 if is_float_dtype(self.dtype) and not is_extension_array_dtype(self.dtype):
1379 formatter = FloatArrayFormatter(
1380 self._values,
1381 na_rep=na_rep,
1382 float_format=float_format,
1383 decimal=decimal,
1384 quoting=quoting,
1385 fixed_width=False,
1386 )
1387 return formatter.get_result_as_array()
1388
1389 mask = isna(self)
1390 if not is_object_dtype(self) and not quoting:
1391 values = np.asarray(self).astype(str)
1392 else:
1393 values = np.array(self, dtype=object, copy=True)
1394
1395 values[mask] = na_rep
1396 return values
1397
1398 def _summary(self, name=None) -> str_t:
1399 """
1400 Return a summarized representation.
1401
1402 Parameters
1403 ----------
1404 name : str
1405 name to use in the summary representation
1406
1407 Returns
1408 -------
1409 String with a summarized representation of the index
1410 """
1411 if len(self) > 0:
1412 head = self[0]
1413 if hasattr(head, "format") and not isinstance(head, str):
1414 head = head.format()
1415 elif needs_i8_conversion(self.dtype):
1416 # e.g. Timedelta, display as values, not quoted
1417 head = self._formatter_func(head).replace("'", "")
1418 tail = self[-1]
1419 if hasattr(tail, "format") and not isinstance(tail, str):
1420 tail = tail.format()
1421 elif needs_i8_conversion(self.dtype):
1422 # e.g. Timedelta, display as values, not quoted
1423 tail = self._formatter_func(tail).replace("'", "")
1424
1425 index_summary = f", {head} to {tail}"
1426 else:
1427 index_summary = ""
1428
1429 if name is None:
1430 name = type(self).__name__
1431 return f"{name}: {len(self)} entries{index_summary}"
1432
1433 # --------------------------------------------------------------------
1434 # Conversion Methods
1435
1436 def to_flat_index(self: _IndexT) -> _IndexT:
1437 """
1438 Identity method.
1439
1440 This is implemented for compatibility with subclass implementations
1441 when chaining.
1442
1443 Returns
1444 -------
1445 pd.Index
1446 Caller.
1447
1448 See Also
1449 --------
1450 MultiIndex.to_flat_index : Subclass implementation.
1451 """
1452 return self
1453
1454 @final
1455 def to_series(self, index=None, name: Hashable = None) -> Series:
1456 """
1457 Create a Series with both index and values equal to the index keys.
1458
1459 Useful with map for returning an indexer based on an index.
1460
1461 Parameters
1462 ----------
1463 index : Index, optional
1464 Index of resulting Series. If None, defaults to original index.
1465 name : str, optional
1466 Name of resulting Series. If None, defaults to name of original
1467 index.
1468
1469 Returns
1470 -------
1471 Series
1472 The dtype will be based on the type of the Index values.
1473
1474 See Also
1475 --------
1476 Index.to_frame : Convert an Index to a DataFrame.
1477 Series.to_frame : Convert Series to DataFrame.
1478
1479 Examples
1480 --------
1481 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1482
1483 By default, the original Index and original name is reused.
1484
1485 >>> idx.to_series()
1486 animal
1487 Ant Ant
1488 Bear Bear
1489 Cow Cow
1490 Name: animal, dtype: object
1491
1492 To enforce a new Index, specify new labels to ``index``:
1493
1494 >>> idx.to_series(index=[0, 1, 2])
1495 0 Ant
1496 1 Bear
1497 2 Cow
1498 Name: animal, dtype: object
1499
1500 To override the name of the resulting column, specify `name`:
1501
1502 >>> idx.to_series(name='zoo')
1503 animal
1504 Ant Ant
1505 Bear Bear
1506 Cow Cow
1507 Name: zoo, dtype: object
1508 """
1509 from pandas import Series
1510
1511 if index is None:
1512 index = self._view()
1513 if name is None:
1514 name = self.name
1515
1516 return Series(self._values.copy(), index=index, name=name)
1517
1518 def to_frame(
1519 self, index: bool = True, name: Hashable = lib.no_default
1520 ) -> DataFrame:
1521 """
1522 Create a DataFrame with a column containing the Index.
1523
1524 Parameters
1525 ----------
1526 index : bool, default True
1527 Set the index of the returned DataFrame as the original Index.
1528
1529 name : object, defaults to index.name
1530 The passed name should substitute for the index name (if it has
1531 one).
1532
1533 Returns
1534 -------
1535 DataFrame
1536 DataFrame containing the original Index data.
1537
1538 See Also
1539 --------
1540 Index.to_series : Convert an Index to a Series.
1541 Series.to_frame : Convert Series to DataFrame.
1542
1543 Examples
1544 --------
1545 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1546 >>> idx.to_frame()
1547 animal
1548 animal
1549 Ant Ant
1550 Bear Bear
1551 Cow Cow
1552
1553 By default, the original Index is reused. To enforce a new Index:
1554
1555 >>> idx.to_frame(index=False)
1556 animal
1557 0 Ant
1558 1 Bear
1559 2 Cow
1560
1561 To override the name of the resulting column, specify `name`:
1562
1563 >>> idx.to_frame(index=False, name='zoo')
1564 zoo
1565 0 Ant
1566 1 Bear
1567 2 Cow
1568 """
1569 from pandas import DataFrame
1570
1571 if name is lib.no_default:
1572 name = self._get_level_names()
1573 result = DataFrame({name: self._values.copy()})
1574
1575 if index:
1576 result.index = self
1577 return result
1578
1579 # --------------------------------------------------------------------
1580 # Name-Centric Methods
1581
    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.
        """
        # backed by the _name attribute; writes go through the setter below
        return self._name
1588
    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        # called for its validation side effect; return value is discarded
        maybe_extract_name(value, None, type(self))
        self._name = value
1599
1600 @final
1601 def _validate_names(
1602 self, name=None, names=None, deep: bool = False
1603 ) -> list[Hashable]:
1604 """
1605 Handles the quirks of having a singular 'name' parameter for general
1606 Index and plural 'names' parameter for MultiIndex.
1607 """
1608 from copy import deepcopy
1609
1610 if names is not None and name is not None:
1611 raise TypeError("Can only provide one of `names` and `name`")
1612 if names is None and name is None:
1613 new_names = deepcopy(self.names) if deep else self.names
1614 elif names is not None:
1615 if not is_list_like(names):
1616 raise TypeError("Must pass list-like as `names`.")
1617 new_names = names
1618 elif not is_list_like(name):
1619 new_names = [name]
1620 else:
1621 new_names = name
1622
1623 if len(new_names) != len(self.names):
1624 raise ValueError(
1625 f"Length of new names must be {len(self.names)}, got {len(new_names)}"
1626 )
1627
1628 # All items in 'new_names' need to be hashable
1629 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
1630
1631 return new_names
1632
1633 def _get_default_index_names(
1634 self, names: Hashable | Sequence[Hashable] | None = None, default=None
1635 ) -> list[Hashable]:
1636 """
1637 Get names of index.
1638
1639 Parameters
1640 ----------
1641 names : int, str or 1-dimensional list, default None
1642 Index names to set.
1643 default : str
1644 Default name of index.
1645
1646 Raises
1647 ------
1648 TypeError
1649 if names not str or list-like
1650 """
1651 from pandas.core.indexes.multi import MultiIndex
1652
1653 if names is not None:
1654 if isinstance(names, (int, str)):
1655 names = [names]
1656
1657 if not isinstance(names, list) and names is not None:
1658 raise ValueError("Index names must be str or 1-dimensional list")
1659
1660 if not names:
1661 if isinstance(self, MultiIndex):
1662 names = com.fill_missing_names(self.names)
1663 else:
1664 names = [default] if self.name is None else [self.name]
1665
1666 return names
1667
1668 def _get_names(self) -> FrozenList:
1669 return FrozenList((self.name,))
1670
1671 def _set_names(self, values, *, level=None) -> None:
1672 """
1673 Set new names on index. Each name has to be a hashable type.
1674
1675 Parameters
1676 ----------
1677 values : str or sequence
1678 name(s) to set
1679 level : int, level name, or sequence of int/level names (default None)
1680 If the index is a MultiIndex (hierarchical), level(s) to set (None
1681 for all levels). Otherwise level must be None
1682
1683 Raises
1684 ------
1685 TypeError if each name is not hashable.
1686 """
1687 if not is_list_like(values):
1688 raise ValueError("Names must be a list-like")
1689 if len(values) != 1:
1690 raise ValueError(f"Length of new names must be 1, got {len(values)}")
1691
1692 # GH 20527
1693 # All items in 'name' need to be hashable:
1694 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
1695
1696 self._name = values[0]
1697
1698 names = property(fset=_set_names, fget=_get_names)
1699
    # typing overloads: with inplace=False (the default) a new Index of the
    # same type is returned; with inplace=True the Index is mutated and the
    # method returns None
    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: Literal[False] = ...
    ) -> _IndexT:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: bool = ...
    ) -> _IndexT | None:
        ...
1715
1716 def set_names(
1717 self: _IndexT, names, *, level=None, inplace: bool = False
1718 ) -> _IndexT | None:
1719 """
1720 Set Index or MultiIndex name.
1721
1722 Able to set new names partially and by level.
1723
1724 Parameters
1725 ----------
1726
1727 names : label or list of label or dict-like for MultiIndex
1728 Name(s) to set.
1729
1730 .. versionchanged:: 1.3.0
1731
1732 level : int, label or list of int or label, optional
1733 If the index is a MultiIndex and names is not dict-like, level(s) to set
1734 (None for all levels). Otherwise level must be None.
1735
1736 .. versionchanged:: 1.3.0
1737
1738 inplace : bool, default False
1739 Modifies the object directly, instead of creating a new Index or
1740 MultiIndex.
1741
1742 Returns
1743 -------
1744 Index or None
1745 The same type as the caller or None if ``inplace=True``.
1746
1747 See Also
1748 --------
1749 Index.rename : Able to set new names without level.
1750
1751 Examples
1752 --------
1753 >>> idx = pd.Index([1, 2, 3, 4])
1754 >>> idx
1755 Index([1, 2, 3, 4], dtype='int64')
1756 >>> idx.set_names('quarter')
1757 Index([1, 2, 3, 4], dtype='int64', name='quarter')
1758
1759 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1760 ... [2018, 2019]])
1761 >>> idx
1762 MultiIndex([('python', 2018),
1763 ('python', 2019),
1764 ( 'cobra', 2018),
1765 ( 'cobra', 2019)],
1766 )
1767 >>> idx = idx.set_names(['kind', 'year'])
1768 >>> idx.set_names('species', level=0)
1769 MultiIndex([('python', 2018),
1770 ('python', 2019),
1771 ( 'cobra', 2018),
1772 ( 'cobra', 2019)],
1773 names=['species', 'year'])
1774
1775 When renaming levels with a dict, levels can not be passed.
1776
1777 >>> idx.set_names({'kind': 'snake'})
1778 MultiIndex([('python', 2018),
1779 ('python', 2019),
1780 ( 'cobra', 2018),
1781 ( 'cobra', 2019)],
1782 names=['snake', 'year'])
1783 """
1784 if level is not None and not isinstance(self, ABCMultiIndex):
1785 raise ValueError("Level must be None for non-MultiIndex")
1786
1787 if level is not None and not is_list_like(level) and is_list_like(names):
1788 raise TypeError("Names must be a string when a single level is provided.")
1789
1790 if not is_list_like(names) and level is None and self.nlevels > 1:
1791 raise TypeError("Must pass list-like as `names`.")
1792
1793 if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
1794 raise TypeError("Can only pass dict-like as `names` for MultiIndex.")
1795
1796 if is_dict_like(names) and level is not None:
1797 raise TypeError("Can not pass level for dictlike `names`.")
1798
1799 if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
1800 # Transform dict to list of new names and corresponding levels
1801 level, names_adjusted = [], []
1802 for i, name in enumerate(self.names):
1803 if name in names.keys():
1804 level.append(i)
1805 names_adjusted.append(names[name])
1806 names = names_adjusted
1807
1808 if not is_list_like(names):
1809 names = [names]
1810 if level is not None and not is_list_like(level):
1811 level = [level]
1812
1813 if inplace:
1814 idx = self
1815 else:
1816 idx = self._view()
1817
1818 idx._set_names(names, level=level)
1819 if not inplace:
1820 return idx
1821 return None
1822
1823 def rename(self, name, inplace: bool = False):
1824 """
1825 Alter Index or MultiIndex name.
1826
1827 Able to set new names without level. Defaults to returning new index.
1828 Length of names must match number of levels in MultiIndex.
1829
1830 Parameters
1831 ----------
1832 name : label or list of labels
1833 Name(s) to set.
1834 inplace : bool, default False
1835 Modifies the object directly, instead of creating a new Index or
1836 MultiIndex.
1837
1838 Returns
1839 -------
1840 Index or None
1841 The same type as the caller or None if ``inplace=True``.
1842
1843 See Also
1844 --------
1845 Index.set_names : Able to set new names partially and by level.
1846
1847 Examples
1848 --------
1849 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
1850 >>> idx.rename('grade')
1851 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
1852
1853 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1854 ... [2018, 2019]],
1855 ... names=['kind', 'year'])
1856 >>> idx
1857 MultiIndex([('python', 2018),
1858 ('python', 2019),
1859 ( 'cobra', 2018),
1860 ( 'cobra', 2019)],
1861 names=['kind', 'year'])
1862 >>> idx.rename(['species', 'year'])
1863 MultiIndex([('python', 2018),
1864 ('python', 2019),
1865 ( 'cobra', 2018),
1866 ( 'cobra', 2019)],
1867 names=['species', 'year'])
1868 >>> idx.rename('species')
1869 Traceback (most recent call last):
1870 TypeError: Must pass list-like as `names`.
1871 """
1872 return self.set_names([name], inplace=inplace)
1873
1874 # --------------------------------------------------------------------
1875 # Level-Centric Methods
1876
    @property
    def nlevels(self) -> int:
        """
        Number of levels.
        """
        # a flat Index always has exactly one level; MultiIndex overrides this
        return 1
1883
1884 def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
1885 """
1886 Compat with MultiIndex.
1887 """
1888 return self
1889
1890 @final
1891 def _validate_index_level(self, level) -> None:
1892 """
1893 Validate index level.
1894
1895 For single-level Index getting level number is a no-op, but some
1896 verification must be done like in MultiIndex.
1897
1898 """
1899 if isinstance(level, int):
1900 if level < 0 and level != -1:
1901 raise IndexError(
1902 "Too many levels: Index has only 1 level, "
1903 f"{level} is not a valid level number"
1904 )
1905 if level > 0:
1906 raise IndexError(
1907 f"Too many levels: Index has only 1 level, not {level + 1}"
1908 )
1909 elif level != self.name:
1910 raise KeyError(
1911 f"Requested level ({level}) does not match index name ({self.name})"
1912 )
1913
1914 def _get_level_number(self, level) -> int:
1915 self._validate_index_level(level)
1916 return 0
1917
1918 def sortlevel(
1919 self, level=None, ascending: bool | list[bool] = True, sort_remaining=None
1920 ):
1921 """
1922 For internal compatibility with the Index API.
1923
1924 Sort the Index. This is for compat with MultiIndex
1925
1926 Parameters
1927 ----------
1928 ascending : bool, default True
1929 False to sort in descending order
1930
1931 level, sort_remaining are compat parameters
1932
1933 Returns
1934 -------
1935 Index
1936 """
1937 if not isinstance(ascending, (list, bool)):
1938 raise TypeError(
1939 "ascending must be a single bool value or"
1940 "a list of bool values of length 1"
1941 )
1942
1943 if isinstance(ascending, list):
1944 if len(ascending) != 1:
1945 raise TypeError("ascending must be a list of bool values of length 1")
1946 ascending = ascending[0]
1947
1948 if not isinstance(ascending, bool):
1949 raise TypeError("ascending must be a bool value")
1950
1951 return self.sort_values(return_indexer=True, ascending=ascending)
1952
1953 def _get_level_values(self, level) -> Index:
1954 """
1955 Return an Index of values for requested level.
1956
1957 This is primarily useful to get an individual level of values from a
1958 MultiIndex, but is provided on Index as well for compatibility.
1959
1960 Parameters
1961 ----------
1962 level : int or str
1963 It is either the integer position or the name of the level.
1964
1965 Returns
1966 -------
1967 Index
1968 Calling object, as there is only one level in the Index.
1969
1970 See Also
1971 --------
1972 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
1973
1974 Notes
1975 -----
1976 For Index, level should be 0, since there are no multiple levels.
1977
1978 Examples
1979 --------
1980 >>> idx = pd.Index(list('abc'))
1981 >>> idx
1982 Index(['a', 'b', 'c'], dtype='object')
1983
1984 Get level values by supplying `level` as integer:
1985
1986 >>> idx.get_level_values(0)
1987 Index(['a', 'b', 'c'], dtype='object')
1988 """
1989 self._validate_index_level(level)
1990 return self
1991
1992 get_level_values = _get_level_values
1993
1994 @final
1995 def droplevel(self, level: IndexLabel = 0):
1996 """
1997 Return index with requested level(s) removed.
1998
1999 If resulting index has only 1 level left, the result will be
2000 of Index type, not MultiIndex. The original index is not modified inplace.
2001
2002 Parameters
2003 ----------
2004 level : int, str, or list-like, default 0
2005 If a string is given, must be the name of a level
2006 If list-like, elements must be names or indexes of levels.
2007
2008 Returns
2009 -------
2010 Index or MultiIndex
2011
2012 Examples
2013 --------
2014 >>> mi = pd.MultiIndex.from_arrays(
2015 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2016 >>> mi
2017 MultiIndex([(1, 3, 5),
2018 (2, 4, 6)],
2019 names=['x', 'y', 'z'])
2020
2021 >>> mi.droplevel()
2022 MultiIndex([(3, 5),
2023 (4, 6)],
2024 names=['y', 'z'])
2025
2026 >>> mi.droplevel(2)
2027 MultiIndex([(1, 3),
2028 (2, 4)],
2029 names=['x', 'y'])
2030
2031 >>> mi.droplevel('z')
2032 MultiIndex([(1, 3),
2033 (2, 4)],
2034 names=['x', 'y'])
2035
2036 >>> mi.droplevel(['x', 'y'])
2037 Index([5, 6], dtype='int64', name='z')
2038 """
2039 if not isinstance(level, (tuple, list)):
2040 level = [level]
2041
2042 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2043
2044 return self._drop_level_numbers(levnums)
2045
    @final
    def _drop_level_numbers(self, levnums: list[int]):
        """
        Drop MultiIndex levels by level _number_, not name.

        Parameters
        ----------
        levnums : list of int
            Positions of the levels to remove. Callers (see ``droplevel``)
            pass these sorted in descending order so that popping one level
            does not shift the positions of levels still to be removed.

        Returns
        -------
        Index or MultiIndex
            ``self`` unchanged when there is nothing to drop on a
            non-MultiIndex; a flat Index when exactly one level remains;
            otherwise a new MultiIndex.

        Raises
        ------
        ValueError
            If dropping ``levnums`` would leave no levels at all.
        """

        if not levnums and not isinstance(self, ABCMultiIndex):
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        # work on copies of the level/code/name lists, then pop the dropped ones
        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:
            # collapsing to a single level: materialize a flat Index
            lev = new_levels[0]

            if len(lev) == 0:
                # If lev is empty, lev.take will fail GH#42055
                if len(new_codes[0]) == 0:
                    # GH#45230 preserve RangeIndex here
                    # see test_reset_index_empty_rangeindex
                    result = lev[:0]
                else:
                    res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                    # _constructor instead of type(lev) for RangeIndex compat GH#35230
                    result = lev._constructor._simple_new(res_values, name=new_names[0])
            else:
                # set nan if needed
                mask = new_codes[0] == -1
                result = new_levels[0].take(new_codes[0])
                if mask.any():
                    result = result.putmask(mask, np.nan)

                result._name = new_names[0]

            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )
2103
2104 # --------------------------------------------------------------------
2105 # Introspection Methods
2106
2107 @cache_readonly
2108 @final
2109 def _can_hold_na(self) -> bool:
2110 if isinstance(self.dtype, ExtensionDtype):
2111 if isinstance(self.dtype, IntervalDtype):
2112 # FIXME(GH#45720): this is inaccurate for integer-backed
2113 # IntervalArray, but without it other.categories.take raises
2114 # in IntervalArray._cmp_method
2115 return True
2116 return self.dtype._can_hold_na
2117 if self.dtype.kind in ["i", "u", "b"]:
2118 return False
2119 return True
2120
    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.

        Returns
        -------
        bool

        See Also
        --------
        Index.is_monotonic_decreasing : Check if the values are equal or decreasing.

        Examples
        --------
        >>> pd.Index([1, 2, 3]).is_monotonic_increasing
        True
        >>> pd.Index([1, 2, 2]).is_monotonic_increasing
        True
        >>> pd.Index([1, 3, 2]).is_monotonic_increasing
        False
        """
        # delegated to the engine backing this Index
        return self._engine.is_monotonic_increasing
2144
    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return a boolean if the values are equal or decreasing.

        Returns
        -------
        bool

        See Also
        --------
        Index.is_monotonic_increasing : Check if the values are equal or increasing.

        Examples
        --------
        >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
        True
        >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
        True
        >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
        False
        """
        # delegated to the engine backing this Index
        return self._engine.is_monotonic_decreasing
2168
    @final
    @property
    def _is_strictly_monotonic_increasing(self) -> bool:
        """
        Return if the index is strictly monotonic increasing
        (only increasing) values.

        Examples
        --------
        >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
        True
        >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
        False
        >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
        False
        """
        # strictly increasing == non-decreasing with no repeated values
        return self.is_unique and self.is_monotonic_increasing
2186
    @final
    @property
    def _is_strictly_monotonic_decreasing(self) -> bool:
        """
        Return if the index is strictly monotonic decreasing
        (only decreasing) values.

        Examples
        --------
        >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
        True
        >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
        False
        >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
        False
        """
        # strictly decreasing == non-increasing with no repeated values
        return self.is_unique and self.is_monotonic_decreasing
2204
    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return if the index has unique values.

        Returns
        -------
        bool

        See Also
        --------
        Index.has_duplicates : Inverse method that checks if it has duplicate values.

        Examples
        --------
        >>> idx = pd.Index([1, 5, 7, 7])
        >>> idx.is_unique
        False

        >>> idx = pd.Index([1, 5, 7])
        >>> idx.is_unique
        True

        >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_unique
        False

        >>> idx = pd.Index(["Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_unique
        True
        """
        # delegated to the engine; cached since uniqueness never changes
        return self._engine.is_unique
2239
2240 @final
2241 @property
2242 def has_duplicates(self) -> bool:
2243 """
2244 Check if the Index has duplicate values.
2245
2246 Returns
2247 -------
2248 bool
2249 Whether or not the Index has duplicate values.
2250
2251 See Also
2252 --------
2253 Index.is_unique : Inverse method that checks if it has unique values.
2254
2255 Examples
2256 --------
2257 >>> idx = pd.Index([1, 5, 7, 7])
2258 >>> idx.has_duplicates
2259 True
2260
2261 >>> idx = pd.Index([1, 5, 7])
2262 >>> idx.has_duplicates
2263 False
2264
2265 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2266 ... "Watermelon"]).astype("category")
2267 >>> idx.has_duplicates
2268 True
2269
2270 >>> idx = pd.Index(["Orange", "Apple",
2271 ... "Watermelon"]).astype("category")
2272 >>> idx.has_duplicates
2273 False
2274 """
2275 return not self.is_unique
2276
2277 @final
2278 def is_boolean(self) -> bool:
2279 """
2280 Check if the Index only consists of booleans.
2281
2282 .. deprecated:: 2.0.0
2283 Use `pandas.api.types.is_bool_dtype` instead.
2284
2285 Returns
2286 -------
2287 bool
2288 Whether or not the Index only consists of booleans.
2289
2290 See Also
2291 --------
2292 is_integer : Check if the Index only consists of integers (deprecated).
2293 is_floating : Check if the Index is a floating type (deprecated).
2294 is_numeric : Check if the Index only consists of numeric data (deprecated).
2295 is_object : Check if the Index is of the object dtype (deprecated).
2296 is_categorical : Check if the Index holds categorical data.
2297 is_interval : Check if the Index holds Interval objects (deprecated).
2298
2299 Examples
2300 --------
2301 >>> idx = pd.Index([True, False, True])
2302 >>> idx.is_boolean() # doctest: +SKIP
2303 True
2304
2305 >>> idx = pd.Index(["True", "False", "True"])
2306 >>> idx.is_boolean() # doctest: +SKIP
2307 False
2308
2309 >>> idx = pd.Index([True, False, "True"])
2310 >>> idx.is_boolean() # doctest: +SKIP
2311 False
2312 """
2313 warnings.warn(
2314 f"{type(self).__name__}.is_boolean is deprecated. "
2315 "Use pandas.api.types.is_bool_type instead.",
2316 FutureWarning,
2317 stacklevel=find_stack_level(),
2318 )
2319 return self.inferred_type in ["boolean"]
2320
2321 @final
2322 def is_integer(self) -> bool:
2323 """
2324 Check if the Index only consists of integers.
2325
2326 .. deprecated:: 2.0.0
2327 Use `pandas.api.types.is_integer_dtype` instead.
2328
2329 Returns
2330 -------
2331 bool
2332 Whether or not the Index only consists of integers.
2333
2334 See Also
2335 --------
2336 is_boolean : Check if the Index only consists of booleans (deprecated).
2337 is_floating : Check if the Index is a floating type (deprecated).
2338 is_numeric : Check if the Index only consists of numeric data (deprecated).
2339 is_object : Check if the Index is of the object dtype. (deprecated).
2340 is_categorical : Check if the Index holds categorical data (deprecated).
2341 is_interval : Check if the Index holds Interval objects (deprecated).
2342
2343 Examples
2344 --------
2345 >>> idx = pd.Index([1, 2, 3, 4])
2346 >>> idx.is_integer() # doctest: +SKIP
2347 True
2348
2349 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2350 >>> idx.is_integer() # doctest: +SKIP
2351 False
2352
2353 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2354 >>> idx.is_integer() # doctest: +SKIP
2355 False
2356 """
2357 warnings.warn(
2358 f"{type(self).__name__}.is_integer is deprecated. "
2359 "Use pandas.api.types.is_integer_dtype instead.",
2360 FutureWarning,
2361 stacklevel=find_stack_level(),
2362 )
2363 return self.inferred_type in ["integer"]
2364
2365 @final
2366 def is_floating(self) -> bool:
2367 """
2368 Check if the Index is a floating type.
2369
2370 .. deprecated:: 2.0.0
2371 Use `pandas.api.types.is_float_dtype` instead
2372
2373 The Index may consist of only floats, NaNs, or a mix of floats,
2374 integers, or NaNs.
2375
2376 Returns
2377 -------
2378 bool
2379 Whether or not the Index only consists of only consists of floats, NaNs, or
2380 a mix of floats, integers, or NaNs.
2381
2382 See Also
2383 --------
2384 is_boolean : Check if the Index only consists of booleans (deprecated).
2385 is_integer : Check if the Index only consists of integers (deprecated).
2386 is_numeric : Check if the Index only consists of numeric data (deprecated).
2387 is_object : Check if the Index is of the object dtype. (deprecated).
2388 is_categorical : Check if the Index holds categorical data (deprecated).
2389 is_interval : Check if the Index holds Interval objects (deprecated).
2390
2391 Examples
2392 --------
2393 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2394 >>> idx.is_floating() # doctest: +SKIP
2395 True
2396
2397 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2398 >>> idx.is_floating() # doctest: +SKIP
2399 True
2400
2401 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2402 >>> idx.is_floating() # doctest: +SKIP
2403 True
2404
2405 >>> idx = pd.Index([1, 2, 3, 4])
2406 >>> idx.is_floating() # doctest: +SKIP
2407 False
2408 """
2409 warnings.warn(
2410 f"{type(self).__name__}.is_floating is deprecated. "
2411 "Use pandas.api.types.is_float_dtype instead.",
2412 FutureWarning,
2413 stacklevel=find_stack_level(),
2414 )
2415 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2416
2417 @final
2418 def is_numeric(self) -> bool:
2419 """
2420 Check if the Index only consists of numeric data.
2421
2422 .. deprecated:: 2.0.0
2423 Use `pandas.api.types.is_numeric_dtype` instead.
2424
2425 Returns
2426 -------
2427 bool
2428 Whether or not the Index only consists of numeric data.
2429
2430 See Also
2431 --------
2432 is_boolean : Check if the Index only consists of booleans (deprecated).
2433 is_integer : Check if the Index only consists of integers (deprecated).
2434 is_floating : Check if the Index is a floating type (deprecated).
2435 is_object : Check if the Index is of the object dtype. (deprecated).
2436 is_categorical : Check if the Index holds categorical data (deprecated).
2437 is_interval : Check if the Index holds Interval objects (deprecated).
2438
2439 Examples
2440 --------
2441 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2442 >>> idx.is_numeric() # doctest: +SKIP
2443 True
2444
2445 >>> idx = pd.Index([1, 2, 3, 4.0])
2446 >>> idx.is_numeric() # doctest: +SKIP
2447 True
2448
2449 >>> idx = pd.Index([1, 2, 3, 4])
2450 >>> idx.is_numeric() # doctest: +SKIP
2451 True
2452
2453 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2454 >>> idx.is_numeric() # doctest: +SKIP
2455 True
2456
2457 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2458 >>> idx.is_numeric() # doctest: +SKIP
2459 False
2460 """
2461 warnings.warn(
2462 f"{type(self).__name__}.is_numeric is deprecated. "
2463 "Use pandas.api.types.is_any_real_numeric_dtype instead",
2464 FutureWarning,
2465 stacklevel=find_stack_level(),
2466 )
2467 return self.inferred_type in ["integer", "floating"]
2468
2469 @final
2470 def is_object(self) -> bool:
2471 """
2472 Check if the Index is of the object dtype.
2473
2474 .. deprecated:: 2.0.0
2475 Use `pandas.api.types.is_object_dtype` instead.
2476
2477 Returns
2478 -------
2479 bool
2480 Whether or not the Index is of the object dtype.
2481
2482 See Also
2483 --------
2484 is_boolean : Check if the Index only consists of booleans (deprecated).
2485 is_integer : Check if the Index only consists of integers (deprecated).
2486 is_floating : Check if the Index is a floating type (deprecated).
2487 is_numeric : Check if the Index only consists of numeric data (deprecated).
2488 is_categorical : Check if the Index holds categorical data (deprecated).
2489 is_interval : Check if the Index holds Interval objects (deprecated).
2490
2491 Examples
2492 --------
2493 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2494 >>> idx.is_object() # doctest: +SKIP
2495 True
2496
2497 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2498 >>> idx.is_object() # doctest: +SKIP
2499 True
2500
2501 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2502 ... "Watermelon"]).astype("category")
2503 >>> idx.is_object() # doctest: +SKIP
2504 False
2505
2506 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2507 >>> idx.is_object() # doctest: +SKIP
2508 False
2509 """
2510 warnings.warn(
2511 f"{type(self).__name__}.is_object is deprecated."
2512 "Use pandas.api.types.is_object_dtype instead",
2513 FutureWarning,
2514 stacklevel=find_stack_level(),
2515 )
2516 return is_object_dtype(self.dtype)
2517
2518 @final
2519 def is_categorical(self) -> bool:
2520 """
2521 Check if the Index holds categorical data.
2522
2523 .. deprecated:: 2.0.0
2524 Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.
2525
2526 Returns
2527 -------
2528 bool
2529 True if the Index is categorical.
2530
2531 See Also
2532 --------
2533 CategoricalIndex : Index for categorical data.
2534 is_boolean : Check if the Index only consists of booleans (deprecated).
2535 is_integer : Check if the Index only consists of integers (deprecated).
2536 is_floating : Check if the Index is a floating type (deprecated).
2537 is_numeric : Check if the Index only consists of numeric data (deprecated).
2538 is_object : Check if the Index is of the object dtype. (deprecated).
2539 is_interval : Check if the Index holds Interval objects (deprecated).
2540
2541 Examples
2542 --------
2543 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2544 ... "Watermelon"]).astype("category")
2545 >>> idx.is_categorical() # doctest: +SKIP
2546 True
2547
2548 >>> idx = pd.Index([1, 3, 5, 7])
2549 >>> idx.is_categorical() # doctest: +SKIP
2550 False
2551
2552 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2553 >>> s
2554 0 Peter
2555 1 Victor
2556 2 Elisabeth
2557 3 Mar
2558 dtype: object
2559 >>> s.index.is_categorical() # doctest: +SKIP
2560 False
2561 """
2562 warnings.warn(
2563 f"{type(self).__name__}.is_categorical is deprecated."
2564 "Use pandas.api.types.is_categorical_dtype instead",
2565 FutureWarning,
2566 stacklevel=find_stack_level(),
2567 )
2568
2569 return self.inferred_type in ["categorical"]
2570
2571 @final
2572 def is_interval(self) -> bool:
2573 """
2574 Check if the Index holds Interval objects.
2575
2576 .. deprecated:: 2.0.0
2577 Use `isinstance(index.dtype, pd.IntervalDtype)` instead.
2578
2579 Returns
2580 -------
2581 bool
2582 Whether or not the Index holds Interval objects.
2583
2584 See Also
2585 --------
2586 IntervalIndex : Index for Interval objects.
2587 is_boolean : Check if the Index only consists of booleans (deprecated).
2588 is_integer : Check if the Index only consists of integers (deprecated).
2589 is_floating : Check if the Index is a floating type (deprecated).
2590 is_numeric : Check if the Index only consists of numeric data (deprecated).
2591 is_object : Check if the Index is of the object dtype. (deprecated).
2592 is_categorical : Check if the Index holds categorical data (deprecated).
2593
2594 Examples
2595 --------
2596 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2597 ... pd.Interval(left=5, right=10)])
2598 >>> idx.is_interval() # doctest: +SKIP
2599 True
2600
2601 >>> idx = pd.Index([1, 3, 5, 7])
2602 >>> idx.is_interval() # doctest: +SKIP
2603 False
2604 """
2605 warnings.warn(
2606 f"{type(self).__name__}.is_interval is deprecated."
2607 "Use pandas.api.types.is_interval_dtype instead",
2608 FutureWarning,
2609 stacklevel=find_stack_level(),
2610 )
2611 return self.inferred_type in ["interval"]
2612
2613 @final
2614 def _holds_integer(self) -> bool:
2615 """
2616 Whether the type is an integer type.
2617 """
2618 return self.inferred_type in ["integer", "mixed-integer"]
2619
2620 @final
2621 def holds_integer(self) -> bool:
2622 """
2623 Whether the type is an integer type.
2624
2625 .. deprecated:: 2.0.0
2626 Use `pandas.api.types.infer_dtype` instead
2627 """
2628 warnings.warn(
2629 f"{type(self).__name__}.holds_integer is deprecated. "
2630 "Use pandas.api.types.infer_dtype instead.",
2631 FutureWarning,
2632 stacklevel=find_stack_level(),
2633 )
2634 return self._holds_integer()
2635
2636 @cache_readonly
2637 def inferred_type(self) -> str_t:
2638 """
2639 Return a string of the type inferred from the values.
2640 """
2641 return lib.infer_dtype(self._values, skipna=False)
2642
2643 @cache_readonly
2644 @final
2645 def _is_all_dates(self) -> bool:
2646 """
2647 Whether or not the index values only consist of dates.
2648 """
2649 if needs_i8_conversion(self.dtype):
2650 return True
2651 elif self.dtype != _dtype_obj:
2652 # TODO(ExtensionIndex): 3rd party EA might override?
2653 # Note: this includes IntervalIndex, even when the left/right
2654 # contain datetime-like objects.
2655 return False
2656 elif self._is_multi:
2657 return False
2658 return is_datetime_array(ensure_object(self._values))
2659
2660 @final
2661 @cache_readonly
2662 def _is_multi(self) -> bool:
2663 """
2664 Cached check equivalent to isinstance(self, MultiIndex)
2665 """
2666 return isinstance(self, ABCMultiIndex)
2667
2668 # --------------------------------------------------------------------
2669 # Pickle Methods
2670
2671 def __reduce__(self):
2672 d = {"data": self._data, "name": self.name}
2673 return _new_Index, (type(self), d), None
2674
2675 # --------------------------------------------------------------------
2676 # Null Handling Methods
2677
2678 @cache_readonly
2679 def _na_value(self):
2680 """The expected NA value to use with this index."""
2681 dtype = self.dtype
2682 if isinstance(dtype, np.dtype):
2683 if dtype.kind in ["m", "M"]:
2684 return NaT
2685 return np.nan
2686 return dtype.na_value
2687
2688 @cache_readonly
2689 def _isnan(self) -> npt.NDArray[np.bool_]:
2690 """
2691 Return if each value is NaN.
2692 """
2693 if self._can_hold_na:
2694 return isna(self)
2695 else:
2696 # shouldn't reach to this condition by checking hasnans beforehand
2697 values = np.empty(len(self), dtype=np.bool_)
2698 values.fill(False)
2699 return values
2700
2701 @cache_readonly
2702 def hasnans(self) -> bool:
2703 """
2704 Return True if there are any NaNs.
2705
2706 Enables various performance speedups.
2707
2708 Returns
2709 -------
2710 bool
2711 """
2712 if self._can_hold_na:
2713 return bool(self._isnan.any())
2714 else:
2715 return False
2716
2717 @final
2718 def isna(self) -> npt.NDArray[np.bool_]:
2719 """
2720 Detect missing values.
2721
2722 Return a boolean same-sized object indicating if the values are NA.
2723 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2724 mapped to ``True`` values.
2725 Everything else get mapped to ``False`` values. Characters such as
2726 empty strings `''` or :attr:`numpy.inf` are not considered NA values
2727 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2728
2729 Returns
2730 -------
2731 numpy.ndarray[bool]
2732 A boolean array of whether my values are NA.
2733
2734 See Also
2735 --------
2736 Index.notna : Boolean inverse of isna.
2737 Index.dropna : Omit entries with missing values.
2738 isna : Top-level isna.
2739 Series.isna : Detect missing values in Series object.
2740
2741 Examples
2742 --------
2743 Show which entries in a pandas.Index are NA. The result is an
2744 array.
2745
2746 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2747 >>> idx
2748 Index([5.2, 6.0, nan], dtype='float64')
2749 >>> idx.isna()
2750 array([False, False, True])
2751
2752 Empty strings are not considered NA values. None is considered an NA
2753 value.
2754
2755 >>> idx = pd.Index(['black', '', 'red', None])
2756 >>> idx
2757 Index(['black', '', 'red', None], dtype='object')
2758 >>> idx.isna()
2759 array([False, False, False, True])
2760
2761 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2762
2763 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2764 ... pd.Timestamp(''), None, pd.NaT])
2765 >>> idx
2766 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2767 dtype='datetime64[ns]', freq=None)
2768 >>> idx.isna()
2769 array([False, True, True, True])
2770 """
2771 return self._isnan
2772
2773 isnull = isna
2774
2775 @final
2776 def notna(self) -> npt.NDArray[np.bool_]:
2777 """
2778 Detect existing (non-missing) values.
2779
2780 Return a boolean same-sized object indicating if the values are not NA.
2781 Non-missing values get mapped to ``True``. Characters such as empty
2782 strings ``''`` or :attr:`numpy.inf` are not considered NA values
2783 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2784 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2785 values.
2786
2787 Returns
2788 -------
2789 numpy.ndarray[bool]
2790 Boolean array to indicate which entries are not NA.
2791
2792 See Also
2793 --------
2794 Index.notnull : Alias of notna.
2795 Index.isna: Inverse of notna.
2796 notna : Top-level notna.
2797
2798 Examples
2799 --------
2800 Show which entries in an Index are not NA. The result is an
2801 array.
2802
2803 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2804 >>> idx
2805 Index([5.2, 6.0, nan], dtype='float64')
2806 >>> idx.notna()
2807 array([ True, True, False])
2808
2809 Empty strings are not considered NA values. None is considered a NA
2810 value.
2811
2812 >>> idx = pd.Index(['black', '', 'red', None])
2813 >>> idx
2814 Index(['black', '', 'red', None], dtype='object')
2815 >>> idx.notna()
2816 array([ True, True, True, False])
2817 """
2818 return ~self.isna()
2819
2820 notnull = notna
2821
2822 def fillna(self, value=None, downcast=None):
2823 """
2824 Fill NA/NaN values with the specified value.
2825
2826 Parameters
2827 ----------
2828 value : scalar
2829 Scalar value to use to fill holes (e.g. 0).
2830 This value cannot be a list-likes.
2831 downcast : dict, default is None
2832 A dict of item->dtype of what to downcast if possible,
2833 or the string 'infer' which will try to downcast to an appropriate
2834 equal type (e.g. float64 to int64 if possible).
2835
2836 Returns
2837 -------
2838 Index
2839
2840 See Also
2841 --------
2842 DataFrame.fillna : Fill NaN values of a DataFrame.
2843 Series.fillna : Fill NaN Values of a Series.
2844 """
2845
2846 value = self._require_scalar(value)
2847 if self.hasnans:
2848 result = self.putmask(self._isnan, value)
2849 if downcast is None:
2850 # no need to care metadata other than name
2851 # because it can't have freq if it has NaTs
2852 # _with_infer needed for test_fillna_categorical
2853 return Index._with_infer(result, name=self.name)
2854 raise NotImplementedError(
2855 f"{type(self).__name__}.fillna does not support 'downcast' "
2856 "argument values other than 'None'."
2857 )
2858 return self._view()
2859
2860 def dropna(self: _IndexT, how: AnyAll = "any") -> _IndexT:
2861 """
2862 Return Index without NA/NaN values.
2863
2864 Parameters
2865 ----------
2866 how : {'any', 'all'}, default 'any'
2867 If the Index is a MultiIndex, drop the value when any or all levels
2868 are NaN.
2869
2870 Returns
2871 -------
2872 Index
2873 """
2874 if how not in ("any", "all"):
2875 raise ValueError(f"invalid how option: {how}")
2876
2877 if self.hasnans:
2878 res_values = self._values[~self._isnan]
2879 return type(self)._simple_new(res_values, name=self.name)
2880 return self._view()
2881
2882 # --------------------------------------------------------------------
2883 # Uniqueness Methods
2884
2885 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
2886 """
2887 Return unique values in the index.
2888
2889 Unique values are returned in order of appearance, this does NOT sort.
2890
2891 Parameters
2892 ----------
2893 level : int or hashable, optional
2894 Only return values from specified level (for MultiIndex).
2895 If int, gets the level by integer position, else by level name.
2896
2897 Returns
2898 -------
2899 Index
2900
2901 See Also
2902 --------
2903 unique : Numpy array of unique values in that column.
2904 Series.unique : Return unique values of Series object.
2905 """
2906 if level is not None:
2907 self._validate_index_level(level)
2908
2909 if self.is_unique:
2910 return self._view()
2911
2912 result = super().unique()
2913 return self._shallow_copy(result)
2914
2915 def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT:
2916 """
2917 Return Index with duplicate values removed.
2918
2919 Parameters
2920 ----------
2921 keep : {'first', 'last', ``False``}, default 'first'
2922 - 'first' : Drop duplicates except for the first occurrence.
2923 - 'last' : Drop duplicates except for the last occurrence.
2924 - ``False`` : Drop all duplicates.
2925
2926 Returns
2927 -------
2928 Index
2929
2930 See Also
2931 --------
2932 Series.drop_duplicates : Equivalent method on Series.
2933 DataFrame.drop_duplicates : Equivalent method on DataFrame.
2934 Index.duplicated : Related method on Index, indicating duplicate
2935 Index values.
2936
2937 Examples
2938 --------
2939 Generate an pandas.Index with duplicate values.
2940
2941 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
2942
2943 The `keep` parameter controls which duplicate values are removed.
2944 The value 'first' keeps the first occurrence for each
2945 set of duplicated entries. The default value of keep is 'first'.
2946
2947 >>> idx.drop_duplicates(keep='first')
2948 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
2949
2950 The value 'last' keeps the last occurrence for each set of duplicated
2951 entries.
2952
2953 >>> idx.drop_duplicates(keep='last')
2954 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
2955
2956 The value ``False`` discards all sets of duplicated entries.
2957
2958 >>> idx.drop_duplicates(keep=False)
2959 Index(['cow', 'beetle', 'hippo'], dtype='object')
2960 """
2961 if self.is_unique:
2962 return self._view()
2963
2964 return super().drop_duplicates(keep=keep)
2965
2966 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
2967 """
2968 Indicate duplicate index values.
2969
2970 Duplicated values are indicated as ``True`` values in the resulting
2971 array. Either all duplicates, all except the first, or all except the
2972 last occurrence of duplicates can be indicated.
2973
2974 Parameters
2975 ----------
2976 keep : {'first', 'last', False}, default 'first'
2977 The value or values in a set of duplicates to mark as missing.
2978
2979 - 'first' : Mark duplicates as ``True`` except for the first
2980 occurrence.
2981 - 'last' : Mark duplicates as ``True`` except for the last
2982 occurrence.
2983 - ``False`` : Mark all duplicates as ``True``.
2984
2985 Returns
2986 -------
2987 np.ndarray[bool]
2988
2989 See Also
2990 --------
2991 Series.duplicated : Equivalent method on pandas.Series.
2992 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
2993 Index.drop_duplicates : Remove duplicate values from Index.
2994
2995 Examples
2996 --------
2997 By default, for each set of duplicated values, the first occurrence is
2998 set to False and all others to True:
2999
3000 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3001 >>> idx.duplicated()
3002 array([False, False, True, False, True])
3003
3004 which is equivalent to
3005
3006 >>> idx.duplicated(keep='first')
3007 array([False, False, True, False, True])
3008
3009 By using 'last', the last occurrence of each set of duplicated values
3010 is set on False and all others on True:
3011
3012 >>> idx.duplicated(keep='last')
3013 array([ True, False, True, False, False])
3014
3015 By setting keep on ``False``, all duplicates are True:
3016
3017 >>> idx.duplicated(keep=False)
3018 array([ True, False, True, False, True])
3019 """
3020 if self.is_unique:
3021 # fastpath available bc we are immutable
3022 return np.zeros(len(self), dtype=bool)
3023 return self._duplicated(keep=keep)
3024
3025 # --------------------------------------------------------------------
3026 # Arithmetic & Logical Methods
3027
3028 def __iadd__(self, other):
3029 # alias for __add__
3030 return self + other
3031
3032 @final
3033 def __nonzero__(self) -> NoReturn:
3034 raise ValueError(
3035 f"The truth value of a {type(self).__name__} is ambiguous. "
3036 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3037 )
3038
3039 __bool__ = __nonzero__
3040
3041 # --------------------------------------------------------------------
3042 # Set Operation Methods
3043
3044 def _get_reconciled_name_object(self, other):
3045 """
3046 If the result of a set operation will be self,
3047 return self, unless the name changes, in which
3048 case make a shallow copy of self.
3049 """
3050 name = get_op_result_name(self, other)
3051 if self.name is not name:
3052 return self.rename(name)
3053 return self
3054
3055 @final
3056 def _validate_sort_keyword(self, sort):
3057 if sort not in [None, False, True]:
3058 raise ValueError(
3059 "The 'sort' keyword only takes the values of "
3060 f"None, True, or False; {sort} was passed."
3061 )
3062
3063 @final
3064 def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
3065 """
3066 With mismatched timezones, cast both to UTC.
3067 """
3068 # Caller is responsibelf or checking
3069 # `not is_dtype_equal(self.dtype, other.dtype)`
3070 if (
3071 isinstance(self, ABCDatetimeIndex)
3072 and isinstance(other, ABCDatetimeIndex)
3073 and self.tz is not None
3074 and other.tz is not None
3075 ):
3076 # GH#39328, GH#45357
3077 left = self.tz_convert("UTC")
3078 right = other.tz_convert("UTC")
3079 return left, right
3080 return self, other
3081
    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

        MultiIndex case

        >>> idx1 = pd.MultiIndex.from_arrays(
        ...     [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
        ... )
        >>> idx1
        MultiIndex([(1,  'Red'),
            (1, 'Blue'),
            (2,  'Red'),
            (2, 'Blue')],
           )
        >>> idx2 = pd.MultiIndex.from_arrays(
        ...     [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
        ... )
        >>> idx2
        MultiIndex([(3,   'Red'),
            (3, 'Green'),
            (2,   'Red'),
            (2, 'Green')],
           )
        >>> idx1.union(idx2)
        MultiIndex([(1,  'Blue'),
            (1,   'Red'),
            (2,  'Blue'),
            (2, 'Green'),
            (2,   'Red'),
            (3, 'Green'),
            (3,   'Red')],
           )
        >>> idx1.union(idx2, sort=False)
        MultiIndex([(1,   'Red'),
            (1,  'Blue'),
            (2,   'Red'),
            (2,  'Blue'),
            (3,   'Red'),
            (3, 'Green'),
            (2, 'Green')],
           )
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        # coerce array-likes to Index and compute the result name
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mismatched dtypes: cast both sides to a common dtype and retry.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(_unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            # Both tz-aware with different tzs -> convert both to UTC first.
            self, other = self._dti_setop_align_tzs(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            # recurse with matching dtypes
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            # or after the is_dtype_equal check above affects the returned dtype
            result = self._get_reconciled_name_object(other)
            if sort is True:
                return result.sort_values()
            return result

        elif not len(self):
            # empty self: the union is just `other` (with reconciled name)
            result = other._get_reconciled_name_object(self)
            if sort is True:
                return result.sort_values()
            return result

        # general case: same-dtype, both non-empty, not equal
        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)
3203
    def _union(self, other: Index, sort: bool | None):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * True : sort the result
            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        lvals = self._values
        rvals = other._values

        if (
            sort in (None, True)
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            # (actually don't need either unique, but without this restriction
            # test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                # Fall back to appending the values of `other` not already
                # present in `self`, preserving order of appearance.
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates
            result_dups = algos.union_with_duplicates(self, other)
            return _maybe_try_sort(result_dups, sort)

        # The rest of this method is analogous to Index._intersection_via_get_indexer

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            missing = (indexer == -1).nonzero()[0]
        else:
            # non-unique self: use the "missing" part of get_indexer_non_unique
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        result: Index | MultiIndex | ArrayLike
        if self._is_multi:
            # Preserve MultiIndex to avoid losing dtypes
            result = self.append(other.take(missing))

        else:
            if len(missing) > 0:
                other_diff = rvals.take(missing)
                result = concat_compat((lvals, other_diff))
            else:
                # everything in `other` is already in `self`
                result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result
3281
3282 @final
3283 def _wrap_setop_result(self, other: Index, result) -> Index:
3284 name = get_op_result_name(self, other)
3285 if isinstance(result, Index):
3286 if result.name != name:
3287 result = result.rename(name)
3288 else:
3289 result = self._shallow_copy(result, name=name)
3290 return result
3291
    @final
    def intersection(self, other, sort: bool = False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : True, False or None, default False
            Whether to sort the resulting index.

            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.
            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # dtypes differ; _dti_setop_align_tzs may replace both operands
            # before any comparison (datetime-like tz handling — see helper)
            self, other = self._dti_setop_align_tzs(other, "intersection")

        if self.equals(other):
            # Fast path: element-wise equal indexes; only dedupe/rename needed
            if self.has_duplicates:
                result = self.unique()._get_reconciled_name_object(other)
            else:
                result = self._get_reconciled_name_object(other)
            if sort is True:
                result = result.sort_values()
            return result

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if is_dtype_equal(self.dtype, dtype):
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                # 1) which index's `freq` we get in DTI/TDI cases
                # This may be a historical artifact, i.e. no documented
                # reason for this choice.
                # 2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Comparable but mismatched dtypes: cast both to a common dtype
            # and retry, so _intersection sees matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)
3375
    def _intersection(self, other: Index, sort: bool = False):
        """
        intersection specialized to the case with matching dtypes.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and not isinstance(self, ABCMultiIndex)
        ):
            # Fast path: both sides sorted -> use the libjoin inner indexer.
            try:
                res_indexer, indexer, _ = self._inner_indexer(other)
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                if is_numeric_dtype(self):
                    # This is faster, because Index.unique() checks for uniqueness
                    # before calculating the unique values.
                    res = algos.unique1d(res_indexer)
                else:
                    result = self.take(indexer)
                    res = result.drop_duplicates()
                return ensure_wrapped_if_datetimelike(res)

        # Fallback: positional lookup via get_indexer on the unique values.
        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values
3405
3406 def _wrap_intersection_result(self, other, result):
3407 # We will override for MultiIndex to handle empty results
3408 return self._wrap_setop_result(other, result)
3409
3410 @final
3411 def _intersection_via_get_indexer(
3412 self, other: Index | MultiIndex, sort
3413 ) -> ArrayLike | MultiIndex:
3414 """
3415 Find the intersection of two Indexes using get_indexer.
3416
3417 Returns
3418 -------
3419 np.ndarray or ExtensionArray
3420 The returned array will be unique.
3421 """
3422 left_unique = self.unique()
3423 right_unique = other.unique()
3424
3425 # even though we are unique, we need get_indexer_for for IntervalIndex
3426 indexer = left_unique.get_indexer_for(right_unique)
3427
3428 mask = indexer != -1
3429
3430 taker = indexer.take(mask.nonzero()[0])
3431 if sort is False:
3432 # sort bc we want the elements in the same order they are in self
3433 # unnecessary in the case with sort=None bc we will sort later
3434 taker = np.sort(taker)
3435
3436 if isinstance(left_unique, ABCMultiIndex):
3437 result = left_unique.take(taker)
3438 else:
3439 result = left_unique.take(taker)._values
3440 return result
3441
3442 @final
3443 def difference(self, other, sort=None):
3444 """
3445 Return a new Index with elements of index not in `other`.
3446
3447 This is the set difference of two Index objects.
3448
3449 Parameters
3450 ----------
3451 other : Index or array-like
3452 sort : bool or None, default None
3453 Whether to sort the resulting index. By default, the
3454 values are attempted to be sorted, but any TypeError from
3455 incomparable elements is caught by pandas.
3456
3457 * None : Attempt to sort the result, but catch any TypeErrors
3458 from comparing incomparable elements.
3459 * False : Do not sort the result.
3460 * True : Sort the result (which may raise TypeError).
3461
3462 Returns
3463 -------
3464 Index
3465
3466 Examples
3467 --------
3468 >>> idx1 = pd.Index([2, 1, 3, 4])
3469 >>> idx2 = pd.Index([3, 4, 5, 6])
3470 >>> idx1.difference(idx2)
3471 Index([1, 2], dtype='int64')
3472 >>> idx1.difference(idx2, sort=False)
3473 Index([2, 1], dtype='int64')
3474 """
3475 self._validate_sort_keyword(sort)
3476 self._assert_can_do_setop(other)
3477 other, result_name = self._convert_can_do_setop(other)
3478
3479 # Note: we do NOT call _dti_setop_align_tzs here, as there
3480 # is no requirement that .difference be commutative, so it does
3481 # not cast to object.
3482
3483 if self.equals(other):
3484 # Note: we do not (yet) sort even if sort=None GH#24959
3485 return self[:0].rename(result_name)
3486
3487 if len(other) == 0:
3488 # Note: we do not (yet) sort even if sort=None GH#24959
3489 result = self.rename(result_name)
3490 if sort is True:
3491 return result.sort_values()
3492 return result
3493
3494 if not self._should_compare(other):
3495 # Nothing matches -> difference is everything
3496 result = self.rename(result_name)
3497 if sort is True:
3498 return result.sort_values()
3499 return result
3500
3501 result = self._difference(other, sort=sort)
3502 return self._wrap_difference_result(other, result)
3503
3504 def _difference(self, other, sort):
3505 # overridden by RangeIndex
3506
3507 this = self.unique()
3508
3509 indexer = this.get_indexer_for(other)
3510 indexer = indexer.take((indexer != -1).nonzero()[0])
3511
3512 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
3513
3514 the_diff: MultiIndex | ArrayLike
3515 if isinstance(this, ABCMultiIndex):
3516 the_diff = this.take(label_diff)
3517 else:
3518 the_diff = this._values.take(label_diff)
3519 the_diff = _maybe_try_sort(the_diff, sort)
3520
3521 return the_diff
3522
3523 def _wrap_difference_result(self, other, result):
3524 # We will override for MultiIndex to handle empty results
3525 return self._wrap_setop_result(other, result)
3526
    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : bool or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        # An explicitly-passed result_name wins over the inferred one.
        if result_name is None:
            result_name = result_name_update

        if not is_dtype_equal(self.dtype, other.dtype):
            self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

        if not self._should_compare(other):
            # No overlap possible, so the symmetric difference is the union.
            return self.union(other, sort=sort).rename(result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Comparable but mismatched dtypes: cast both sides and retry.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other.take(right_indexer)

        res_values = left_diff.append(right_diff)
        result = _maybe_try_sort(res_values, sort)

        if not self._is_multi:
            # Pass the appended values' dtype explicitly to keep it.
            return Index(result, name=result_name, dtype=res_values.dtype)
        else:
            left_diff = cast("MultiIndex", left_diff)
            if len(result) == 0:
                # result might be an Index, if other was an Index
                return left_diff.remove_unused_levels().set_names(result_name)
            return result.set_names(result_name)
3607
3608 @final
3609 def _assert_can_do_setop(self, other) -> bool:
3610 if not is_list_like(other):
3611 raise TypeError("Input must be Index or array-like")
3612 return True
3613
3614 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3615 if not isinstance(other, Index):
3616 other = Index(other, name=self.name)
3617 result_name = self.name
3618 else:
3619 result_name = get_op_result_name(self, other)
3620 return other, result_name
3621
3622 # --------------------------------------------------------------------
3623 # Indexing Methods
3624
3625 def get_loc(self, key):
3626 """
3627 Get integer location, slice or boolean mask for requested label.
3628
3629 Parameters
3630 ----------
3631 key : label
3632
3633 Returns
3634 -------
3635 int if unique index, slice if monotonic index, else mask
3636
3637 Examples
3638 --------
3639 >>> unique_index = pd.Index(list('abc'))
3640 >>> unique_index.get_loc('b')
3641 1
3642
3643 >>> monotonic_index = pd.Index(list('abbc'))
3644 >>> monotonic_index.get_loc('b')
3645 slice(1, 3, None)
3646
3647 >>> non_monotonic_index = pd.Index(list('abcb'))
3648 >>> non_monotonic_index.get_loc('b')
3649 array([False, True, False, True])
3650 """
3651 casted_key = self._maybe_cast_indexer(key)
3652 try:
3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:
3655 raise KeyError(key) from err
3656 except TypeError:
3657 # If we have a listlike key, _check_indexing_error will raise
3658 # InvalidIndexError. Otherwise we fall through and re-raise
3659 # the TypeError.
3660 self._check_indexing_error(key)
3661 raise
3662
    # Shared docstring template for get_indexer; subclasses render it with
    # "%"-interpolation of _index_doc_kwargs (%(target_klass)s,
    # %(raises_section)s) and attach it via @Appender.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
3715
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    @final
    def get_indexer(
        self,
        target,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        method = clean_reindex_fill_method(method)
        # Keep the un-cast target: needed below to distinguish genuine NaNs
        # from NaNs introduced by _maybe_cast_listlike_indexer (GH#45361).
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            # matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if is_categorical_dtype(self.dtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            # (could improve perf by doing _should_compare check earlier?)
            assert is_dtype_equal(self.dtype, target.dtype)

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if is_categorical_dtype(target.dtype):
            # potential fastpath
            # get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            # e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        # If promotion changed either operand, redo the lookup on the
        # promoted pair so dtypes line up.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if not is_dtype_equal(
            self.dtype, target.dtype
        ) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            # that can be matched to Interval scalars.
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)
3803
    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Dispatch to fill/nearest/exact engine lookup.

        Caller (get_indexer) has already validated method/limit/tolerance
        via _check_indexing_method.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching through the engine.
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)
3832
3833 @final
3834 def _should_partial_index(self, target: Index) -> bool:
3835 """
3836 Should we attempt partial-matching indexing?
3837 """
3838 if is_interval_dtype(self.dtype):
3839 if is_interval_dtype(target.dtype):
3840 return False
3841 # See https://github.com/pandas-dev/pandas/issues/47772 the commented
3842 # out code can be restored (instead of hardcoding `return True`)
3843 # once that issue is fixed
3844 # "Index" has no attribute "left"
3845 # return self.left._should_compare(target) # type: ignore[attr-defined]
3846 return True
3847 return False
3848
3849 @final
3850 def _check_indexing_method(
3851 self,
3852 method: str_t | None,
3853 limit: int | None = None,
3854 tolerance=None,
3855 ) -> None:
3856 """
3857 Raise if we have a get_indexer `method` that is not supported or valid.
3858 """
3859 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
3860 # in practice the clean_reindex_fill_method call would raise
3861 # before we get here
3862 raise ValueError("Invalid fill method") # pragma: no cover
3863
3864 if self._is_multi:
3865 if method == "nearest":
3866 raise NotImplementedError(
3867 "method='nearest' not implemented yet "
3868 "for MultiIndex; see GitHub issue 9365"
3869 )
3870 if method in ("pad", "backfill"):
3871 if tolerance is not None:
3872 raise NotImplementedError(
3873 "tolerance not implemented yet for MultiIndex"
3874 )
3875
3876 if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
3877 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
3878 if method is not None:
3879 raise NotImplementedError(
3880 f"method {method} not yet implemented for {type(self).__name__}"
3881 )
3882
3883 if method is None:
3884 if tolerance is not None:
3885 raise ValueError(
3886 "tolerance argument only valid if doing pad, "
3887 "backfill or nearest reindexing"
3888 )
3889 if limit is not None:
3890 raise ValueError(
3891 "limit argument only valid if doing pad, "
3892 "backfill or nearest reindexing"
3893 )
3894
3895 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
3896 # override this method on subclasses
3897 tolerance = np.asarray(tolerance)
3898 if target.size != tolerance.size and tolerance.size > 1:
3899 raise ValueError("list-like tolerance size must match target index size")
3900 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
3901 if tolerance.ndim > 0:
3902 raise ValueError(
3903 f"tolerance argument for {type(self).__name__} with dtype "
3904 f"{self.dtype} must contain numeric elements if it is list type"
3905 )
3906
3907 raise ValueError(
3908 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
3909 f"must be numeric if it is a scalar: {repr(tolerance)}"
3910 )
3911 return tolerance
3912
    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        Pad/backfill indexer for `target`, optionally filtered by `tolerance`.
        """
        if self._is_multi:
            # TODO: get_indexer_with_fill docstring says values must be _sorted_
            # but that doesn't appear to be enforced
            # error: "IndexEngine" has no attribute "get_indexer_with_fill"
            engine = self._engine
            with warnings.catch_warnings():
                # TODO: We need to fix this. Casting to int64 in cython
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                return engine.get_indexer_with_fill(  # type: ignore[union-attr]
                    target=target._values,
                    values=self._values,
                    method=method,
                    limit=limit,
                )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Fast path: both sorted -> vectorized libalgos pad/backfill.
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            # Non-monotonic target: searchsorted-based fallback (no limit).
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
3950
    @final
    def _get_fill_indexer_searchsorted(
        self, target: Index, method: str_t, limit: int | None = None
    ) -> npt.NDArray[np.intp]:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side: Literal["left", "right"] = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer
3984
    @final
    def _get_nearest_indexer(
        self, target: Index, limit: int | None, tolerance
    ) -> npt.NDArray[np.intp]:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        if not len(self):
            # Empty index: every position is a miss; pad path returns all -1.
            return self._get_fill_indexer(target, "pad")

        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        left_distances = self._difference_compat(target, left_indexer)
        right_distances = self._difference_compat(target, right_indexer)

        # Strict `lt` for monotonic-increasing self means tied distances pick
        # the right (larger) index value, per the get_indexer doc template.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
            # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
            # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
            op(left_distances, right_distances)  # type: ignore[arg-type]
            | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
4016
4017 @final
4018 def _filter_indexer_tolerance(
4019 self,
4020 target: Index,
4021 indexer: npt.NDArray[np.intp],
4022 tolerance,
4023 ) -> npt.NDArray[np.intp]:
4024 distance = self._difference_compat(target, indexer)
4025
4026 return np.where(distance <= tolerance, indexer, -1)
4027
4028 @final
4029 def _difference_compat(
4030 self, target: Index, indexer: npt.NDArray[np.intp]
4031 ) -> ArrayLike:
4032 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
4033 # of DateOffset objects, which do not support __abs__ (and would be slow
4034 # if they did)
4035
4036 if isinstance(self.dtype, PeriodDtype):
4037 # Note: we only get here with matching dtypes
4038 own_values = cast("PeriodArray", self._data)._ndarray
4039 target_values = cast("PeriodArray", target._data)._ndarray
4040 diff = own_values[indexer] - target_values
4041 else:
4042 # error: Unsupported left operand type for - ("ExtensionArray")
4043 diff = self._values[indexer] - target._values # type: ignore[operator]
4044 return abs(diff)
4045
4046 # --------------------------------------------------------------------
4047 # Indexer Conversion Methods
4048
4049 @final
4050 def _validate_positional_slice(self, key: slice) -> None:
4051 """
4052 For positional indexing, a slice must have either int or None
4053 for each of start, stop, and step.
4054 """
4055 self._validate_indexer("positional", key.start, "iloc")
4056 self._validate_indexer("positional", key.stop, "iloc")
4057 self._validate_indexer("positional", key.step, "iloc")
4058
    def _convert_slice_indexer(self, key: slice, kind: str_t):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
        # to simplify this.
        if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
            # We always treat __getitem__ slicing as label-based
            # translate to locations
            return self.slice_indexer(start, stop, step)

        # figure out if this is a positional indexer
        def is_int(v):
            # None is acceptable (open-ended bound), floats are not
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)

        # special case for interval_dtype bc we do not do partial-indexing
        # on integer Intervals when slicing
        # TODO: write this in terms of e.g. should_partial_index?
        ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
            self.dtype
        )
        is_positional = is_index_slice and ints_are_positional

        if kind == "getitem":
            # called from the getitem slicers, validate that we are in fact integers
            if is_integer_dtype(self.dtype) or is_index_slice:
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop
                # If both bounds resolve as labels, treat the slice as
                # label-based after all.
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                raise TypeError(
                    "Slicing a positional slice with .loc is not allowed, "
                    "Use .loc with labels or .iloc with positions instead.",
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer
4135
4136 @final
4137 def _raise_invalid_indexer(
4138 self,
4139 form: str_t,
4140 key,
4141 reraise: lib.NoDefault | None | Exception = lib.no_default,
4142 ) -> None:
4143 """
4144 Raise consistent invalid indexer message.
4145 """
4146 msg = (
4147 f"cannot do {form} indexing on {type(self).__name__} with these "
4148 f"indexers [{key}] of type {type(key).__name__}"
4149 )
4150 if reraise is not lib.no_default:
4151 raise TypeError(msg) from reraise
4152 raise TypeError(msg)
4153
4154 # --------------------------------------------------------------------
4155 # Reindex Methods
4156
4157 @final
4158 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4159 """
4160 Check if we are allowing reindexing with this particular indexer.
4161
4162 Parameters
4163 ----------
4164 indexer : an integer ndarray
4165
4166 Raises
4167 ------
4168 ValueError if its a duplicate axis
4169 """
4170 # trying to reindex on an axis with duplicates
4171 if not self._index_as_unique and len(indexer):
4172 raise ValueError("cannot reindex on an axis with duplicate labels")
4173
    def reindex(
        self, target, method=None, level=None, limit=None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Empty non-Index target: build an empty slice of ourselves (or of
            # the requested level) so the result keeps our dtype/tz.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                # identical labels: nothing to re-take, so indexer is None
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                elif not self.is_unique:
                    # GH#42568
                    raise ValueError("cannot reindex on an axis with duplicate labels")
                else:
                    indexer, _ = self.get_indexer_non_unique(target)

        # hook point for subclasses; also re-attaches self.name when the
        # target had no name of its own (preserve_names above)
        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer
4281
4282 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4283 target = self._maybe_preserve_names(target, preserve_names)
4284 return target
4285
4286 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4287 if preserve_names and target.nlevels == 1 and target.name != self.name:
4288 target = target.copy(deep=False)
4289 target.name = self.name
4290 return target
4291
    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None
            Indexer into the *new* index for the still-missing labels
            (-1 entries); None when every target label was found.
        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1  # positions in target that matched a label
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            # Some target labels are absent from self: splice matched labels
            # and missing labels back together in target order.
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]

            # Index constructor below will do inference
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):
                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:
                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:
                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if not isinstance(self, ABCMultiIndex):
            new_index = Index(new_labels, name=self.name)
        else:
            new_index = type(self).from_tuples(new_labels, names=self.names)
        return new_index, indexer, new_indexer
4363
4364 # --------------------------------------------------------------------
4365 # Join Methods
4366
    # Overloads for ``join``: with return_indexers=True the result is a
    # 3-tuple (joined_index, left_indexer, right_indexer); with
    # return_indexers=False just the joined Index is returned.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...
4402
    @final
    @_maybe_return_indexers
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level: Level = None,
        return_indexers: bool = False,
        sort: bool = False,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Compute join_index and indexers to conform data structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
            Indexers are None where that side can be taken as-is.
        """
        other = ensure_index(other)

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (self.tz is None) ^ (other.tz is None):
                # Raise instead of casting to object below.
                raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if not self._is_multi and not other._is_multi:
            # We have specific handling for MultiIndex below
            # If promotion changed either side, redo the join on the
            # promoted pair so dtypes line up.
            pself, pother = self._maybe_promote(other)
            if pself is not self or pother is not other:
                return pself.join(
                    pother, how=how, level=level, return_indexers=True, sort=sort
                )

        lindexer: np.ndarray | None
        rindexer: np.ndarray | None

        # try to figure out the join level
        # GH3662
        if level is None and (self._is_multi or other._is_multi):
            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how)

        # join on the level
        if level is not None and (self._is_multi or other._is_multi):
            return self._join_level(other, level, how=how)

        # Empty-side shortcuts: the non-empty (or kept) side is returned
        # unchanged and the other side's indexer is all -1 / empty.
        if len(other) == 0:
            if how in ("left", "outer"):
                join_index = self._view()
                rindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, None, rindexer
            elif how in ("right", "inner", "cross"):
                join_index = other._view()
                lindexer = np.array([])
                return join_index, lindexer, None

        if len(self) == 0:
            if how in ("right", "outer"):
                join_index = other._view()
                lindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, lindexer, None
            elif how in ("left", "inner", "cross"):
                join_index = self._view()
                rindexer = np.array([])
                return join_index, None, rindexer

        if self._join_precedence < other._join_precedence:
            # Let the higher-precedence index drive the join, flipping the
            # direction and swapping the returned indexers back.
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)
            join_index, lidx, ridx = other.join(
                self, how=how, level=level, return_indexers=True
            )
            lidx, ridx = ridx, lidx
            return join_index, lidx, ridx

        if not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype, then join.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.join(other, how=how, return_indexers=True)

        _validate_join_method(how)

        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(other, how=how)
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic_increasing and other.is_monotonic_increasing:
                if not is_interval_dtype(self.dtype):
                    # otherwise we will fall through to _join_via_get_indexer
                    # GH#39133
                    # go through object dtype for ea till engine is supported properly
                    return self._join_monotonic(other, how=how)
            else:
                return self._join_non_unique(other, how=how)
        elif (
            # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and not isinstance(self, ABCMultiIndex)
            and not is_categorical_dtype(self.dtype)
        ):
            # Categorical is monotonic if data are ordered as categories, but join can
            # not handle this in case of not lexicographically monotonic GH#38502
            try:
                return self._join_monotonic(other, how=how)
            except TypeError:
                # object dtype; non-comparable objects
                pass

        return self._join_via_get_indexer(other, how, sort)
4527
4528 @final
4529 def _join_via_get_indexer(
4530 self, other: Index, how: JoinHow, sort: bool
4531 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4532 # Fallback if we do not have any fastpaths available based on
4533 # uniqueness/monotonicity
4534
4535 # Note: at this point we have checked matching dtypes
4536
4537 if how == "left":
4538 join_index = self
4539 elif how == "right":
4540 join_index = other
4541 elif how == "inner":
4542 # TODO: sort=False here for backwards compat. It may
4543 # be better to use the sort parameter passed into join
4544 join_index = self.intersection(other, sort=False)
4545 elif how == "outer":
4546 # TODO: sort=True here for backwards compat. It may
4547 # be better to use the sort parameter passed into join
4548 join_index = self.union(other)
4549
4550 if sort:
4551 join_index = join_index.sort_values()
4552
4553 if join_index is self:
4554 lindexer = None
4555 else:
4556 lindexer = self.get_indexer_for(join_index)
4557 if join_index is other:
4558 rindexer = None
4559 else:
4560 rindexer = other.get_indexer_for(join_index)
4561 return join_index, lindexer, rindexer
4562
    @final
    def _join_multi(self, other: Index, how: JoinHow):
        """
        Join when at least one side is a MultiIndex and the level names
        differ; joins on the overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            # Drop the non-matching levels from left and right respectively
            # (sorted to preserve each side's original level order)
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            # ensure the non-multi index is on the left so we can use
            # _join_level, then flip the direction and indexers back
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            return result[0], result[2], result[1]
        return result
4644
    @final
    def _join_non_unique(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        Join where at least one side contains duplicate labels.

        Delegates indexer construction to the merge machinery, then fills
        labels unmatched on the left (-1 entries in ``left_idx``) from
        ``other`` via ``putmask``.
        """
        from pandas.core.reshape.merge import get_join_indexers

        # We only get here if dtypes match
        assert self.dtype == other.dtype

        left_idx, right_idx = get_join_indexers(
            [self._values], [other._values], how=how, sort=True
        )
        mask = left_idx == -1

        join_idx = self.take(left_idx)
        right = other.take(right_idx)
        join_index = join_idx.putmask(mask, right)
        return join_index, left_idx, right_idx
4663
    @final
    def _join_level(
        self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.

        Exactly one of ``self``/``other`` must be a MultiIndex; a
        TypeError is raised when both are.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so `left` is always the MultiIndex; flip the join
        # direction (and, at the end, the indexers) when we swapped.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        # Join the single level against the flat index.
        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # The level itself is unchanged by the join.
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            # rev_indexer maps old level positions -> new level positions
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer
4812
    @final
    def _join_monotonic(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Join two monotonically-increasing, dtype-matched indexes using the
        libjoin indexer fastpaths.
        """
        # We only get here with matching dtypes and both monotonic increasing
        assert other.dtype == self.dtype

        if self.equals(other):
            # This is a convenient place for this check, but its correctness
            # does not depend on monotonicity, so it could go earlier
            # in the calling method.
            ret_index = other if how == "right" else self
            return ret_index, None, None

        ridx: npt.NDArray[np.intp] | None
        lidx: npt.NDArray[np.intp] | None

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(other)
            elif how == "right":
                # right join is the mirror image of a left join
                join_index = other
                lidx = other._left_indexer_unique(self)
                ridx = None
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
        else:
            # at least one side has duplicates: use the non-unique indexers
            if how == "left":
                join_array, lidx, ridx = self._left_indexer(other)
            elif how == "right":
                join_array, ridx, lidx = other._left_indexer(self)
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)

            assert lidx is not None
            assert ridx is not None

            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)

        lidx = None if lidx is None else ensure_platform_int(lidx)
        ridx = None if ridx is None else ensure_platform_int(ridx)
        return join_index, lidx, ridx
4864
4865 def _wrap_joined_index(
4866 self: _IndexT,
4867 joined: ArrayLike,
4868 other: _IndexT,
4869 lidx: npt.NDArray[np.intp],
4870 ridx: npt.NDArray[np.intp],
4871 ) -> _IndexT:
4872 assert other.dtype == self.dtype
4873
4874 if isinstance(self, ABCMultiIndex):
4875 name = self.names if self.names == other.names else None
4876 # error: Incompatible return value type (got "MultiIndex",
4877 # expected "_IndexT")
4878 mask = lidx == -1
4879 join_idx = self.take(lidx)
4880 right = other.take(ridx)
4881 join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
4882 return join_index.set_names(name) # type: ignore[return-value]
4883 else:
4884 name = get_op_result_name(self, other)
4885 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
4886
4887 @cache_readonly
4888 def _can_use_libjoin(self) -> bool:
4889 """
4890 Whether we can use the fastpaths implement in _libs.join
4891 """
4892 if type(self) is Index:
4893 # excludes EAs, but include masks, we get here with monotonic
4894 # values only, meaning no NA
4895 return (
4896 isinstance(self.dtype, np.dtype)
4897 or isinstance(self.values, BaseMaskedArray)
4898 or isinstance(self._values, ArrowExtensionArray)
4899 )
4900 return not is_interval_dtype(self.dtype)
4901
4902 # --------------------------------------------------------------------
4903 # Uncategorized Methods
4904
    @property
    def values(self) -> ArrayLike:
        """
        Return an array representing the data in the Index.

        .. warning::

            We recommend using :attr:`Index.array` or
            :meth:`Index.to_numpy`, depending on whether you need
            a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        # the backing store itself, not a copy
        return self._data
4926
    @cache_readonly
    @doc(IndexOpsMixin.array)
    def array(self) -> ExtensionArray:
        # Always return an ExtensionArray: plain ndarrays are wrapped in
        # PandasArray, EA-backed indexes return their array unchanged.
        array = self._data
        if isinstance(array, np.ndarray):
            from pandas.core.arrays.numpy_ import PandasArray

            array = PandasArray(array)
        return array
4936
    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        """
        The best array representation.

        This is an ndarray or ExtensionArray.

        ``_values`` are consistent between ``Series`` and ``Index``.

        It may differ from the public '.values' method.

        index             | values          | _values       |
        ----------------- | --------------- | ------------- |
        Index             | ndarray         | ndarray       |
        CategoricalIndex  | Categorical     | Categorical   |
        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
        PeriodIndex       | ndarray[object] | PeriodArray   |
        IntervalIndex     | IntervalArray   | IntervalArray |

        See Also
        --------
        values : Values
        """
        # base class: same object as .values; subclasses may differ per table
        return self._data
4962
    def _get_engine_target(self) -> ArrayLike:
        """
        Get the ndarray or ExtensionArray that we can pass to the IndexEngine
        constructor.

        Most ExtensionArrays on a plain ``Index`` are converted to object
        ndarray; masked arrays and numeric Arrow arrays are passed through.
        """
        vals = self._values
        if isinstance(vals, StringArray):
            # GH#45652 much more performant than ExtensionEngine
            return vals._ndarray
        if (
            type(self) is Index
            and isinstance(self._values, ExtensionArray)
            and not isinstance(self._values, BaseMaskedArray)
            and not (
                isinstance(self._values, ArrowExtensionArray)
                and is_numeric_dtype(self.dtype)
                # Exclude decimal
                and self.dtype.kind != "O"
            )
        ):
            # TODO(ExtensionIndex): remove special-case, just use self._values
            return self._values.astype(object)
        return vals
4986
4987 def _get_join_target(self) -> ArrayLike:
4988 """
4989 Get the ndarray or ExtensionArray that we can pass to the join
4990 functions.
4991 """
4992 if isinstance(self._values, BaseMaskedArray):
4993 # This is only used if our array is monotonic, so no NAs present
4994 return self._values._data
4995 elif isinstance(self._values, ArrowExtensionArray):
4996 # This is only used if our array is monotonic, so no missing values
4997 # present
4998 return self._values.to_numpy()
4999 return self._get_engine_target()
5000
5001 def _from_join_target(self, result: np.ndarray) -> ArrayLike:
5002 """
5003 Cast the ndarray returned from one of the libjoin.foo_indexer functions
5004 back to type(self)._data.
5005 """
5006 if isinstance(self.values, BaseMaskedArray):
5007 return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
5008 elif isinstance(self.values, ArrowExtensionArray):
5009 return type(self.values)._from_sequence(result)
5010 return result
5011
5012 @doc(IndexOpsMixin._memory_usage)
5013 def memory_usage(self, deep: bool = False) -> int:
5014 result = self._memory_usage(deep=deep)
5015
5016 # include our engine hashtable
5017 result += self._engine.sizeof(deep=deep)
5018 return result
5019
5020 @final
5021 def where(self, cond, other=None) -> Index:
5022 """
5023 Replace values where the condition is False.
5024
5025 The replacement is taken from other.
5026
5027 Parameters
5028 ----------
5029 cond : bool array-like with the same length as self
5030 Condition to select the values on.
5031 other : scalar, or array-like, default None
5032 Replacement if the condition is False.
5033
5034 Returns
5035 -------
5036 pandas.Index
5037 A copy of self with values replaced from other
5038 where the condition is False.
5039
5040 See Also
5041 --------
5042 Series.where : Same method for Series.
5043 DataFrame.where : Same method for DataFrame.
5044
5045 Examples
5046 --------
5047 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5048 >>> idx
5049 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5050 >>> idx.where(idx.isin(['car', 'train']), 'other')
5051 Index(['car', 'other', 'train', 'other'], dtype='object')
5052 """
5053 if isinstance(self, ABCMultiIndex):
5054 raise NotImplementedError(
5055 ".where is not supported for MultiIndex operations"
5056 )
5057 cond = np.asarray(cond, dtype=bool)
5058 return self.putmask(~cond, other)
5059
5060 # construction helpers
5061 @final
5062 @classmethod
5063 def _raise_scalar_data_error(cls, data):
5064 # We return the TypeError so that we can raise it from the constructor
5065 # in order to keep mypy happy
5066 raise TypeError(
5067 f"{cls.__name__}(...) must be called with a collection of some "
5068 f"kind, {repr(data)} was passed"
5069 )
5070
    def _validate_fill_value(self, value):
        """
        Check if the value can be inserted into our array without casting,
        and convert it to an appropriate native type if necessary.

        Returns
        -------
        object
            The (possibly converted) value.

        Raises
        ------
        TypeError
            If the value cannot be inserted into an array of this dtype.
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
            # numpy non-datetimelike dtype: let np_can_hold_element do the
            # lossiness check / native conversion
            try:
                return np_can_hold_element(dtype, value)
            except LossySetitemError as err:
                # re-raise as TypeError for consistency
                raise TypeError from err
        elif not can_hold_element(self._values, value):
            raise TypeError
        return value
5092
5093 @final
5094 def _require_scalar(self, value):
5095 """
5096 Check that this is a scalar value that we can use for setitem-like
5097 operations without changing dtype.
5098 """
5099 if not is_scalar(value):
5100 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
5101 return value
5102
5103 def _is_memory_usage_qualified(self) -> bool:
5104 """
5105 Return a boolean if we need a qualified .info display.
5106 """
5107 return is_object_dtype(self.dtype)
5108
5109 def __contains__(self, key: Any) -> bool:
5110 """
5111 Return a boolean indicating whether the provided key is in the index.
5112
5113 Parameters
5114 ----------
5115 key : label
5116 The key to check if it is present in the index.
5117
5118 Returns
5119 -------
5120 bool
5121 Whether the key search is in the index.
5122
5123 Raises
5124 ------
5125 TypeError
5126 If the key is not hashable.
5127
5128 See Also
5129 --------
5130 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5131 list-like key is in the index.
5132
5133 Examples
5134 --------
5135 >>> idx = pd.Index([1, 2, 3, 4])
5136 >>> idx
5137 Index([1, 2, 3, 4], dtype='int64')
5138
5139 >>> 2 in idx
5140 True
5141 >>> 6 in idx
5142 False
5143 """
5144 hash(key)
5145 try:
5146 return key in self._engine
5147 except (OverflowError, TypeError, ValueError):
5148 return False
5149
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # NOTE: annotation only (no runtime assignment happens on this line).
    __hash__: ClassVar[None]  # type: ignore[assignment]
5154
5155 @final
5156 def __setitem__(self, key, value):
5157 raise TypeError("Index does not support mutable operations")
5158
    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        getitem = self._data.__getitem__

        if is_integer(key) or is_float(key):
            # GH#44051 exclude bool, which would return a 2d ndarray
            # scalar positional lookup: return the element itself
            key = com.cast_scalar_indexer(key)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization com.is_bool_indexer and ndim checks.
            result = getitem(key)
            # Going through simple_new for performance.
            return type(self)._simple_new(
                result, name=self._name, refs=self._references
            )

        if com.is_bool_indexer(key):
            # if we have list[bools, length=1e5] then doing this check+convert
            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
            # time below from 3.8 ms to 496 µs
            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
            if is_extension_array_dtype(getattr(key, "dtype", None)):
                key = key.to_numpy(dtype=bool, na_value=False)
            else:
                key = np.asarray(key, dtype=bool)

        result = getitem(key)
        # Because we ruled out integer above, we always get an arraylike here
        if result.ndim > 1:
            disallow_ndim_indexing(result)

        # NB: Using _constructor._simple_new would break if MultiIndex
        # didn't override __getitem__
        return self._constructor._simple_new(result, name=self._name)
5204
5205 def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
5206 """
5207 Fastpath for __getitem__ when we know we have a slice.
5208 """
5209 res = self._data[slobj]
5210 return type(self)._simple_new(res, name=self._name, refs=self._references)
5211
5212 @final
5213 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5214 """
5215 Faster check for ``name in self`` when we know `name` is a Python
5216 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5217 . key lookup). For indexes that can't hold identifiers (everything
5218 but object & categorical) we just return False.
5219
5220 https://github.com/pandas-dev/pandas/issues/19764
5221 """
5222 if (
5223 is_object_dtype(self.dtype)
5224 or is_string_dtype(self.dtype)
5225 or is_categorical_dtype(self.dtype)
5226 ):
5227 return name in self
5228 return False
5229
5230 def append(self, other: Index | Sequence[Index]) -> Index:
5231 """
5232 Append a collection of Index options together.
5233
5234 Parameters
5235 ----------
5236 other : Index or list/tuple of indices
5237
5238 Returns
5239 -------
5240 Index
5241 """
5242 to_concat = [self]
5243
5244 if isinstance(other, (list, tuple)):
5245 to_concat += list(other)
5246 else:
5247 # error: Argument 1 to "append" of "list" has incompatible type
5248 # "Union[Index, Sequence[Index]]"; expected "Index"
5249 to_concat.append(other) # type: ignore[arg-type]
5250
5251 for obj in to_concat:
5252 if not isinstance(obj, Index):
5253 raise TypeError("all inputs must be Index")
5254
5255 names = {obj.name for obj in to_concat}
5256 name = None if len(names) > 1 else self.name
5257
5258 return self._concat(to_concat, name)
5259
5260 def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
5261 """
5262 Concatenate multiple Index objects.
5263 """
5264 to_concat_vals = [x._values for x in to_concat]
5265
5266 result = concat_compat(to_concat_vals)
5267
5268 return Index._with_infer(result, name=name)
5269
    def putmask(self, mask, value) -> Index:
        """
        Return a new Index of the values set with the mask.

        Parameters
        ----------
        mask : array-like
            Boolean mask selecting the positions to replace.
        value : scalar or array-like
            Replacement value(s) written at the masked positions.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.
        """
        mask, noop = validate_putmask(self._values, mask)
        if noop:
            # Mask selects nothing: return an unchanged copy.
            return self.copy()

        if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
            # e.g. None -> np.nan, see also Block._standardize_fill_value
            value = self._na_value

        try:
            converted = self._validate_fill_value(value)
        except (LossySetitemError, ValueError, TypeError) as err:
            if is_object_dtype(self):  # pragma: no cover
                raise err

            # The value does not fit our dtype: upcast to a common dtype
            # and retry the putmask there.
            # See also: Block.coerce_to_target_dtype
            dtype = self._find_common_type_compat(value)
            return self.astype(dtype).putmask(mask, value)

        values = self._values.copy()

        if isinstance(values, np.ndarray):
            converted = setitem_datetimelike_compat(values, mask.sum(), converted)
            np.putmask(values, mask, converted)

        else:
            # Note: we use the original value here, not converted, as
            # _validate_fill_value is not idempotent
            values._putmask(mask, value)

        return self._shallow_copy(values)
5313
    def equals(self, other: Any) -> bool:
        """
        Determine if two Index object are equal.

        The things that are being compared are:

        * The elements inside the Index object.
        * The order of the elements inside the Index object.

        Parameters
        ----------
        other : Any
            The other object to compare against.

        Returns
        -------
        bool
            True if "other" is an Index and it has the same elements and order
            as the calling index; False otherwise.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3])
        >>> idx1
        Index([1, 2, 3], dtype='int64')
        >>> idx1.equals(pd.Index([1, 2, 3]))
        True

        The elements inside are compared

        >>> idx2 = pd.Index(["1", "2", "3"])
        >>> idx2
        Index(['1', '2', '3'], dtype='object')

        >>> idx1.equals(idx2)
        False

        The order is compared

        >>> ascending_idx = pd.Index([1, 2, 3])
        >>> ascending_idx
        Index([1, 2, 3], dtype='int64')
        >>> descending_idx = pd.Index([3, 2, 1])
        >>> descending_idx
        Index([3, 2, 1], dtype='int64')
        >>> ascending_idx.equals(descending_idx)
        False

        The dtype is *not* compared

        >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
        >>> int64_idx
        Index([1, 2, 3], dtype='int64')
        >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
        >>> uint64_idx
        Index([1, 2, 3], dtype='uint64')
        >>> int64_idx.equals(uint64_idx)
        True
        """
        # NB: the order of these dispatch checks matters; each case defers
        # to the side with the more specific comparison logic.
        if self.is_(other):
            # Same underlying object: trivially equal.
            return True

        if not isinstance(other, Index):
            return False

        if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
            # if other is not object, use other's logic for coercion
            return other.equals(self)

        if isinstance(other, ABCMultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            return other.equals(self)

        if isinstance(self._values, ExtensionArray):
            # Dispatch to the ExtensionArray's .equals method.
            if not isinstance(other, type(self)):
                return False

            earr = cast(ExtensionArray, self._data)
            return earr.equals(other._data)

        if is_extension_array_dtype(other.dtype):
            # All EA-backed Index subclasses override equals
            return other.equals(self)

        # Both sides ndarray-backed: defer to array_equivalent for the
        # element-wise comparison.
        return array_equivalent(self._values, other._values)
5400
5401 @final
5402 def identical(self, other) -> bool:
5403 """
5404 Similar to equals, but checks that object attributes and types are also equal.
5405
5406 Returns
5407 -------
5408 bool
5409 If two Index objects have equal elements and same type True,
5410 otherwise False.
5411 """
5412 return (
5413 self.equals(other)
5414 and all(
5415 getattr(self, c, None) == getattr(other, c, None)
5416 for c in self._comparables
5417 )
5418 and type(self) == type(other)
5419 and self.dtype == other.dtype
5420 )
5421
    @final
    def asof(self, label):
        """
        Return the label from the index, or, if not present, the previous one.

        Assuming that the index is sorted, return the passed index label if it
        is in the index, or return the previous index label if the passed one
        is not in the index.

        Parameters
        ----------
        label : object
            The label up to which the method returns the latest index label.

        Returns
        -------
        object
            The passed label if it is in the index. The previous label if the
            passed label is not in the sorted index or `NaN` if there is no
            such label.

        See Also
        --------
        Series.asof : Return the latest value in a Series up to the
            passed index.
        merge_asof : Perform an asof merge (similar to left join but it
            matches on nearest key rather than equal key).
        Index.get_loc : An `asof` is a thin wrapper around `get_loc`
            with method='pad'.

        Examples
        --------
        `Index.asof` returns the latest index label up to the passed label.

        >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
        >>> idx.asof('2014-01-01')
        '2013-12-31'

        If the label is in the index, the method returns the passed label.

        >>> idx.asof('2014-01-02')
        '2014-01-02'

        If all of the labels in the index are later than the passed label,
        NaN is returned.

        >>> idx.asof('1999-01-02')
        nan

        If the index is not sorted, an error is raised.

        >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
        ...                            '2014-01-03'])
        >>> idx_not_sorted.asof('2013-12-31')
        Traceback (most recent call last):
        ValueError: index must be monotonic increasing or decreasing
        """
        self._searchsorted_monotonic(label)  # validate sortedness
        try:
            # First try an exact lookup.
            loc = self.get_loc(label)
        except (KeyError, TypeError):
            # KeyError -> No exact match, try for padded
            # TypeError -> passed e.g. non-hashable, fall through to get
            #  the tested exception message
            indexer = self.get_indexer([label], method="pad")
            if indexer.ndim > 1 or indexer.size > 1:
                raise TypeError("asof requires scalar valued input")
            loc = indexer.item()
            if loc == -1:
                # No index label at or before `label`.
                return self._na_value
        else:
            if isinstance(loc, slice):
                # get_loc can return a slice (e.g. monotonic duplicates);
                # use the last position it covers.
                loc = loc.indices(len(self))[-1]

        return self[loc]
5497
5498 def asof_locs(
5499 self, where: Index, mask: npt.NDArray[np.bool_]
5500 ) -> npt.NDArray[np.intp]:
5501 """
5502 Return the locations (indices) of labels in the index.
5503
5504 As in the `asof` function, if the label (a particular entry in
5505 `where`) is not in the index, the latest index label up to the
5506 passed label is chosen and its index returned.
5507
5508 If all of the labels in the index are later than a label in `where`,
5509 -1 is returned.
5510
5511 `mask` is used to ignore NA values in the index during calculation.
5512
5513 Parameters
5514 ----------
5515 where : Index
5516 An Index consisting of an array of timestamps.
5517 mask : np.ndarray[bool]
5518 Array of booleans denoting where values in the original
5519 data are not NA.
5520
5521 Returns
5522 -------
5523 np.ndarray[np.intp]
5524 An array of locations (indices) of the labels from the Index
5525 which correspond to the return values of the `asof` function
5526 for every element in `where`.
5527 """
5528 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5529 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5530 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5531 locs = self._values[mask].searchsorted(
5532 where._values, side="right" # type: ignore[call-overload]
5533 )
5534 locs = np.where(locs > 0, locs - 1, 0)
5535
5536 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5537
5538 first_value = self._values[mask.argmax()]
5539 result[(locs == 0) & (where._values < first_value)] = -1
5540
5541 return result
5542
5543 def sort_values(
5544 self,
5545 return_indexer: bool = False,
5546 ascending: bool = True,
5547 na_position: str_t = "last",
5548 key: Callable | None = None,
5549 ):
5550 """
5551 Return a sorted copy of the index.
5552
5553 Return a sorted copy of the index, and optionally return the indices
5554 that sorted the index itself.
5555
5556 Parameters
5557 ----------
5558 return_indexer : bool, default False
5559 Should the indices that would sort the index be returned.
5560 ascending : bool, default True
5561 Should the index values be sorted in an ascending order.
5562 na_position : {'first' or 'last'}, default 'last'
5563 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
5564 the end.
5565
5566 .. versionadded:: 1.2.0
5567
5568 key : callable, optional
5569 If not None, apply the key function to the index values
5570 before sorting. This is similar to the `key` argument in the
5571 builtin :meth:`sorted` function, with the notable difference that
5572 this `key` function should be *vectorized*. It should expect an
5573 ``Index`` and return an ``Index`` of the same shape.
5574
5575 .. versionadded:: 1.1.0
5576
5577 Returns
5578 -------
5579 sorted_index : pandas.Index
5580 Sorted copy of the index.
5581 indexer : numpy.ndarray, optional
5582 The indices that the index itself was sorted by.
5583
5584 See Also
5585 --------
5586 Series.sort_values : Sort values of a Series.
5587 DataFrame.sort_values : Sort values in a DataFrame.
5588
5589 Examples
5590 --------
5591 >>> idx = pd.Index([10, 100, 1, 1000])
5592 >>> idx
5593 Index([10, 100, 1, 1000], dtype='int64')
5594
5595 Sort values in ascending order (default behavior).
5596
5597 >>> idx.sort_values()
5598 Index([1, 10, 100, 1000], dtype='int64')
5599
5600 Sort values in descending order, and also get the indices `idx` was
5601 sorted by.
5602
5603 >>> idx.sort_values(ascending=False, return_indexer=True)
5604 (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5605 """
5606 idx = ensure_key_mapped(self, key)
5607
5608 # GH 35584. Sort missing values according to na_position kwarg
5609 # ignore na_position for MultiIndex
5610 if not isinstance(self, ABCMultiIndex):
5611 _as = nargsort(
5612 items=idx, ascending=ascending, na_position=na_position, key=key
5613 )
5614 else:
5615 _as = idx.argsort()
5616 if not ascending:
5617 _as = _as[::-1]
5618
5619 sorted_index = self.take(_as)
5620
5621 if return_indexer:
5622 return sorted_index, _as
5623 else:
5624 return sorted_index
5625
5626 @final
5627 def sort(self, *args, **kwargs):
5628 """
5629 Use sort_values instead.
5630 """
5631 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5632
5633 def shift(self, periods: int = 1, freq=None):
5634 """
5635 Shift index by desired number of time frequency increments.
5636
5637 This method is for shifting the values of datetime-like indexes
5638 by a specified time increment a given number of times.
5639
5640 Parameters
5641 ----------
5642 periods : int, default 1
5643 Number of periods (or increments) to shift by,
5644 can be positive or negative.
5645 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5646 Frequency increment to shift by.
5647 If None, the index is shifted by its own `freq` attribute.
5648 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5649
5650 Returns
5651 -------
5652 pandas.Index
5653 Shifted index.
5654
5655 See Also
5656 --------
5657 Series.shift : Shift values of Series.
5658
5659 Notes
5660 -----
5661 This method is only implemented for datetime-like index classes,
5662 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5663
5664 Examples
5665 --------
5666 Put the first 5 month starts of 2011 into an index.
5667
5668 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5669 >>> month_starts
5670 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5671 '2011-05-01'],
5672 dtype='datetime64[ns]', freq='MS')
5673
5674 Shift the index by 10 days.
5675
5676 >>> month_starts.shift(10, freq='D')
5677 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
5678 '2011-05-11'],
5679 dtype='datetime64[ns]', freq=None)
5680
5681 The default value of `freq` is the `freq` attribute of the index,
5682 which is 'MS' (month start) in this example.
5683
5684 >>> month_starts.shift(10)
5685 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
5686 '2012-03-01'],
5687 dtype='datetime64[ns]', freq='MS')
5688 """
5689 raise NotImplementedError(
5690 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
5691 f"TimedeltaIndex; Got type {type(self).__name__}"
5692 )
5693
5694 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
5695 """
5696 Return the integer indices that would sort the index.
5697
5698 Parameters
5699 ----------
5700 *args
5701 Passed to `numpy.ndarray.argsort`.
5702 **kwargs
5703 Passed to `numpy.ndarray.argsort`.
5704
5705 Returns
5706 -------
5707 np.ndarray[np.intp]
5708 Integer indices that would sort the index if used as
5709 an indexer.
5710
5711 See Also
5712 --------
5713 numpy.argsort : Similar method for NumPy arrays.
5714 Index.sort_values : Return sorted copy of Index.
5715
5716 Examples
5717 --------
5718 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
5719 >>> idx
5720 Index(['b', 'a', 'd', 'c'], dtype='object')
5721
5722 >>> order = idx.argsort()
5723 >>> order
5724 array([1, 0, 3, 2])
5725
5726 >>> idx[order]
5727 Index(['a', 'b', 'c', 'd'], dtype='object')
5728 """
5729 # This works for either ndarray or EA, is overridden
5730 # by RangeIndex, MultIIndex
5731 return self._data.argsort(*args, **kwargs)
5732
5733 def _check_indexing_error(self, key):
5734 if not is_scalar(key):
5735 # if key is not a scalar, directly raise an error (the code below
5736 # would convert to numpy arrays and raise later any way) - GH29926
5737 raise InvalidIndexError(key)
5738
5739 @cache_readonly
5740 def _should_fallback_to_positional(self) -> bool:
5741 """
5742 Should an integer key be treated as positional?
5743 """
5744 return self.inferred_type not in {
5745 "integer",
5746 "mixed-integer",
5747 "floating",
5748 "complex",
5749 }
5750
    # Shared docstring template for get_indexer_non_unique; applied to the
    # method below via @Appender with per-class %-substitutions
    # (e.g. %(target_klass)s).
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.

        Examples
        --------
        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['b', 'b'])
        (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))

        In the example below there are no matched values.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['q', 'r', 't'])
        (array([-1, -1, -1]), array([0, 1, 2]))

        For this reason, the returned ``indexer`` contains only integers equal to -1.
        It demonstrates that there's no match between the index and the ``target``
        values at these positions. The mask [0, 1, 2] in the return value shows that
        the first, second, and third elements are missing.

        Notice that the return value is a tuple contains two items. In the example
        below the first item is an array of locations in ``index``. The second
        item is a mask shows that the first and third elements are missing.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['f', 'b', 's'])
        (array([-1, 1, 3, 4, -1]), array([0, 2]))
        """
5798
    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)
        target = self._maybe_cast_listlike_indexer(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            #  that can be matched to Interval scalars.
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            # Promotion changed at least one side; redo the lookup with the
            # promoted pair.
            return pself.get_indexer_non_unique(ptarget)

        if not is_dtype_equal(self.dtype, target.dtype):
            # Cast both sides to a common dtype and retry there.
            # TODO: if object, could use infer_dtype to preempt costly
            #  conversion if still non-comparable?
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        # TODO: get_indexer has fastpaths for both Categorical-self and
        #  Categorical-target. Can we do something similar here?

        # Note: _maybe_promote ensures we never get here with MultiIndex
        #  self and non-Multi target
        tgt_values = target._get_engine_target()
        if self._is_multi and target._is_multi:
            engine = self._engine
            # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
            # no attribute "_extract_level_codes"
            tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), ensure_platform_int(missing)
5838
5839 @final
5840 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
5841 """
5842 Guaranteed return of an indexer even when non-unique.
5843
5844 This dispatches to get_indexer or get_indexer_non_unique
5845 as appropriate.
5846
5847 Returns
5848 -------
5849 np.ndarray[np.intp]
5850 List of indices.
5851
5852 Examples
5853 --------
5854 >>> idx = pd.Index([np.nan, 'var1', np.nan])
5855 >>> idx.get_indexer_for([np.nan])
5856 array([0, 2])
5857 """
5858 if self._index_as_unique:
5859 return self.get_indexer(target)
5860 indexer, _ = self.get_indexer_non_unique(target)
5861 return indexer
5862
    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
        """
        Analogue to get_indexer that raises if any elements are missing.

        Parameters
        ----------
        key : label or list-like of labels
            Labels to locate.
        axis_name : str
            Only used in the KeyError message raised for missing labels.

        Returns
        -------
        tuple[Index, np.ndarray]
            The matched labels (taken from self) and their integer positions.

        Raises
        ------
        KeyError
            If any requested label is absent (via _raise_if_missing).
        """
        keyarr = key
        if not isinstance(keyarr, Index):
            keyarr = com.asarray_tuplesafe(keyarr)

        if self._index_as_unique:
            indexer = self.get_indexer_for(keyarr)
            keyarr = self.reindex(keyarr)[0]
        else:
            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

        self._raise_if_missing(keyarr, indexer, axis_name)

        keyarr = self.take(indexer)
        if isinstance(key, Index):
            # GH 42790 - Preserve name from an Index
            keyarr.name = key.name
        if (
            isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
        ) or isinstance(keyarr.dtype, DatetimeTZDtype):
            # DTI/TDI.take can infer a freq in some cases when we dont want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

        return keyarr, indexer
5895
5896 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
5897 """
5898 Check that indexer can be used to return a result.
5899
5900 e.g. at least one element was found,
5901 unless the list of keys was actually empty.
5902
5903 Parameters
5904 ----------
5905 key : list-like
5906 Targeted labels (only used to show correct error message).
5907 indexer: array-like of booleans
5908 Indices corresponding to the key,
5909 (with -1 indicating not found).
5910 axis_name : str
5911
5912 Raises
5913 ------
5914 KeyError
5915 If at least one key was requested but none was found.
5916 """
5917 if len(key) == 0:
5918 return
5919
5920 # Count missing values
5921 missing_mask = indexer < 0
5922 nmissing = missing_mask.sum()
5923
5924 if nmissing:
5925 # TODO: remove special-case; this is just to keep exception
5926 # message tests from raising while debugging
5927 use_interval_msg = is_interval_dtype(self.dtype) or (
5928 is_categorical_dtype(self.dtype)
5929 # "Index" has no attribute "categories" [attr-defined]
5930 and is_interval_dtype(
5931 self.categories.dtype # type: ignore[attr-defined]
5932 )
5933 )
5934
5935 if nmissing == len(indexer):
5936 if use_interval_msg:
5937 key = list(key)
5938 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5939
5940 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
5941 raise KeyError(f"{not_found} not in index")
5942
5943 @overload
5944 def _get_indexer_non_comparable(
5945 self, target: Index, method, unique: Literal[True] = ...
5946 ) -> npt.NDArray[np.intp]:
5947 ...
5948
5949 @overload
5950 def _get_indexer_non_comparable(
5951 self, target: Index, method, unique: Literal[False]
5952 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5953 ...
5954
5955 @overload
5956 def _get_indexer_non_comparable(
5957 self, target: Index, method, unique: bool = True
5958 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5959 ...
5960
5961 @final
5962 def _get_indexer_non_comparable(
5963 self, target: Index, method, unique: bool = True
5964 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5965 """
5966 Called from get_indexer or get_indexer_non_unique when the target
5967 is of a non-comparable dtype.
5968
5969 For get_indexer lookups with method=None, get_indexer is an _equality_
5970 check, so non-comparable dtypes mean we will always have no matches.
5971
5972 For get_indexer lookups with a method, get_indexer is an _inequality_
5973 check, so non-comparable dtypes mean we will always raise TypeError.
5974
5975 Parameters
5976 ----------
5977 target : Index
5978 method : str or None
5979 unique : bool, default True
5980 * True if called from get_indexer.
5981 * False if called from get_indexer_non_unique.
5982
5983 Raises
5984 ------
5985 TypeError
5986 If doing an inequality check, i.e. method is not None.
5987 """
5988 if method is not None:
5989 other = _unpack_nested_dtype(target)
5990 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
5991
5992 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
5993 if unique:
5994 # This is for get_indexer
5995 return no_matches
5996 else:
5997 # This is for get_indexer_non_unique
5998 missing = np.arange(len(target), dtype=np.intp)
5999 return no_matches, missing
6000
    @property
    def _index_as_unique(self) -> bool:
        """
        Whether we should treat this as unique for the sake of
        get_indexer vs get_indexer_non_unique.

        For IntervalIndex compat.
        """
        return self.is_unique

    # NOTE(review): presumably the message used by reindexing paths that
    # require a unique index — confirm at the call sites, which are outside
    # this view.
    _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
6012
    @final
    def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.

        Returns
        -------
        tuple[Index, Index]
            Possibly-promoted versions of (self, other); either may be
            returned unchanged.
        """

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (
                self.tz is not None
                and other.tz is not None
                and not tz_compare(self.tz, other.tz)
            ):
                # standardize on UTC
                return self.tz_convert("UTC"), other.tz_convert("UTC")

        elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            try:
                return type(other)(self), other
            except OutOfBoundsDatetime:
                # Dates outside the datetime64 bounds: leave both unchanged.
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we dont have tests that get here
            return type(other)(self), other

        elif self.dtype.kind == "u" and other.dtype.kind == "i":
            # GH#41873
            if other.min() >= 0:
                # All values non-negative, so the signed side can safely be
                # viewed as unsigned.
                # lookup min as it may be cached
                # TODO: may need itemsize check if we have non-64-bit Indexes
                return self, other.astype(self.dtype)

        elif self._is_multi and not other._is_multi:
            try:
                # "Type[Index]" has no attribute "from_tuples"
                other = type(self).from_tuples(other)  # type: ignore[attr-defined]
            except (TypeError, ValueError):
                # let's instead try with a straight Index
                self = Index(self._values)

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we dont need to re-implement on the subclasses
            other, self = other._maybe_promote(self)

        return self, other
6058
6059 @final
6060 def _find_common_type_compat(self, target) -> DtypeObj:
6061 """
6062 Implementation of find_common_type that adjusts for Index-specific
6063 special cases.
6064 """
6065 target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
6066
6067 # special case: if one dtype is uint64 and the other a signed int, return object
6068 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
6069 # Now it's:
6070 # * float | [u]int -> float
6071 # * uint64 | signed int -> object
6072 # We may change union(float | [u]int) to go to object.
6073 if self.dtype == "uint64" or target_dtype == "uint64":
6074 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
6075 target_dtype
6076 ):
6077 return _dtype_obj
6078
6079 dtype = find_result_type(self._values, target)
6080 dtype = common_dtype_categorical_compat([self, target], dtype)
6081 return dtype
6082
6083 @final
6084 def _should_compare(self, other: Index) -> bool:
6085 """
6086 Check if `self == other` can ever have non-False entries.
6087 """
6088
6089 if (is_bool_dtype(other) and is_any_real_numeric_dtype(self)) or (
6090 is_bool_dtype(self) and is_any_real_numeric_dtype(other)
6091 ):
6092 # GH#16877 Treat boolean labels passed to a numeric index as not
6093 # found. Without this fix False and True would be treated as 0 and 1
6094 # respectively.
6095 return False
6096
6097 other = _unpack_nested_dtype(other)
6098 dtype = other.dtype
6099 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6100
6101 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6102 """
6103 Can we compare values of the given dtype to our own?
6104 """
6105 if self.dtype.kind == "b":
6106 return dtype.kind == "b"
6107 elif is_numeric_dtype(self.dtype):
6108 return is_numeric_dtype(dtype)
6109 # TODO: this was written assuming we only get here with object-dtype,
6110 # which is nom longer correct. Can we specialize for EA?
6111 return True
6112
6113 @final
6114 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6115 """
6116 Group the index labels by a given array of values.
6117
6118 Parameters
6119 ----------
6120 values : array
6121 Values used to determine the groups.
6122
6123 Returns
6124 -------
6125 dict
6126 {group name -> group labels}
6127 """
6128 # TODO: if we are a MultiIndex, we can do better
6129 # that converting to tuples
6130 if isinstance(values, ABCMultiIndex):
6131 values = values._values
6132 values = Categorical(values)
6133 result = values._reverse_indexer()
6134
6135 # map to the label
6136 result = {k: self.take(v) for k, v in result.items()}
6137
6138 return PrettyDict(result)
6139
    def map(self, mapper, na_action=None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        Union[Index, MultiIndex]
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # we can return a MultiIndex
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                # NOTE: truthiness check — a falsy name (e.g. "" or 0) is
                # treated the same as no name here.
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty: keep the original dtype
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        # don't want to cast back to floating. But if we are UInt64
        # and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6188
6189 # TODO: De-duplicate with map, xref GH#32349
6190 @final
6191 def _transform_index(self, func, *, level=None) -> Index:
6192 """
6193 Apply function to all values found in index.
6194
6195 This includes transforming multiindex entries separately.
6196 Only apply function to one level of the MultiIndex if level is specified.
6197 """
6198 if isinstance(self, ABCMultiIndex):
6199 values = [
6200 self.get_level_values(i).map(func)
6201 if i == level or level is None
6202 else self.get_level_values(i)
6203 for i in range(self.nlevels)
6204 ]
6205 return type(self).from_arrays(values)
6206 else:
6207 items = [func(x) for x in self]
6208 return Index(items, name=self.name, tupleize_cols=False)
6209
    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        np.ndarray[bool]
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Index([1, 2, 3], dtype='int64')

        Check whether each index value in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                  ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1, 'red'),
                    (2, 'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            # For a flat Index, `level` may only refer to the single existing
            # level; this validates it (raising otherwise).
            self._validate_index_level(level)
        # Vectorized membership test against the underlying values.
        return algos.isin(self._values, values)
6294
    def _get_string_slice(self, key: str_t):
        """
        Compute an indexer for a partial-string key.

        The base Index does not support partial string indexing; subclasses
        that do (DatetimeIndex, TimedeltaIndex, PeriodIndex) override this.
        """
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
        raise NotImplementedError
6299
6300 def slice_indexer(
6301 self,
6302 start: Hashable | None = None,
6303 end: Hashable | None = None,
6304 step: int | None = None,
6305 ) -> slice:
6306 """
6307 Compute the slice indexer for input labels and step.
6308
6309 Index needs to be ordered and unique.
6310
6311 Parameters
6312 ----------
6313 start : label, default None
6314 If None, defaults to the beginning.
6315 end : label, default None
6316 If None, defaults to the end.
6317 step : int, default None
6318
6319 Returns
6320 -------
6321 slice
6322
6323 Raises
6324 ------
6325 KeyError : If key does not exist, or key is not unique and index is
6326 not ordered.
6327
6328 Notes
6329 -----
6330 This function assumes that the data is sorted, so use at your own peril
6331
6332 Examples
6333 --------
6334 This is a method on all index types. For example you can do:
6335
6336 >>> idx = pd.Index(list('abcd'))
6337 >>> idx.slice_indexer(start='b', end='c')
6338 slice(1, 3, None)
6339
6340 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6341 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6342 slice(1, 3, None)
6343 """
6344 start_slice, end_slice = self.slice_locs(start, end, step=step)
6345
6346 # return a slice
6347 if not is_scalar(start_slice):
6348 raise AssertionError("Start slice bound is non-scalar")
6349 if not is_scalar(end_slice):
6350 raise AssertionError("End slice bound is non-scalar")
6351
6352 return slice(start_slice, end_slice, step)
6353
6354 def _maybe_cast_indexer(self, key):
6355 """
6356 If we have a float key and are not a floating index, then try to cast
6357 to an int if equivalent.
6358 """
6359 return key
6360
6361 def _maybe_cast_listlike_indexer(self, target) -> Index:
6362 """
6363 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6364 """
6365 return ensure_index(target)
6366
6367 @final
6368 def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
6369 """
6370 If we are positional indexer, validate that we have appropriate
6371 typed bounds must be an integer.
6372 """
6373 assert kind in ["getitem", "iloc"]
6374
6375 if key is not None and not is_integer(key):
6376 self._raise_invalid_indexer(form, key)
6377
6378 def _maybe_cast_slice_bound(self, label, side: str_t):
6379 """
6380 This function should be overloaded in subclasses that allow non-trivial
6381 casting on label-slice bounds, e.g. datetime-like indices allowing
6382 strings containing formatted datetimes.
6383
6384 Parameters
6385 ----------
6386 label : object
6387 side : {'left', 'right'}
6388
6389 Returns
6390 -------
6391 label : object
6392
6393 Notes
6394 -----
6395 Value of `side` parameter should be validated in caller.
6396 """
6397
6398 # We are a plain index here (sub-class override this method if they
6399 # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
6400
6401 if is_numeric_dtype(self.dtype):
6402 return self._maybe_cast_indexer(label)
6403
6404 # reject them, if index does not contain label
6405 if (is_float(label) or is_integer(label)) and label not in self:
6406 self._raise_invalid_indexer("slice", label)
6407
6408 return label
6409
6410 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6411 if self.is_monotonic_increasing:
6412 return self.searchsorted(label, side=side)
6413 elif self.is_monotonic_decreasing:
6414 # np.searchsorted expects ascending sort order, have to reverse
6415 # everything for it to work (element ordering, search side and
6416 # resulting value).
6417 pos = self[::-1].searchsorted(
6418 label, side="right" if side == "left" else "left"
6419 )
6420 return len(self) - pos
6421
6422 raise ValueError("index must be monotonic increasing or decreasing")
6423
    def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.
        """

        if side not in ("left", "right"):
            raise ValueError(
                "Invalid value for side kwarg, must be either "
                f"'left' or 'right': {side}"
            )

        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            # Label not present: fall back to a monotonic search position;
            # if the index is not monotonic either, re-raise the lookup error.
            try:
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array, which
            # is OK as long as they are representable by a slice.
            assert is_bool_dtype(slc.dtype)
            slc = lib.maybe_booleans_to_slice(slc.view("u1"))
            if isinstance(slc, np.ndarray):
                # Still an ndarray -> the matches are non-contiguous, so no
                # single slice bound exists for this label.
                raise KeyError(
                    f"Cannot get {side} slice bound for non-unique "
                    f"label: {repr(original_label)}"
                )

        if isinstance(slc, slice):
            if side == "left":
                return slc.start
            else:
                return slc.stop
        else:
            # Scalar position: the right bound is one past the match.
            if side == "right":
                return slc + 1
            else:
                return slc
6485
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.

        Returns
        -------
        tuple[int, int]

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        # None or a non-negative step means a forward slice; reverse slices
        # are handled by swapping the bounds and shifting at the end.
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as timestamps; let downstream lookup handle it
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            # open start (or get_slice_bound returned a None slice.start)
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            # open end (or get_slice_bound returned a None slice.stop)
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #             s='A'                 e='B'
            # AFTER SWAP:    |                    |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                    |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
6573
6574 def delete(self: _IndexT, loc) -> _IndexT:
6575 """
6576 Make new Index with passed location(-s) deleted.
6577
6578 Parameters
6579 ----------
6580 loc : int or list of int
6581 Location of item(-s) which will be deleted.
6582 Use a list of locations to delete more than one value at the same time.
6583
6584 Returns
6585 -------
6586 Index
6587 Will be same type as self, except for RangeIndex.
6588
6589 See Also
6590 --------
6591 numpy.delete : Delete any rows and column from NumPy array (ndarray).
6592
6593 Examples
6594 --------
6595 >>> idx = pd.Index(['a', 'b', 'c'])
6596 >>> idx.delete(1)
6597 Index(['a', 'c'], dtype='object')
6598
6599 >>> idx = pd.Index(['a', 'b', 'c'])
6600 >>> idx.delete([0, 2])
6601 Index(['b'], dtype='object')
6602 """
6603 values = self._values
6604 res_values: ArrayLike
6605 if isinstance(values, np.ndarray):
6606 # TODO(__array_function__): special casing will be unnecessary
6607 res_values = np.delete(values, loc)
6608 else:
6609 res_values = values.delete(loc)
6610
6611 # _constructor so RangeIndex-> Index with an int64 dtype
6612 return self._constructor._simple_new(res_values, name=self.name)
6613
    def insert(self, loc: int, item) -> Index:
        """
        Make new Index inserting new item at location.

        Follows Python numpy.insert semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        Index
        """
        # unwrap 0-dim ndarrays to their scalar value
        item = lib.item_from_zerodim(item)
        if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
            # normalize NA-likes to this dtype's canonical NA value
            item = self._na_value

        arr = self._values

        try:
            if isinstance(arr, ExtensionArray):
                res_values = arr.insert(loc, item)
                return type(self)._simple_new(res_values, name=self.name)
            else:
                item = self._validate_fill_value(item)
        except (TypeError, ValueError, LossySetitemError):
            # e.g. trying to insert an integer into a DatetimeIndex
            # We cannot keep the same dtype, so cast to the (often object)
            # minimal shared dtype before doing the insert.
            dtype = self._find_common_type_compat(item)
            return self.astype(dtype).insert(loc, item)

        if arr.dtype != object or not isinstance(
            item, (tuple, np.datetime64, np.timedelta64)
        ):
            # with object-dtype we need to worry about numpy incorrectly casting
            # dt64/td64 to integer, also about treating tuples as sequences
            # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
            casted = arr.dtype.type(item)
            new_values = np.insert(arr, loc, casted)

        else:
            # Insert a placeholder, then assign the item directly so numpy
            # never gets a chance to mis-cast it.
            # error: No overload variant of "insert" matches argument types
            # "ndarray[Any, Any]", "int", "None"
            new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
            # account for the placeholder shifting positions for negative loc
            loc = loc if loc >= 0 else loc - 1
            new_values[loc] = item

        return Index._with_infer(new_values, name=self.name)
6665
6666 def drop(
6667 self,
6668 labels: Index | np.ndarray | Iterable[Hashable],
6669 errors: IgnoreRaise = "raise",
6670 ) -> Index:
6671 """
6672 Make new Index with passed list of labels deleted.
6673
6674 Parameters
6675 ----------
6676 labels : array-like or scalar
6677 errors : {'ignore', 'raise'}, default 'raise'
6678 If 'ignore', suppress error and existing labels are dropped.
6679
6680 Returns
6681 -------
6682 Index
6683 Will be same type as self, except for RangeIndex.
6684
6685 Raises
6686 ------
6687 KeyError
6688 If not all of the labels are found in the selected axis
6689 """
6690 if not isinstance(labels, Index):
6691 # avoid materializing e.g. RangeIndex
6692 arr_dtype = "object" if self.dtype == "object" else None
6693 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
6694
6695 indexer = self.get_indexer_for(labels)
6696 mask = indexer == -1
6697 if mask.any():
6698 if errors != "ignore":
6699 raise KeyError(f"{list(labels[mask])} not found in axis")
6700 indexer = indexer[~mask]
6701 return self.delete(indexer)
6702
    def infer_objects(self, copy: bool = True) -> Index:
        """
        If we have an object dtype, try to infer a non-object dtype.

        Parameters
        ----------
        copy : bool, default True
            Whether to make a copy in cases where no inference occurs.
        """
        if self._is_multi:
            raise NotImplementedError(
                "infer_objects is not implemented for MultiIndex. "
                "Use index.to_frame().infer_objects() instead."
            )
        if self.dtype != object:
            # nothing to infer; honor the copy flag
            return self.copy() if copy else self

        values = self._values
        values = cast("npt.NDArray[np.object_]", values)
        # try to convert object values to datetime/timedelta/period/interval
        res_values = lib.maybe_convert_objects(
            values,
            convert_datetime=True,
            convert_timedelta=True,
            convert_period=True,
            convert_interval=True,
        )
        if copy and res_values is values:
            # no conversion happened but a copy was requested
            return self.copy()
        result = Index(res_values, name=self.name)
        if not copy and res_values is values and self._references is not None:
            # values are shared with self: register the new Index with the
            # same reference tracker so copy-on-write bookkeeping sees it
            result._references = self._references
            result._references.add_index_reference(result)
        return result
6736
6737 # --------------------------------------------------------------------
6738 # Generated Arithmetic, Comparison, and Unary Methods
6739
    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.
        """
        if self.is_(other):
            # fastpath: comparing an Index with itself has a known answer,
            # except at NA positions (NA != NA)
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            # unwrap Index/Series to the underlying array
            other = extract_array(other, extract_numpy=True)
        else:
            # MultiIndex has no single backing array; compare as object ndarray
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            # let the ExtensionArray implement the comparison
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
6786
6787 @final
6788 def _logical_method(self, other, op):
6789 res_name = ops.get_op_result_name(self, other)
6790
6791 lvalues = self._values
6792 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
6793
6794 res_values = ops.logical_op(lvalues, rvalues, op)
6795 return self._construct_result(res_values, name=res_name)
6796
6797 @final
6798 def _construct_result(self, result, name):
6799 if isinstance(result, tuple):
6800 return (
6801 Index(result[0], name=name, dtype=result[0].dtype),
6802 Index(result[1], name=name, dtype=result[1].dtype),
6803 )
6804 return Index(result, name=name, dtype=result.dtype)
6805
6806 def _arith_method(self, other, op):
6807 if (
6808 isinstance(other, Index)
6809 and is_object_dtype(other.dtype)
6810 and type(other) is not Index
6811 ):
6812 # We return NotImplemented for object-dtype index *subclasses* so they have
6813 # a chance to implement ops before we unwrap them.
6814 # See https://github.com/pandas-dev/pandas/issues/31109
6815 return NotImplemented
6816
6817 return super()._arith_method(other, op)
6818
6819 @final
6820 def _unary_method(self, op):
6821 result = op(self._values)
6822 return Index(result, name=self.name)
6823
    def __abs__(self) -> Index:
        # elementwise absolute value via the shared unary dispatcher
        return self._unary_method(operator.abs)
6826
    def __neg__(self) -> Index:
        # elementwise negation via the shared unary dispatcher
        return self._unary_method(operator.neg)
6829
    def __pos__(self) -> Index:
        # elementwise unary plus via the shared unary dispatcher
        return self._unary_method(operator.pos)
6832
    def __invert__(self) -> Index:
        # GH#8875
        # elementwise inversion (~) via the shared unary dispatcher
        return self._unary_method(operator.inv)
6836
6837 # --------------------------------------------------------------------
6838 # Reductions
6839
    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        nv.validate_any(args, kwargs)
        # raises for dtypes that do not support logical reductions
        self._maybe_disable_logical_methods("any")
        # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.any(self.values)  # type: ignore[arg-type]
6883
    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False
        """
        nv.validate_all(args, kwargs)
        # raises for dtypes that do not support logical reductions
        self._maybe_disable_logical_methods("all")
        # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.all(self.values)  # type: ignore[arg-type]
6930
6931 @final
6932 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
6933 """
6934 raise if this Index subclass does not support any or all.
6935 """
6936 if (
6937 isinstance(self, ABCMultiIndex)
6938 or needs_i8_conversion(self.dtype)
6939 or is_interval_dtype(self.dtype)
6940 or is_categorical_dtype(self.dtype)
6941 or is_float_dtype(self.dtype)
6942 ):
6943 # This call will raise
6944 make_invalid_op(opname)(self)
6945
6946 @Appender(IndexOpsMixin.argmin.__doc__)
6947 def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
6948 nv.validate_argmin(args, kwargs)
6949 nv.validate_minmax_axis(axis)
6950
6951 if not self._is_multi and self.hasnans:
6952 # Take advantage of cache
6953 mask = self._isnan
6954 if not skipna or mask.all():
6955 return -1
6956 return super().argmin(skipna=skipna)
6957
6958 @Appender(IndexOpsMixin.argmax.__doc__)
6959 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
6960 nv.validate_argmax(args, kwargs)
6961 nv.validate_minmax_axis(axis)
6962
6963 if not self._is_multi and self.hasnans:
6964 # Take advantage of cache
6965 mask = self._isnan
6966 if not skipna or mask.all():
6967 return -1
6968 return super().argmax(skipna=skipna)
6969
6970 @doc(IndexOpsMixin.min)
6971 def min(self, axis=None, skipna: bool = True, *args, **kwargs):
6972 nv.validate_min(args, kwargs)
6973 nv.validate_minmax_axis(axis)
6974
6975 if not len(self):
6976 return self._na_value
6977
6978 if len(self) and self.is_monotonic_increasing:
6979 # quick check
6980 first = self[0]
6981 if not isna(first):
6982 return first
6983
6984 if not self._is_multi and self.hasnans:
6985 # Take advantage of cache
6986 mask = self._isnan
6987 if not skipna or mask.all():
6988 return self._na_value
6989
6990 if not self._is_multi and not isinstance(self._values, np.ndarray):
6991 return self._values._reduce(name="min", skipna=skipna)
6992
6993 return super().min(skipna=skipna)
6994
6995 @doc(IndexOpsMixin.max)
6996 def max(self, axis=None, skipna: bool = True, *args, **kwargs):
6997 nv.validate_max(args, kwargs)
6998 nv.validate_minmax_axis(axis)
6999
7000 if not len(self):
7001 return self._na_value
7002
7003 if len(self) and self.is_monotonic_increasing:
7004 # quick check
7005 last = self[-1]
7006 if not isna(last):
7007 return last
7008
7009 if not self._is_multi and self.hasnans:
7010 # Take advantage of cache
7011 mask = self._isnan
7012 if not skipna or mask.all():
7013 return self._na_value
7014
7015 if not self._is_multi and not isinstance(self._values, np.ndarray):
7016 return self._values._reduce(name="max", skipna=skipna)
7017
7018 return super().max(skipna=skipna)
7019
7020 # --------------------------------------------------------------------
7021
    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        # An Index is always 1-dimensional, so the shape is just (length,).
        return (len(self),)
7030
7031
def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # single sequence -> flat Index, with the single name if provided
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)
7070
7071
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if isinstance(index_like, ABCSeries):
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index(index_like, copy=copy)

    if type(index_like) is not list:
        # must check for exactly list here because of strict type
        # check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        # a list of arrays becomes a MultiIndex
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)
    return Index(index_like, copy=copy, tupleize_cols=False)
7129
7130
def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        # no __len__ (e.g. a generator): materialize it
        seq = list(seq)
    return seq
7141
7142
def trim_front(strings: list[str]) -> list[str]:
    """
    Trim leading spaces common to all strings.

    Space columns are removed from the front of every string for as long
    as every string is non-empty and starts with a space; an all-space
    string may be consumed entirely.  Returns the input object unchanged
    when nothing is trimmed.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings

    def _trimmable(s: str) -> int:
        # number of leading columns strippable from this string: its count
        # of leading spaces; an all-space (or empty) string caps at its length
        stripped = s.lstrip(" ")
        return len(s) if not stripped else len(s) - len(stripped)

    # Strip exactly as many columns as every string allows, in one pass
    # instead of repeatedly rebuilding the list column by column.
    n_strip = min(_trimmable(s) for s in strings)
    if n_strip == 0:
        return strings
    return [s[n_strip:] for s in strings]
7160
7161
7162def _validate_join_method(method: str) -> None:
7163 if method not in ["left", "right", "inner", "outer"]:
7164 raise ValueError(f"do not recognize join method {method}")
7165
7166
def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.

    Parameters
    ----------
    name : Hashable or None
        Explicitly passed name; used as-is when not None.
    obj : object
        Data the index is built from; its ``.name`` is used only when it
        is an Index or Series.
    cls : type
        Class whose ``__name__`` appears in the error message.

    Returns
    -------
    Hashable

    Raises
    ------
    TypeError
        If the resulting name is not hashable.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Note we don't just check for "name" attribute since that would
        # pick up e.g. dtype.name
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name
7181
7182
def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    tuple
        The unanimous 'names' found, with None where indexes disagree.
    """
    all_names = (tuple(idx.names) for idx in indexes)
    # group names level-by-level (None-padded for shorter indexes)
    per_level = zip_longest(*all_names)
    return tuple(
        level_names.pop() if len(level_names) == 1 else None
        for level_names in map(set, per_level)
    )
7200
7201
def _unpack_nested_dtype(other: Index) -> Index:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    from pandas.core.arrays.arrow import ArrowDtype

    dtype = other.dtype
    if isinstance(dtype, CategoricalDtype):
        # If there is ever a SparseIndex, this could get dispatched
        # here too.
        return dtype.categories
    if isinstance(dtype, ArrowDtype):
        # GH 53617: unwrap dictionary-encoded arrow dtypes to the value type
        import pyarrow as pa

        if pa.types.is_dictionary(dtype.pyarrow_dtype):
            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
    return other
7229
7230
def _maybe_try_sort(result, sort):
    # sort=False skips sorting entirely; sort=True raises on incomparable
    # values, while sort=None downgrades the failure to a RuntimeWarning.
    if sort is False:
        return result
    try:
        return algos.safe_sort(result)
    except TypeError as err:
        if sort is True:
            raise
        warnings.warn(
            f"{err}, sort order is undefined for incomparable objects.",
            RuntimeWarning,
            stacklevel=find_stack_level(),
        )
        return result