1from __future__ import annotations
2
3from functools import wraps
4from sys import getsizeof
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Collection,
10 Generator,
11 Hashable,
12 Iterable,
13 List,
14 Literal,
15 Sequence,
16 Tuple,
17 cast,
18)
19import warnings
20
21import numpy as np
22
23from pandas._config import get_option
24
25from pandas._libs import (
26 algos as libalgos,
27 index as libindex,
28 lib,
29)
30from pandas._libs.hashtable import duplicated
31from pandas._typing import (
32 AnyAll,
33 AnyArrayLike,
34 Axis,
35 DropKeep,
36 DtypeObj,
37 F,
38 IgnoreRaise,
39 IndexLabel,
40 Scalar,
41 Shape,
42 npt,
43)
44from pandas.compat.numpy import function as nv
45from pandas.errors import (
46 InvalidIndexError,
47 PerformanceWarning,
48 UnsortedIndexError,
49)
50from pandas.util._decorators import (
51 Appender,
52 cache_readonly,
53 doc,
54)
55from pandas.util._exceptions import find_stack_level
56
57from pandas.core.dtypes.cast import coerce_indexer_dtype
58from pandas.core.dtypes.common import (
59 ensure_int64,
60 ensure_platform_int,
61 is_categorical_dtype,
62 is_extension_array_dtype,
63 is_hashable,
64 is_integer,
65 is_iterator,
66 is_list_like,
67 is_object_dtype,
68 is_scalar,
69 pandas_dtype,
70)
71from pandas.core.dtypes.dtypes import ExtensionDtype
72from pandas.core.dtypes.generic import (
73 ABCDataFrame,
74 ABCDatetimeIndex,
75 ABCTimedeltaIndex,
76)
77from pandas.core.dtypes.missing import (
78 array_equivalent,
79 isna,
80)
81
82import pandas.core.algorithms as algos
83from pandas.core.array_algos.putmask import validate_putmask
84from pandas.core.arrays import Categorical
85from pandas.core.arrays.categorical import factorize_from_iterables
86import pandas.core.common as com
87import pandas.core.indexes.base as ibase
88from pandas.core.indexes.base import (
89 Index,
90 _index_shared_docs,
91 ensure_index,
92 get_unanimous_names,
93)
94from pandas.core.indexes.frozen import FrozenList
95from pandas.core.ops.invalid import make_invalid_op
96from pandas.core.sorting import (
97 get_group_index,
98 indexer_from_factorized,
99 lexsort_indexer,
100)
101
102from pandas.io.formats.printing import pprint_thing
103
104if TYPE_CHECKING:
105 from pandas import (
106 CategoricalIndex,
107 DataFrame,
108 Series,
109 )
110
# Shared docstring substitutions: start from the base Index defaults and
# override the class names so inherited docstrings render for MultiIndex.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
)
115
116
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Each level occupies a disjoint range of bits; shifting in place by
        # the pre-calculated per-level offsets keeps lexicographic order.
        codes <<= self.offsets

        # Because the shifted bit ranges are disjoint, OR-ing the columns is
        # the same as summing them: the result is one positive integer that
        # packs every level's code.
        if codes.ndim == 1:
            # A single key.
            return np.bitwise_or.reduce(codes)
        # One packed integer per row.
        return np.bitwise_or.reduce(codes, axis=1)
154
155
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows the 64 bits integers, and uses an ObjectEngine
    containing Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shifted values can exceed 64 bits, so switch to object dtype
        # (arbitrary-precision Python ints) before applying the per-level
        # bit offsets.
        codes = codes.astype("object") << self.offsets

        # With disjoint bit ranges per level, OR-ing the columns packs every
        # level's code into a single positive integer (per row).
        if codes.ndim == 1:
            # A single key.
            return np.bitwise_or.reduce(codes)
        # One packed integer per row.
        return np.bitwise_or.reduce(codes, axis=1)
195
196
def names_compat(meth: F) -> F:
    """
    A decorator to allow either `name` or `names` keyword but not both.

    ``name`` is normalized to ``names`` before the wrapped method runs,
    which makes it easier to share code with the base class.
    """

    @wraps(meth)
    def wrapper(self_or_cls, *args, **kwargs):
        if "name" in kwargs and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if "name" in kwargs:
            kwargs["names"] = kwargs.pop("name")
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, wrapper)
214
215
216class MultiIndex(Index):
217 """
218 A multi-level, or hierarchical, index object for pandas objects.
219
220 Parameters
221 ----------
222 levels : sequence of arrays
223 The unique labels for each level.
224 codes : sequence of arrays
225 Integers for each level designating which label at each location.
226 sortorder : optional int
227 Level of sortedness (must be lexicographically sorted by that
228 level).
229 names : optional sequence of objects
230 Names for each of the index levels. (name is accepted for compat).
231 copy : bool, default False
232 Copy the meta-data.
233 verify_integrity : bool, default True
234 Check that the levels/codes are consistent and valid.
235
236 Attributes
237 ----------
238 names
239 levels
240 codes
241 nlevels
242 levshape
243 dtypes
244
245 Methods
246 -------
247 from_arrays
248 from_tuples
249 from_product
250 from_frame
251 set_levels
252 set_codes
253 to_frame
254 to_flat_index
255 sortlevel
256 droplevel
257 swaplevel
258 reorder_levels
259 remove_unused_levels
260 get_level_values
261 get_indexer
262 get_loc
263 get_locs
264 get_loc_level
265 drop
266
267 See Also
268 --------
269 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
270 MultiIndex.from_product : Create a MultiIndex from the cartesian product
271 of iterables.
272 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
273 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
274 Index : The base pandas Index type.
275
276 Notes
277 -----
278 See the `user guide
279 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
280 for more.
281
282 Examples
283 --------
284 A new ``MultiIndex`` is typically constructed using one of the helper
285 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
286 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
287
288 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
289 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
290 MultiIndex([(1, 'red'),
291 (1, 'blue'),
292 (2, 'red'),
293 (2, 'blue')],
294 names=['number', 'color'])
295
296 See further examples for how to construct a MultiIndex in the doc strings
297 of the mentioned helper methods.
298 """
299
    # Attributes hidden from tab-completion; MultiIndex adds none beyond Index.
    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    # Per-level names/levels/codes placeholders; real values are installed by
    # __new__ via _set_names/_set_levels/_set_codes.
    _names: list[Hashable | None] = []
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ["names"]

    # Sortedness level claimed at construction; checked against the actual
    # lexsort depth in _verify_integrity (None when unknown).
    sortorder: int | None
310
311 # --------------------------------------------------------------------
312 # Constructors
313
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ) -> MultiIndex:
        """
        Construct a MultiIndex from per-level ``levels`` and ``codes``.

        See the class docstring for parameter descriptions.  ``dtype`` is
        accepted for signature compatibility with ``Index`` but is not used
        in this body.
        """
        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # Bypass Index.__new__: a MultiIndex is backed by levels/codes,
        # not a single ndarray of values.
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # Default to unnamed levels; _set_names (below) also validates input.
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # May remap codes (codes pointing at NaN levels become -1).
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()
        result._references = None

        return result
360
361 def _validate_codes(self, level: list, code: list):
362 """
363 Reassign code values as -1 if their corresponding levels are NaN.
364
365 Parameters
366 ----------
367 code : list
368 Code to reassign.
369 level : list
370 Level to check for missing values (NaN, NaT, None).
371
372 Returns
373 -------
374 new code where code value = -1 if it corresponds
375 to a level with missing values (NaN, NaT, None).
376 """
377 null_mask = isna(level)
378 if np.any(null_mask):
379 # error: Incompatible types in assignment
380 # (expression has type "ndarray[Any, dtype[Any]]",
381 # variable has type "List[Any]")
382 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
383 return code
384
385 def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
386 """
387 Parameters
388 ----------
389 codes : optional list
390 Codes to check for validity. Defaults to current codes.
391 levels : optional list
392 Levels to check for validity. Defaults to current levels.
393
394 Raises
395 ------
396 ValueError
397 If length of levels and codes don't match, if the codes for any
398 level would exceed level bounds, or there are any duplicate levels.
399
400 Returns
401 -------
402 new codes where code value = -1 if it corresponds to a
403 NaN level.
404 """
405 # NOTE: Currently does not check, among other things, that cached
406 # nlevels matches nor that sortorder matches actually sortorder.
407 codes = codes or self.codes
408 levels = levels or self.levels
409
410 if len(levels) != len(codes):
411 raise ValueError(
412 "Length of levels and codes must match. NOTE: "
413 "this index is in an inconsistent state."
414 )
415 codes_length = len(codes[0])
416 for i, (level, level_codes) in enumerate(zip(levels, codes)):
417 if len(level_codes) != codes_length:
418 raise ValueError(
419 f"Unequal code lengths: {[len(code_) for code_ in codes]}"
420 )
421 if len(level_codes) and level_codes.max() >= len(level):
422 raise ValueError(
423 f"On level {i}, code max ({level_codes.max()}) >= length of "
424 f"level ({len(level)}). NOTE: this index is in an "
425 "inconsistent state"
426 )
427 if len(level_codes) and level_codes.min() < -1:
428 raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
429 if not level.is_unique:
430 raise ValueError(
431 f"Level values must be unique: {list(level)} on level {i}"
432 )
433 if self.sortorder is not None:
434 if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
435 raise ValueError(
436 "Value for sortorder must be inferior or equal to actual "
437 f"lexsort_depth: sortorder {self.sortorder} "
438 f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
439 )
440
441 codes = [
442 self._validate_codes(level, code) for level, code in zip(levels, codes)
443 ]
444 new_codes = FrozenList(codes)
445 return new_codes
446
447 @classmethod
448 def from_arrays(
449 cls,
450 arrays,
451 sortorder=None,
452 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
453 ) -> MultiIndex:
454 """
455 Convert arrays to MultiIndex.
456
457 Parameters
458 ----------
459 arrays : list / sequence of array-likes
460 Each array-like gives one level's value for each data point.
461 len(arrays) is the number of levels.
462 sortorder : int or None
463 Level of sortedness (must be lexicographically sorted by that
464 level).
465 names : list / sequence of str, optional
466 Names for the levels in the index.
467
468 Returns
469 -------
470 MultiIndex
471
472 See Also
473 --------
474 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
475 MultiIndex.from_product : Make a MultiIndex from cartesian product
476 of iterables.
477 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
478
479 Examples
480 --------
481 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
482 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
483 MultiIndex([(1, 'red'),
484 (1, 'blue'),
485 (2, 'red'),
486 (2, 'blue')],
487 names=['number', 'color'])
488 """
489 error_msg = "Input must be a list / sequence of array-likes."
490 if not is_list_like(arrays):
491 raise TypeError(error_msg)
492 if is_iterator(arrays):
493 arrays = list(arrays)
494
495 # Check if elements of array are list-like
496 for array in arrays:
497 if not is_list_like(array):
498 raise TypeError(error_msg)
499
500 # Check if lengths of all arrays are equal or not,
501 # raise ValueError, if not
502 for i in range(1, len(arrays)):
503 if len(arrays[i]) != len(arrays[i - 1]):
504 raise ValueError("all arrays must be same length")
505
506 codes, levels = factorize_from_iterables(arrays)
507 if names is lib.no_default:
508 names = [getattr(arr, "name", None) for arr in arrays]
509
510 return cls(
511 levels=levels,
512 codes=codes,
513 sortorder=sortorder,
514 names=names,
515 verify_integrity=False,
516 )
517
518 @classmethod
519 @names_compat
520 def from_tuples(
521 cls,
522 tuples: Iterable[tuple[Hashable, ...]],
523 sortorder: int | None = None,
524 names: Sequence[Hashable] | Hashable = None,
525 ) -> MultiIndex:
526 """
527 Convert list of tuples to MultiIndex.
528
529 Parameters
530 ----------
531 tuples : list / sequence of tuple-likes
532 Each tuple is the index of one row/column.
533 sortorder : int or None
534 Level of sortedness (must be lexicographically sorted by that
535 level).
536 names : list / sequence of str, optional
537 Names for the levels in the index.
538
539 Returns
540 -------
541 MultiIndex
542
543 See Also
544 --------
545 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
546 MultiIndex.from_product : Make a MultiIndex from cartesian product
547 of iterables.
548 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
549
550 Examples
551 --------
552 >>> tuples = [(1, 'red'), (1, 'blue'),
553 ... (2, 'red'), (2, 'blue')]
554 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
555 MultiIndex([(1, 'red'),
556 (1, 'blue'),
557 (2, 'red'),
558 (2, 'blue')],
559 names=['number', 'color'])
560 """
561 if not is_list_like(tuples):
562 raise TypeError("Input must be a list / sequence of tuple-likes.")
563 if is_iterator(tuples):
564 tuples = list(tuples)
565 tuples = cast(Collection[Tuple[Hashable, ...]], tuples)
566
567 # handling the empty tuple cases
568 if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
569 codes = [np.zeros(len(tuples))]
570 levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
571 return cls(
572 levels=levels,
573 codes=codes,
574 sortorder=sortorder,
575 names=names,
576 verify_integrity=False,
577 )
578
579 arrays: list[Sequence[Hashable]]
580 if len(tuples) == 0:
581 if names is None:
582 raise TypeError("Cannot infer number of levels from empty list")
583 # error: Argument 1 to "len" has incompatible type "Hashable";
584 # expected "Sized"
585 arrays = [[]] * len(names) # type: ignore[arg-type]
586 elif isinstance(tuples, (np.ndarray, Index)):
587 if isinstance(tuples, Index):
588 tuples = np.asarray(tuples._values)
589
590 arrays = list(lib.tuples_to_object_array(tuples).T)
591 elif isinstance(tuples, list):
592 arrays = list(lib.to_object_array_tuples(tuples).T)
593 else:
594 arrs = zip(*tuples)
595 arrays = cast(List[Sequence[Hashable]], arrs)
596
597 return cls.from_arrays(arrays, sortorder=sortorder, names=names)
598
599 @classmethod
600 def from_product(
601 cls,
602 iterables: Sequence[Iterable[Hashable]],
603 sortorder: int | None = None,
604 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
605 ) -> MultiIndex:
606 """
607 Make a MultiIndex from the cartesian product of multiple iterables.
608
609 Parameters
610 ----------
611 iterables : list / sequence of iterables
612 Each iterable has unique labels for each level of the index.
613 sortorder : int or None
614 Level of sortedness (must be lexicographically sorted by that
615 level).
616 names : list / sequence of str, optional
617 Names for the levels in the index.
618 If not explicitly provided, names will be inferred from the
619 elements of iterables if an element has a name attribute.
620
621 Returns
622 -------
623 MultiIndex
624
625 See Also
626 --------
627 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
628 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
629 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
630
631 Examples
632 --------
633 >>> numbers = [0, 1, 2]
634 >>> colors = ['green', 'purple']
635 >>> pd.MultiIndex.from_product([numbers, colors],
636 ... names=['number', 'color'])
637 MultiIndex([(0, 'green'),
638 (0, 'purple'),
639 (1, 'green'),
640 (1, 'purple'),
641 (2, 'green'),
642 (2, 'purple')],
643 names=['number', 'color'])
644 """
645 from pandas.core.reshape.util import cartesian_product
646
647 if not is_list_like(iterables):
648 raise TypeError("Input must be a list / sequence of iterables.")
649 if is_iterator(iterables):
650 iterables = list(iterables)
651
652 codes, levels = factorize_from_iterables(iterables)
653 if names is lib.no_default:
654 names = [getattr(it, "name", None) for it in iterables]
655
656 # codes are all ndarrays, so cartesian_product is lossless
657 codes = cartesian_product(codes)
658 return cls(levels, codes, sortorder=sortorder, names=names)
659
660 @classmethod
661 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
662 """
663 Make a MultiIndex from a DataFrame.
664
665 Parameters
666 ----------
667 df : DataFrame
668 DataFrame to be converted to MultiIndex.
669 sortorder : int, optional
670 Level of sortedness (must be lexicographically sorted by that
671 level).
672 names : list-like, optional
673 If no names are provided, use the column names, or tuple of column
674 names if the columns is a MultiIndex. If a sequence, overwrite
675 names with the given sequence.
676
677 Returns
678 -------
679 MultiIndex
680 The MultiIndex representation of the given DataFrame.
681
682 See Also
683 --------
684 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
685 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
686 MultiIndex.from_product : Make a MultiIndex from cartesian product
687 of iterables.
688
689 Examples
690 --------
691 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
692 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
693 ... columns=['a', 'b'])
694 >>> df
695 a b
696 0 HI Temp
697 1 HI Precip
698 2 NJ Temp
699 3 NJ Precip
700
701 >>> pd.MultiIndex.from_frame(df)
702 MultiIndex([('HI', 'Temp'),
703 ('HI', 'Precip'),
704 ('NJ', 'Temp'),
705 ('NJ', 'Precip')],
706 names=['a', 'b'])
707
708 Using explicit names, instead of the column names
709
710 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
711 MultiIndex([('HI', 'Temp'),
712 ('HI', 'Precip'),
713 ('NJ', 'Temp'),
714 ('NJ', 'Precip')],
715 names=['state', 'observation'])
716 """
717 if not isinstance(df, ABCDataFrame):
718 raise TypeError("Input must be a DataFrame")
719
720 column_names, columns = zip(*df.items())
721 names = column_names if names is None else names
722 return cls.from_arrays(columns, sortorder=sortorder, names=names)
723
724 # --------------------------------------------------------------------
725
726 @cache_readonly
727 def _values(self) -> np.ndarray:
728 # We override here, since our parent uses _data, which we don't use.
729 values = []
730
731 for i in range(self.nlevels):
732 index = self.levels[i]
733 codes = self.codes[i]
734
735 vals = index
736 if is_categorical_dtype(vals.dtype):
737 vals = cast("CategoricalIndex", vals)
738 vals = vals._data._internal_get_values()
739
740 if isinstance(vals.dtype, ExtensionDtype) or isinstance(
741 vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
742 ):
743 vals = vals.astype(object)
744
745 vals = np.array(vals, copy=False)
746 vals = algos.take_nd(vals, codes, fill_value=index._na_value)
747 values.append(vals)
748
749 arr = lib.fast_zip(values)
750 return arr
751
752 @property
753 def values(self) -> np.ndarray:
754 return self._values
755
756 @property
757 def array(self):
758 """
759 Raises a ValueError for `MultiIndex` because there's no single
760 array backing a MultiIndex.
761
762 Raises
763 ------
764 ValueError
765 """
766 raise ValueError(
767 "MultiIndex has no single backing array. Use "
768 "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
769 )
770
771 @cache_readonly
772 def dtypes(self) -> Series:
773 """
774 Return the dtypes as a Series for the underlying MultiIndex.
775 """
776 from pandas import Series
777
778 names = com.fill_missing_names([level.name for level in self.levels])
779 return Series([level.dtype for level in self.levels], index=Index(names))
780
781 def __len__(self) -> int:
782 return len(self.codes[0])
783
784 @property
785 def size(self) -> int:
786 """
787 Return the number of elements in the underlying data.
788 """
789 # override Index.size to avoid materializing _values
790 return len(self)
791
792 # --------------------------------------------------------------------
793 # Levels Methods
794
795 @cache_readonly
796 def levels(self) -> FrozenList:
797 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
798 # create new IndexEngine
799 # https://github.com/pandas-dev/pandas/issues/31648
800 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
801 for level in result:
802 # disallow midx.levels[0].name = "foo"
803 level._no_setting_name = True
804 return FrozenList(result)
805
806 def _set_levels(
807 self,
808 levels,
809 *,
810 level=None,
811 copy: bool = False,
812 validate: bool = True,
813 verify_integrity: bool = False,
814 ) -> None:
815 # This is NOT part of the levels property because it should be
816 # externally not allowed to set levels. User beware if you change
817 # _levels directly
818 if validate:
819 if len(levels) == 0:
820 raise ValueError("Must set non-zero number of levels.")
821 if level is None and len(levels) != self.nlevels:
822 raise ValueError("Length of levels must match number of levels.")
823 if level is not None and len(levels) != len(level):
824 raise ValueError("Length of levels must match length of level.")
825
826 if level is None:
827 new_levels = FrozenList(
828 ensure_index(lev, copy=copy)._view() for lev in levels
829 )
830 else:
831 level_numbers = [self._get_level_number(lev) for lev in level]
832 new_levels_list = list(self._levels)
833 for lev_num, lev in zip(level_numbers, levels):
834 new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
835 new_levels = FrozenList(new_levels_list)
836
837 if verify_integrity:
838 new_codes = self._verify_integrity(levels=new_levels)
839 self._codes = new_codes
840
841 names = self.names
842 self._levels = new_levels
843 if any(names):
844 self._set_names(names)
845
846 self._reset_cache()
847
848 def set_levels(
849 self, levels, *, level=None, verify_integrity: bool = True
850 ) -> MultiIndex:
851 """
852 Set new levels on MultiIndex. Defaults to returning new index.
853
854 Parameters
855 ----------
856 levels : sequence or list of sequence
857 New level(s) to apply.
858 level : int, level name, or sequence of int/level names (default None)
859 Level(s) to set (None for all levels).
860 verify_integrity : bool, default True
861 If True, checks that levels and codes are compatible.
862
863 Returns
864 -------
865 MultiIndex
866
867 Examples
868 --------
869 >>> idx = pd.MultiIndex.from_tuples(
870 ... [
871 ... (1, "one"),
872 ... (1, "two"),
873 ... (2, "one"),
874 ... (2, "two"),
875 ... (3, "one"),
876 ... (3, "two")
877 ... ],
878 ... names=["foo", "bar"]
879 ... )
880 >>> idx
881 MultiIndex([(1, 'one'),
882 (1, 'two'),
883 (2, 'one'),
884 (2, 'two'),
885 (3, 'one'),
886 (3, 'two')],
887 names=['foo', 'bar'])
888
889 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
890 MultiIndex([('a', 1),
891 ('a', 2),
892 ('b', 1),
893 ('b', 2),
894 ('c', 1),
895 ('c', 2)],
896 names=['foo', 'bar'])
897 >>> idx.set_levels(['a', 'b', 'c'], level=0)
898 MultiIndex([('a', 'one'),
899 ('a', 'two'),
900 ('b', 'one'),
901 ('b', 'two'),
902 ('c', 'one'),
903 ('c', 'two')],
904 names=['foo', 'bar'])
905 >>> idx.set_levels(['a', 'b'], level='bar')
906 MultiIndex([(1, 'a'),
907 (1, 'b'),
908 (2, 'a'),
909 (2, 'b'),
910 (3, 'a'),
911 (3, 'b')],
912 names=['foo', 'bar'])
913
914 If any of the levels passed to ``set_levels()`` exceeds the
915 existing length, all of the values from that argument will
916 be stored in the MultiIndex levels, though the values will
917 be truncated in the MultiIndex output.
918
919 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
920 MultiIndex([('a', 1),
921 ('a', 2),
922 ('b', 1),
923 ('b', 2),
924 ('c', 1),
925 ('c', 2)],
926 names=['foo', 'bar'])
927 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
928 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
929 """
930
931 if is_list_like(levels) and not isinstance(levels, Index):
932 levels = list(levels)
933
934 level, levels = _require_listlike(level, levels, "Levels")
935 idx = self._view()
936 idx._reset_identity()
937 idx._set_levels(
938 levels, level=level, validate=True, verify_integrity=verify_integrity
939 )
940 return idx
941
942 @property
943 def nlevels(self) -> int:
944 """
945 Integer number of levels in this MultiIndex.
946
947 Examples
948 --------
949 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
950 >>> mi
951 MultiIndex([('a', 'b', 'c')],
952 )
953 >>> mi.nlevels
954 3
955 """
956 return len(self._levels)
957
958 @property
959 def levshape(self) -> Shape:
960 """
961 A tuple with the length of each level.
962
963 Examples
964 --------
965 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
966 >>> mi
967 MultiIndex([('a', 'b', 'c')],
968 )
969 >>> mi.levshape
970 (1, 1, 1)
971 """
972 return tuple(len(x) for x in self.levels)
973
974 # --------------------------------------------------------------------
975 # Codes Methods
976
977 @property
978 def codes(self):
979 return self._codes
980
981 def _set_codes(
982 self,
983 codes,
984 *,
985 level=None,
986 copy: bool = False,
987 validate: bool = True,
988 verify_integrity: bool = False,
989 ) -> None:
990 if validate:
991 if level is None and len(codes) != self.nlevels:
992 raise ValueError("Length of codes must match number of levels")
993 if level is not None and len(codes) != len(level):
994 raise ValueError("Length of codes must match length of levels.")
995
996 if level is None:
997 new_codes = FrozenList(
998 _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
999 for lev, level_codes in zip(self._levels, codes)
1000 )
1001 else:
1002 level_numbers = [self._get_level_number(lev) for lev in level]
1003 new_codes_list = list(self._codes)
1004 for lev_num, level_codes in zip(level_numbers, codes):
1005 lev = self.levels[lev_num]
1006 new_codes_list[lev_num] = _coerce_indexer_frozen(
1007 level_codes, lev, copy=copy
1008 )
1009 new_codes = FrozenList(new_codes_list)
1010
1011 if verify_integrity:
1012 new_codes = self._verify_integrity(codes=new_codes)
1013
1014 self._codes = new_codes
1015
1016 self._reset_cache()
1017
1018 def set_codes(self, codes, *, level=None, verify_integrity: bool = True):
1019 """
1020 Set new codes on MultiIndex. Defaults to returning new index.
1021
1022 Parameters
1023 ----------
1024 codes : sequence or list of sequence
1025 New codes to apply.
1026 level : int, level name, or sequence of int/level names (default None)
1027 Level(s) to set (None for all levels).
1028 verify_integrity : bool, default True
1029 If True, checks that levels and codes are compatible.
1030
1031 Returns
1032 -------
1033 new index (of same type and class...etc) or None
1034 The same type as the caller or None if ``inplace=True``.
1035
1036 Examples
1037 --------
1038 >>> idx = pd.MultiIndex.from_tuples(
1039 ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
1040 ... )
1041 >>> idx
1042 MultiIndex([(1, 'one'),
1043 (1, 'two'),
1044 (2, 'one'),
1045 (2, 'two')],
1046 names=['foo', 'bar'])
1047
1048 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
1049 MultiIndex([(2, 'one'),
1050 (1, 'one'),
1051 (2, 'two'),
1052 (1, 'two')],
1053 names=['foo', 'bar'])
1054 >>> idx.set_codes([1, 0, 1, 0], level=0)
1055 MultiIndex([(2, 'one'),
1056 (1, 'two'),
1057 (2, 'one'),
1058 (1, 'two')],
1059 names=['foo', 'bar'])
1060 >>> idx.set_codes([0, 0, 1, 1], level='bar')
1061 MultiIndex([(1, 'one'),
1062 (1, 'one'),
1063 (2, 'two'),
1064 (2, 'two')],
1065 names=['foo', 'bar'])
1066 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
1067 MultiIndex([(2, 'one'),
1068 (1, 'one'),
1069 (2, 'two'),
1070 (1, 'two')],
1071 names=['foo', 'bar'])
1072 """
1073
1074 level, codes = _require_listlike(level, codes, "Codes")
1075 idx = self._view()
1076 idx._reset_identity()
1077 idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
1078 return idx
1079
1080 # --------------------------------------------------------------------
1081 # Index Internals
1082
1083 @cache_readonly
1084 def _engine(self):
1085 # Calculate the number of bits needed to represent labels in each
1086 # level, as log2 of their sizes:
1087 # NaN values are shifted to 1 and missing values in other while
1088 # calculating the indexer are shifted to 0
1089 sizes = np.ceil(
1090 np.log2(
1091 [
1092 len(level)
1093 + libindex.multiindex_nulls_shift # type: ignore[attr-defined]
1094 for level in self.levels
1095 ]
1096 )
1097 )
1098
1099 # Sum bit counts, starting from the _right_....
1100 lev_bits = np.cumsum(sizes[::-1])[::-1]
1101
1102 # ... in order to obtain offsets such that sorting the combination of
1103 # shifted codes (one for each level, resulting in a unique integer) is
1104 # equivalent to sorting lexicographically the codes themselves. Notice
1105 # that each level needs to be shifted by the number of bits needed to
1106 # represent the _previous_ ones:
1107 offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")
1108
1109 # Check the total number of bits needed for our representation:
1110 if lev_bits[0] > 64:
1111 # The levels would overflow a 64 bit uint - use Python integers:
1112 return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
1113 return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1114
1115 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1116 # type "Type[MultiIndex]" in supertype "Index"
1117 @property
1118 def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override]
1119 return type(self).from_tuples
1120
1121 @doc(Index._shallow_copy)
1122 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
1123 names = name if name is not lib.no_default else self.names
1124
1125 return type(self).from_tuples(values, sortorder=None, names=names)
1126
1127 def _view(self) -> MultiIndex:
1128 result = type(self)(
1129 levels=self.levels,
1130 codes=self.codes,
1131 sortorder=self.sortorder,
1132 names=self.names,
1133 verify_integrity=False,
1134 )
1135 result._cache = self._cache.copy()
1136 result._cache.pop("levels", None) # GH32669
1137 return result
1138
1139 # --------------------------------------------------------------------
1140
1141 # error: Signature of "copy" incompatible with supertype "Index"
1142 def copy( # type: ignore[override]
1143 self,
1144 names=None,
1145 deep: bool = False,
1146 name=None,
1147 ):
1148 """
1149 Make a copy of this object.
1150
1151 Names, dtype, levels and codes can be passed and will be set on new copy.
1152
1153 Parameters
1154 ----------
1155 names : sequence, optional
1156 deep : bool, default False
1157 name : Label
1158 Kept for compatibility with 1-dimensional Index. Should not be used.
1159
1160 Returns
1161 -------
1162 MultiIndex
1163
1164 Notes
1165 -----
1166 In most cases, there should be no functional difference from using
1167 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1168 This could be potentially expensive on large MultiIndex objects.
1169
1170 Examples
1171 --------
1172 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1173 >>> mi
1174 MultiIndex([('a', 'b', 'c')],
1175 )
1176 >>> mi.copy()
1177 MultiIndex([('a', 'b', 'c')],
1178 )
1179 """
1180 names = self._validate_names(name=name, names=names, deep=deep)
1181 keep_id = not deep
1182 levels, codes = None, None
1183
1184 if deep:
1185 from copy import deepcopy
1186
1187 levels = deepcopy(self.levels)
1188 codes = deepcopy(self.codes)
1189
1190 levels = levels if levels is not None else self.levels
1191 codes = codes if codes is not None else self.codes
1192
1193 new_index = type(self)(
1194 levels=levels,
1195 codes=codes,
1196 sortorder=self.sortorder,
1197 names=names,
1198 verify_integrity=False,
1199 )
1200 new_index._cache = self._cache.copy()
1201 new_index._cache.pop("levels", None) # GH32669
1202 if keep_id:
1203 new_index._id = self._id
1204 return new_index
1205
1206 def __array__(self, dtype=None) -> np.ndarray:
1207 """the array interface, return my values"""
1208 return self.values
1209
1210 def view(self, cls=None):
1211 """this is defined as a copy with the same identity"""
1212 result = self.copy()
1213 result._id = self._id
1214 return result
1215
1216 @doc(Index.__contains__)
1217 def __contains__(self, key: Any) -> bool:
1218 hash(key)
1219 try:
1220 self.get_loc(key)
1221 return True
1222 except (LookupError, TypeError, ValueError):
1223 return False
1224
1225 @cache_readonly
1226 def dtype(self) -> np.dtype:
1227 return np.dtype("O")
1228
1229 def _is_memory_usage_qualified(self) -> bool:
1230 """return a boolean if we need a qualified .info display"""
1231
1232 def f(level) -> bool:
1233 return "mixed" in level or "string" in level or "unicode" in level
1234
1235 return any(f(level) for level in self._inferred_type_levels)
1236
1237 # Cannot determine type of "memory_usage"
1238 @doc(Index.memory_usage) # type: ignore[has-type]
1239 def memory_usage(self, deep: bool = False) -> int:
1240 # we are overwriting our base class to avoid
1241 # computing .values here which could materialize
1242 # a tuple representation unnecessarily
1243 return self._nbytes(deep)
1244
1245 @cache_readonly
1246 def nbytes(self) -> int:
1247 """return the number of bytes in the underlying data"""
1248 return self._nbytes(False)
1249
1250 def _nbytes(self, deep: bool = False) -> int:
1251 """
1252 return the number of bytes in the underlying data
1253 deeply introspect the level data if deep=True
1254
1255 include the engine hashtable
1256
1257 *this is in internal routine*
1258
1259 """
1260 # for implementations with no useful getsizeof (PyPy)
1261 objsize = 24
1262
1263 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
1264 label_nbytes = sum(i.nbytes for i in self.codes)
1265 names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
1266 result = level_nbytes + label_nbytes + names_nbytes
1267
1268 # include our engine hashtable
1269 result += self._engine.sizeof(deep=deep)
1270 return result
1271
1272 # --------------------------------------------------------------------
1273 # Rendering Methods
1274
1275 def _formatter_func(self, tup):
1276 """
1277 Formats each item in tup according to its level's formatter function.
1278 """
1279 formatter_funcs = [level._formatter_func for level in self.levels]
1280 return tuple(func(val) for func, val in zip(formatter_funcs, tup))
1281
    def _format_native_types(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Format the levels to native (string) types, re-inserting ``na_rep``
        where codes are missing, and return an object-dtype values array.
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # Append na_rep as an extra level entry and point the missing
                # codes (-1) at it.
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
1317
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the MultiIndex as a list of display strings.

        Parameters
        ----------
        name : bool, optional
            Alias for ``names``; overrides it when not None.
        formatter : callable, optional
            Formatter passed through to each level's ``format``.
        na_rep : str, optional
            String for NA entries; if None, chosen per level dtype.
        names : bool, default False
            If True, prepend each level's name to its column.
        space : int, default 2
            Column spacing used when ``adjoin`` is True.
        sparsify : bool, optional
            If None, taken from the "display.multi_sparse" option. A falsey
            non-None value is used as the sparsify fill sentinel itself.
        adjoin : bool, default True
            If True, join level columns into aligned lines; otherwise return
            one list per level.
        """
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            from pandas.io.formats.format import get_adjustment

            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1391
1392 # --------------------------------------------------------------------
1393 # Names Methods
1394
1395 def _get_names(self) -> FrozenList:
1396 return FrozenList(self._names)
1397
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            # Translate level labels / negative ints to positional numbers.
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
            self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
1455
    # Expose _get_names/_set_names as the public ``names`` data property.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1474
1475 # --------------------------------------------------------------------
1476
1477 @cache_readonly
1478 def inferred_type(self) -> str:
1479 return "mixed"
1480
    def _get_level_number(self, level) -> int:
        """
        Translate ``level`` (a level name or an integer, possibly negative)
        into a positional level number.

        Raises
        ------
        ValueError
            If the name occurs on multiple levels.
        KeyError
            If a non-integer level name is not found.
        IndexError
            If an integer level is out of bounds.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            level = self.names.index(level)
        except ValueError as err:
            # Not a known name: interpret as an integer position.
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level
1507
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # A -1 code marks a missing value, so the index is not monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1538
1539 @cache_readonly
1540 def is_monotonic_decreasing(self) -> bool:
1541 """
1542 Return a boolean if the values are equal or decreasing.
1543 """
1544 # monotonic decreasing if and only if reverse is monotonic increasing
1545 return self[::-1].is_monotonic_increasing
1546
1547 @cache_readonly
1548 def _inferred_type_levels(self) -> list[str]:
1549 """return a list of the inferred types, one for each level"""
1550 return [i.inferred_type for i in self.levels]
1551
    @doc(Index.duplicated)
    def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        # Collapse each row's codes into a single group id, then run the 1-D
        # duplicated kernel (module-level import, not this method) on the ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1562
1563 def fillna(self, value=None, downcast=None):
1564 """
1565 fillna is not implemented for MultiIndex
1566 """
1567 raise NotImplementedError("isna is not defined for MultiIndex")
1568
1569 @doc(Index.dropna)
1570 def dropna(self, how: AnyAll = "any") -> MultiIndex:
1571 nans = [level_codes == -1 for level_codes in self.codes]
1572 if how == "any":
1573 indexer = np.any(nans, axis=0)
1574 elif how == "all":
1575 indexer = np.all(nans, axis=0)
1576 else:
1577 raise ValueError(f"invalid how option: {how}")
1578
1579 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1580 return self.set_codes(codes=new_codes)
1581
1582 def _get_level_values(self, level: int, unique: bool = False) -> Index:
1583 """
1584 Return vector of label values for requested level,
1585 equal to the length of the index
1586
1587 **this is an internal method**
1588
1589 Parameters
1590 ----------
1591 level : int
1592 unique : bool, default False
1593 if True, drop duplicated values
1594
1595 Returns
1596 -------
1597 Index
1598 """
1599 lev = self.levels[level]
1600 level_codes = self.codes[level]
1601 name = self._names[level]
1602 if unique:
1603 level_codes = algos.unique(level_codes)
1604 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
1605 return lev._shallow_copy(filled, name=name)
1606
1607 def get_level_values(self, level):
1608 """
1609 Return vector of label values for requested level.
1610
1611 Length of returned vector is equal to the length of the index.
1612
1613 Parameters
1614 ----------
1615 level : int or str
1616 ``level`` is either the integer position of the level in the
1617 MultiIndex, or the name of the level.
1618
1619 Returns
1620 -------
1621 Index
1622 Values is a level of this MultiIndex converted to
1623 a single :class:`Index` (or subclass thereof).
1624
1625 Notes
1626 -----
1627 If the level contains missing values, the result may be casted to
1628 ``float`` with missing values specified as ``NaN``. This is because
1629 the level is converted to a regular ``Index``.
1630
1631 Examples
1632 --------
1633 Create a MultiIndex:
1634
1635 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1636 >>> mi.names = ['level_1', 'level_2']
1637
1638 Get level values by supplying level as either integer or name:
1639
1640 >>> mi.get_level_values(0)
1641 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1642 >>> mi.get_level_values('level_2')
1643 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1644
1645 If a level contains missing values, the return type of the level
1646 may be cast to ``float``.
1647
1648 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1649 level_0 int64
1650 level_1 int64
1651 dtype: object
1652 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1653 Index([1.0, nan, 2.0], dtype='float64')
1654 """
1655 level = self._get_level_number(level)
1656 values = self._get_level_values(level)
1657 return values
1658
1659 @doc(Index.unique)
1660 def unique(self, level=None):
1661 if level is None:
1662 return self.drop_duplicates()
1663 else:
1664 level = self._get_level_number(level)
1665 return self._get_level_values(level=level, unique=True)
1666
1667 def to_frame(
1668 self,
1669 index: bool = True,
1670 name=lib.no_default,
1671 allow_duplicates: bool = False,
1672 ) -> DataFrame:
1673 """
1674 Create a DataFrame with the levels of the MultiIndex as columns.
1675
1676 Column ordering is determined by the DataFrame constructor with data as
1677 a dict.
1678
1679 Parameters
1680 ----------
1681 index : bool, default True
1682 Set the index of the returned DataFrame as the original MultiIndex.
1683
1684 name : list / sequence of str, optional
1685 The passed names should substitute index level names.
1686
1687 allow_duplicates : bool, optional default False
1688 Allow duplicate column labels to be created.
1689
1690 .. versionadded:: 1.5.0
1691
1692 Returns
1693 -------
1694 DataFrame
1695
1696 See Also
1697 --------
1698 DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
1699 tabular data.
1700
1701 Examples
1702 --------
1703 >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
1704 >>> mi
1705 MultiIndex([('a', 'c'),
1706 ('b', 'd')],
1707 )
1708
1709 >>> df = mi.to_frame()
1710 >>> df
1711 0 1
1712 a c a c
1713 b d b d
1714
1715 >>> df = mi.to_frame(index=False)
1716 >>> df
1717 0 1
1718 0 a c
1719 1 b d
1720
1721 >>> df = mi.to_frame(name=['x', 'y'])
1722 >>> df
1723 x y
1724 a c a c
1725 b d b d
1726 """
1727 from pandas import DataFrame
1728
1729 if name is not lib.no_default:
1730 if not is_list_like(name):
1731 raise TypeError("'name' must be a list / sequence of column names.")
1732
1733 if len(name) != len(self.levels):
1734 raise ValueError(
1735 "'name' should have same length as number of levels on index."
1736 )
1737 idx_names = name
1738 else:
1739 idx_names = self._get_level_names()
1740
1741 if not allow_duplicates and len(set(idx_names)) != len(idx_names):
1742 raise ValueError(
1743 "Cannot create duplicate column labels if allow_duplicates is False"
1744 )
1745
1746 # Guarantee resulting column order - PY36+ dict maintains insertion order
1747 result = DataFrame(
1748 {level: self._get_level_values(level) for level in range(len(self.levels))},
1749 copy=False,
1750 )
1751 result.columns = idx_names
1752
1753 if index:
1754 result.index = self
1755 return result
1756
1757 # error: Return type "Index" of "to_flat_index" incompatible with return type
1758 # "MultiIndex" in supertype "Index"
1759 def to_flat_index(self) -> Index: # type: ignore[override]
1760 """
1761 Convert a MultiIndex to an Index of Tuples containing the level values.
1762
1763 Returns
1764 -------
1765 pd.Index
1766 Index with the MultiIndex data represented in Tuples.
1767
1768 See Also
1769 --------
1770 MultiIndex.from_tuples : Convert flat index back to MultiIndex.
1771
1772 Notes
1773 -----
1774 This method will simply return the caller if called by anything other
1775 than a MultiIndex.
1776
1777 Examples
1778 --------
1779 >>> index = pd.MultiIndex.from_product(
1780 ... [['foo', 'bar'], ['baz', 'qux']],
1781 ... names=['a', 'b'])
1782 >>> index.to_flat_index()
1783 Index([('foo', 'baz'), ('foo', 'qux'),
1784 ('bar', 'baz'), ('bar', 'qux')],
1785 dtype='object')
1786 """
1787 return Index(self._values, tupleize_cols=False)
1788
1789 def _is_lexsorted(self) -> bool:
1790 """
1791 Return True if the codes are lexicographically sorted.
1792
1793 Returns
1794 -------
1795 bool
1796
1797 Examples
1798 --------
1799 In the below examples, the first level of the MultiIndex is sorted because
1800 a<b<c, so there is no need to look at the next level.
1801
1802 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
1803 ... ['d', 'e', 'f']])._is_lexsorted()
1804 True
1805 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
1806 ... ['d', 'f', 'e']])._is_lexsorted()
1807 True
1808
1809 In case there is a tie, the lexicographical sorting looks
1810 at the next level of the MultiIndex.
1811
1812 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
1813 True
1814 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
1815 False
1816 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1817 ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
1818 True
1819 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1820 ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
1821 False
1822 """
1823 return self._lexsort_depth == self.nlevels
1824
1825 @cache_readonly
1826 def _lexsort_depth(self) -> int:
1827 """
1828 Compute and return the lexsort_depth, the number of levels of the
1829 MultiIndex that are sorted lexically
1830
1831 Returns
1832 -------
1833 int
1834 """
1835 if self.sortorder is not None:
1836 return self.sortorder
1837 return _lexsort_depth(self.codes, self.nlevels)
1838
    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Parameters
        ----------
        raise_if_incomparable : bool, default False
            If True, re-raise the TypeError from sorting a level with
            incomparable values; otherwise such levels are left unsorted.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    if raise_if_incomparable:
                        raise
                else:
                    # try/except/ELSE: only runs when argsort succeeded —
                    # reorder the level, then remap its codes to match.
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    level_codes = algos.take_nd(ri, level_codes)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
1905
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this 'break' abandons the remaining levels;
                    # presumably 'changed' is still False whenever this
                    # triggers — confirm, otherwise new_levels would be shorter
                    # than self.levels in the _set_levels call below.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
1990
1991 # --------------------------------------------------------------------
1992 # Pickling Methods
1993
1994 def __reduce__(self):
1995 """Necessary for making this object picklable"""
1996 d = {
1997 "levels": list(self.levels),
1998 "codes": list(self.codes),
1999 "sortorder": self.sortorder,
2000 "names": list(self.names),
2001 }
2002 return ibase._new_Index, (type(self), d), None
2003
2004 # --------------------------------------------------------------------
2005
    def __getitem__(self, key):
        # Scalar key -> the corresponding tuple of level values.
        if is_scalar(key):
            key = com.cast_scalar_indexer(key)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # -1 code marks a missing value at this position.
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                sortorder = self.sortorder
            elif isinstance(key, slice):
                if key.step is None or key.step > 0:
                    # A forward slice preserves any known sort order.
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            # Apply the indexer to the codes only; levels are shared.
            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2039
2040 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2041 """
2042 Fastpath for __getitem__ when we know we have a slice.
2043 """
2044 sortorder = None
2045 if slobj.step is None or slobj.step > 0:
2046 sortorder = self.sortorder
2047
2048 new_codes = [level_codes[slobj] for level_codes in self.codes]
2049
2050 return type(self)(
2051 levels=self.levels,
2052 codes=new_codes,
2053 names=self._names,
2054 sortorder=sortorder,
2055 verify_integrity=False,
2056 )
2057
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the code that marks a missing value within a level.
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            mask = indices == -1
            if mask.any():
                # Set the NA code at every -1 position. The in-place write is
                # safe: it mutates only the freshly-taken arrays above.
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
2089
2090 def append(self, other):
2091 """
2092 Append a collection of Index options together.
2093
2094 Parameters
2095 ----------
2096 other : Index or list/tuple of indices
2097
2098 Returns
2099 -------
2100 Index
2101 The combined index.
2102
2103 Examples
2104 --------
2105 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
2106 >>> mi
2107 MultiIndex([('a', 'b')],
2108 )
2109 >>> mi.append(mi)
2110 MultiIndex([('a', 'b'), ('a', 'b')],
2111 )
2112 """
2113 if not isinstance(other, (list, tuple)):
2114 other = [other]
2115
2116 if all(
2117 (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
2118 ):
2119 arrays, names = [], []
2120 for i in range(self.nlevels):
2121 label = self._get_level_values(i)
2122 appended = [o._get_level_values(i) for o in other]
2123 arrays.append(label.append(appended))
2124 single_label_name = all(label.name == x.name for x in appended)
2125 names.append(label.name if single_label_name else None)
2126 return MultiIndex.from_arrays(arrays, names=names)
2127
2128 to_concat = (self._values,) + tuple(k._values for k in other)
2129 new_tuples = np.concatenate(to_concat)
2130
2131 # if all(isinstance(x, MultiIndex) for x in other):
2132 try:
2133 # We only get here if other contains at least one index with tuples,
2134 # setting names to None automatically
2135 return MultiIndex.from_tuples(new_tuples)
2136 except (TypeError, IndexError):
2137 return Index(new_tuples)
2138
2139 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
2140 if len(args) == 0 and len(kwargs) == 0:
2141 # lexsort is significantly faster than self._values.argsort()
2142 target = self._sort_levels_monotonic(raise_if_incomparable=True)
2143 return lexsort_indexer(target._get_codes_for_sorting())
2144 return self._values.argsort(*args, **kwargs)
2145
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        # Only the codes are repeated; the levels are shared unchanged.
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2162
    # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make new MultiIndex with passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified.
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', labels not found in the index are skipped silently.

        Returns
        -------
        MultiIndex
        """
        if level is not None:
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # Collect the positional indices of every row matching ``codes``.
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # Convert the boolean mask to positional indices.
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
2222
    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop the rows whose value in the given level matches any of ``codes``.

        Parameters
        ----------
        codes : array-like
            Labels to drop from the requested level.
        level : int or level name
        errors : str, default 'raise'
            If not 'ignore', raise KeyError when a label is missing from
            the level.

        Returns
        -------
        MultiIndex
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        # -2 marks labels genuinely absent from the level, distinguishing them
        # from NaN entries which get_indexer also reports as -1
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep rows whose code at this level is NOT one of the targets
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2243
2244 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2245 """
2246 Swap level i with level j.
2247
2248 Calling this method does not change the ordering of the values.
2249
2250 Parameters
2251 ----------
2252 i : int, str, default -2
2253 First level of index to be swapped. Can pass level name as string.
2254 Type of parameters can be mixed.
2255 j : int, str, default -1
2256 Second level of index to be swapped. Can pass level name as string.
2257 Type of parameters can be mixed.
2258
2259 Returns
2260 -------
2261 MultiIndex
2262 A new MultiIndex.
2263
2264 See Also
2265 --------
2266 Series.swaplevel : Swap levels i and j in a MultiIndex.
2267 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2268 particular axis.
2269
2270 Examples
2271 --------
2272 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2273 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2274 >>> mi
2275 MultiIndex([('a', 'bb'),
2276 ('a', 'aa'),
2277 ('b', 'bb'),
2278 ('b', 'aa')],
2279 )
2280 >>> mi.swaplevel(0, 1)
2281 MultiIndex([('bb', 'a'),
2282 ('aa', 'a'),
2283 ('bb', 'b'),
2284 ('aa', 'b')],
2285 )
2286 """
2287 new_levels = list(self.levels)
2288 new_codes = list(self.codes)
2289 new_names = list(self.names)
2290
2291 i = self._get_level_number(i)
2292 j = self._get_level_number(j)
2293
2294 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2295 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2296 new_names[i], new_names[j] = new_names[j], new_names[i]
2297
2298 return MultiIndex(
2299 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2300 )
2301
2302 def reorder_levels(self, order) -> MultiIndex:
2303 """
2304 Rearrange levels using input order. May not drop or duplicate levels.
2305
2306 Parameters
2307 ----------
2308 order : list of int or list of str
2309 List representing new level order. Reference level by number
2310 (position) or by key (label).
2311
2312 Returns
2313 -------
2314 MultiIndex
2315
2316 Examples
2317 --------
2318 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2319 >>> mi
2320 MultiIndex([(1, 3),
2321 (2, 4)],
2322 names=['x', 'y'])
2323
2324 >>> mi.reorder_levels(order=[1, 0])
2325 MultiIndex([(3, 1),
2326 (4, 2)],
2327 names=['y', 'x'])
2328
2329 >>> mi.reorder_levels(order=['y', 'x'])
2330 MultiIndex([(3, 1),
2331 (4, 2)],
2332 names=['y', 'x'])
2333 """
2334 order = [self._get_level_number(i) for i in order]
2335 if len(order) != self.nlevels:
2336 raise AssertionError(
2337 f"Length of order must be same as number of levels ({self.nlevels}), "
2338 f"got {len(order)}"
2339 )
2340 new_levels = [self.levels[i] for i in order]
2341 new_codes = [self.codes[i] for i in order]
2342 new_names = [self.names[i] for i in order]
2343
2344 return MultiIndex(
2345 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2346 )
2347
    def _get_codes_for_sorting(self) -> list[Categorical]:
        """
        we are categorizing our codes by using the
        available categories (all, not just observed)
        excluding any missing ones (-1); this is in preparation
        for sorting, where we need to disambiguate that -1 is not
        a valid value
        """

        def cats(level_codes):
            # every possible code value 0..max for this level, in the codes'
            # own dtype (empty range when the level has no codes)
            return np.arange(
                np.array(level_codes).max() + 1 if len(level_codes) else 0,
                dtype=level_codes.dtype,
            )

        return [
            Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
            for level_codes in self.codes
        ]
2367
    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : bool, default True
            Sort by the remaining levels after level.

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:
            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): ``primary``/``primshp`` end up repeated here;
                # repeating a lexsort key should not change the ordering, but
                # the duplication looks unintentional — confirm intent.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2481
2482 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
2483 if not isinstance(target, MultiIndex):
2484 if indexer is None:
2485 target = self
2486 elif (indexer >= 0).all():
2487 target = self.take(indexer)
2488 else:
2489 try:
2490 target = MultiIndex.from_tuples(target)
2491 except TypeError:
2492 # not all tuples, see test_constructor_dict_multiindex_reindex_flat
2493 return target
2494
2495 target = self._maybe_preserve_names(target, preserve_names)
2496 return target
2497
2498 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2499 if (
2500 preserve_names
2501 and target.nlevels == self.nlevels
2502 and target.names != self.names
2503 ):
2504 target = target.copy(deep=False)
2505 target.names = self.names
2506 return target
2507
2508 # --------------------------------------------------------------------
2509 # Indexing Methods
2510
2511 def _check_indexing_error(self, key) -> None:
2512 if not is_hashable(key) or is_iterator(key):
2513 # We allow tuples if they are hashable, whereas other Index
2514 # subclasses require scalar.
2515 # We have to explicitly exclude generators, as these are hashable.
2516 raise InvalidIndexError(key)
2517
    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        """
        Should integer key(s) be treated as positional?

        Defers to level 0's behavior, since a scalar key is looked up
        against the first level.
        """
        # GH#33355
        return self.levels[0]._should_fallback_to_positional
2525
2526 def _get_indexer_strict(
2527 self, key, axis_name: str
2528 ) -> tuple[Index, npt.NDArray[np.intp]]:
2529 keyarr = key
2530 if not isinstance(keyarr, Index):
2531 keyarr = com.asarray_tuplesafe(keyarr)
2532
2533 if len(keyarr) and not isinstance(keyarr[0], tuple):
2534 indexer = self._get_indexer_level_0(keyarr)
2535
2536 self._raise_if_missing(key, indexer, axis_name)
2537 return self[indexer], indexer
2538
2539 return super()._get_indexer_strict(key, axis_name)
2540
2541 def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
2542 keyarr = key
2543 if not isinstance(key, Index):
2544 keyarr = com.asarray_tuplesafe(key)
2545
2546 if len(keyarr) and not isinstance(keyarr[0], tuple):
2547 # i.e. same condition for special case in MultiIndex._get_indexer_strict
2548
2549 mask = indexer == -1
2550 if mask.any():
2551 check = self.levels[0].get_indexer(keyarr)
2552 cmask = check == -1
2553 if cmask.any():
2554 raise KeyError(f"{keyarr[cmask]} not in index")
2555 # We get here when levels still contain values which are not
2556 # actually in Index anymore
2557 raise KeyError(f"{keyarr} not in index")
2558 else:
2559 return super()._raise_if_missing(key, indexer, axis_name)
2560
2561 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
2562 """
2563 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
2564 """
2565 lev = self.levels[0]
2566 codes = self._codes[0]
2567 cat = Categorical.from_codes(codes=codes, categories=lev)
2568 ci = Index(cat)
2569 return ci.get_indexer_for(target)
2570
    def get_slice_bound(
        self,
        label: Hashable | Sequence[Hashable],
        side: Literal["left", "right"],
    ) -> int:
        """
        For an ordered MultiIndex, compute slice bound
        that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object or tuple of objects
            May be a partial key (fewer elements than there are levels).
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.

        Notes
        -----
        This method only works if level 0 index of the MultiIndex is lexsorted.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])

        Get the locations from the leftmost 'b' in the first level
        until the end of the multiindex:

        >>> mi.get_slice_bound('b', side="left")
        1

        Like above, but if you get the locations from the rightmost
        'b' in the first level and 'f' in the second level:

        >>> mi.get_slice_bound(('b','f'), side="right")
        3

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # Normalize a scalar label to a 1-tuple so _partial_tup_index can
        # treat every key as a (possibly partial) tuple.
        if not isinstance(label, tuple):
            label = (label,)
        return self._partial_tup_index(label, side=side)
2622
    # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)
            Positional (integer) bounds of the requested slice.

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
2678
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple ``tup``.

        Narrows the [start, end) window one level at a time via searchsorted
        on the level codes; requires the index to be lexsorted through at
        least ``len(tup)`` levels.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # restrict the search to the window established by earlier levels
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)
2728
2729 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2730 """
2731 If key is NA value, location of index unify as -1.
2732
2733 Parameters
2734 ----------
2735 level_index: Index
2736 key : label
2737
2738 Returns
2739 -------
2740 loc : int
2741 If key is NA value, loc is -1
2742 Else, location of key in index.
2743
2744 See Also
2745 --------
2746 Index.get_loc : The get_loc method for (single-level) index.
2747 """
2748 if is_scalar(key) and isna(key):
2749 # TODO: need is_valid_na_for_dtype(key, level_index.dtype)
2750 return -1
2751 else:
2752 return level_index.get_loc(key)
2753
    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # scalar key: only level 0 can match
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # narrow the candidate positions level-by-level for the unsorted tail
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # if the whole [start, stop) range survived, return the cheaper slice
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
2873
2874 def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
2875 """
2876 Get location and sliced index for requested label(s)/level(s).
2877
2878 Parameters
2879 ----------
2880 key : label or sequence of labels
2881 level : int/level name or list thereof, optional
2882 drop_level : bool, default True
2883 If ``False``, the resulting index will not drop any level.
2884
2885 Returns
2886 -------
2887 tuple
2888 A 2-tuple where the elements :
2889
2890 Element 0: int, slice object or boolean array.
2891
2892 Element 1: The resulting sliced multiindex/index. If the key
2893 contains all levels, this will be ``None``.
2894
2895 See Also
2896 --------
2897 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2898 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2899 sequence of such.
2900
2901 Examples
2902 --------
2903 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
2904 ... names=['A', 'B'])
2905
2906 >>> mi.get_loc_level('b')
2907 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))
2908
2909 >>> mi.get_loc_level('e', level='B')
2910 (array([False, True, False]), Index(['b'], dtype='object', name='A'))
2911
2912 >>> mi.get_loc_level(['b', 'e'])
2913 (1, None)
2914 """
2915 if not isinstance(level, (list, tuple)):
2916 level = self._get_level_number(level)
2917 else:
2918 level = [self._get_level_number(lev) for lev in level]
2919
2920 loc, mi = self._get_loc_level(key, level=level)
2921 if not drop_level:
2922 if lib.is_integer(loc):
2923 mi = self[loc : loc + 1]
2924 else:
2925 mi = self[loc]
2926 return loc, mi
2927
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # convert to a boolean mask so per-level results can be
                    # AND-ed together below
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                # TODO: in some cases we still need to drop some levels,
                # e.g. test_multiindex_perf_warn
                # test_partial_string_timestamp_multiindex
                ilevels = [
                    i
                    for i in range(len(key))
                    if (
                        not isinstance(key[i], str)
                        or not self.levels[i]._supports_partial_string_indexing
                    )
                    and key[i] != slice(None, None)
                ]
                if len(ilevels) == self.nlevels:
                    # TODO: why?
                    ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # key contains slices: intersect the per-level indexers
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                result_index = self[indexer]

            return indexer, result_index
3076
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate ``key`` within a single level.

        Returns a slice when the level is sorted and the match is contiguous,
        otherwise a boolean array over the full length of the index.

        Parameters
        ----------
        key : label or slice
        level : int, default 0
            Positional level number (never a name).
        indexer : np.ndarray[bool] or None
            If given, restrict the search to positions where it is True.
        """
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
3189
    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Expand a positional slice into a full-length boolean mask;
            # boolean arrays pass through unchanged.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take; levels are
        # AND-ed into it one at a time below
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            # OR the slice's positions into the existing mask
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    # this level matched rows, but none survive the AND with
                    # earlier levels -> the requested combination is absent
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3320
3321 # --------------------------------------------------------------------
3322
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary; break out at the first level that
        # requires it
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    # only a reversed (negative-step) slice breaks the order
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend so that np.lexsort treats the FIRST level in seq as the
            # primary sort key (lexsort's last key is primary)
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3408
3409 def truncate(self, before=None, after=None) -> MultiIndex:
3410 """
3411 Slice index between two labels / tuples, return new MultiIndex.
3412
3413 Parameters
3414 ----------
3415 before : label or tuple, can be partial. Default None
3416 None defaults to start.
3417 after : label or tuple, can be partial. Default None
3418 None defaults to end.
3419
3420 Returns
3421 -------
3422 MultiIndex
3423 The truncated MultiIndex.
3424
3425 Examples
3426 --------
3427 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
3428 >>> mi
3429 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
3430 )
3431 >>> mi.truncate(before='a', after='b')
3432 MultiIndex([('a', 'x'), ('b', 'y')],
3433 )
3434 """
3435 if after and before and after < before:
3436 raise ValueError("after < before")
3437
3438 i, j = self.levels[0].slice_locs(before, after)
3439 left, right = self.slice_locs(before, after)
3440
3441 new_levels = list(self.levels)
3442 new_levels[0] = new_levels[0][i:j]
3443
3444 new_codes = [level_codes[left:right] for level_codes in self.codes]
3445 new_codes[0] = new_codes[0] - i
3446
3447 return MultiIndex(
3448 levels=new_levels,
3449 codes=new_codes,
3450 names=self._names,
3451 verify_integrity=False,
3452 )
3453
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level: NA positions (code -1) must match exactly,
        # then the decoded non-NA values must compare equal.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                # both plain ndarrays
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3515
3516 def equal_levels(self, other: MultiIndex) -> bool:
3517 """
3518 Return True if the levels of both MultiIndex objects are the same
3519
3520 """
3521 if self.nlevels != other.nlevels:
3522 return False
3523
3524 for i in range(self.nlevels):
3525 if not self.levels[i].equals(other.levels[i]):
3526 return False
3527 return True
3528
3529 # --------------------------------------------------------------------
3530 # Set Methods
3531
    def _union(self, other, sort) -> MultiIndex:
        """
        Form the union of self and other.

        Parameters
        ----------
        other : Index-like or list of tuples
        sort : bool or None
            Whether to sort the result.  None sorts best-effort, emitting a
            RuntimeWarning when the values are unorderable; True re-raises.

        Returns
        -------
        MultiIndex
        """
        other, result_names = self._convert_can_do_setop(other)
        if other.has_duplicates:
            # This is only necessary if other has dupes,
            # otherwise difference is faster
            result = super()._union(other, sort)

            if isinstance(result, MultiIndex):
                return result
            # base-class union may return a flat Index of tuples; rebuild
            return MultiIndex.from_arrays(
                zip(*result), sortorder=None, names=result_names
            )

        else:
            # append only the labels of other that are missing from self
            right_missing = other.difference(self, sort=False)
            if len(right_missing):
                result = self.append(right_missing)
            else:
                result = self._get_reconciled_name_object(other)

            if sort is not False:
                try:
                    result = result.sort_values()
                except TypeError:
                    if sort is True:
                        raise
                    warnings.warn(
                        "The values in the array are unorderable. "
                        "Pass `sort=False` to suppress this warning.",
                        RuntimeWarning,
                        stacklevel=find_stack_level(),
                    )
            return result
3565
    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        # A MultiIndex's values are tuples, so only object dtype can hold
        # values comparable with it.
        return is_object_dtype(dtype)
3568
3569 def _get_reconciled_name_object(self, other) -> MultiIndex:
3570 """
3571 If the result of a set operation will be self,
3572 return self, unless the names change, in which
3573 case make a shallow copy of self.
3574 """
3575 names = self._maybe_match_names(other)
3576 if self.names != names:
3577 # error: Cannot determine type of "rename"
3578 return self.rename(names) # type: ignore[has-type]
3579 return self
3580
3581 def _maybe_match_names(self, other):
3582 """
3583 Try to find common names to attach to the result of an operation between
3584 a and b. Return a consensus list of names if they match at least partly
3585 or list of None if they have completely different names.
3586 """
3587 if len(self.names) != len(other.names):
3588 return [None] * len(self.names)
3589 names = []
3590 for a_name, b_name in zip(self.names, other.names):
3591 if a_name == b_name:
3592 names.append(a_name)
3593 else:
3594 # TODO: what if they both have np.nan for their names?
3595 names.append(None)
3596 return names
3597
3598 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3599 _, result_names = self._convert_can_do_setop(other)
3600 return result.set_names(result_names)
3601
3602 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex:
3603 _, result_names = self._convert_can_do_setop(other)
3604
3605 if len(result) == 0:
3606 return result.remove_unused_levels().set_names(result_names)
3607 else:
3608 return result.set_names(result_names)
3609
3610 def _convert_can_do_setop(self, other):
3611 result_names = self.names
3612
3613 if not isinstance(other, Index):
3614 if len(other) == 0:
3615 return self[:0], self.names
3616 else:
3617 msg = "other must be a MultiIndex or a list of tuples"
3618 try:
3619 other = MultiIndex.from_tuples(other, names=self.names)
3620 except (ValueError, TypeError) as err:
3621 # ValueError raised by tuples_to_object_array if we
3622 # have non-object dtype
3623 raise TypeError(msg) from err
3624 else:
3625 result_names = get_unanimous_names(self, other)
3626
3627 return other, result_names
3628
3629 # --------------------------------------------------------------------
3630
3631 @doc(Index.astype)
3632 def astype(self, dtype, copy: bool = True):
3633 dtype = pandas_dtype(dtype)
3634 if is_categorical_dtype(dtype):
3635 msg = "> 1 ndim Categorical are not supported at this time"
3636 raise NotImplementedError(msg)
3637 if not is_object_dtype(dtype):
3638 raise TypeError(
3639 "Setting a MultiIndex dtype to anything other than object "
3640 "is not supported"
3641 )
3642 if copy is True:
3643 return self._view()
3644 return self
3645
3646 def _validate_fill_value(self, item):
3647 if isinstance(item, MultiIndex):
3648 # GH#43212
3649 if item.nlevels != self.nlevels:
3650 raise ValueError("Item must have length equal to number of levels.")
3651 return item._values
3652 elif not isinstance(item, tuple):
3653 # Pad the key with empty strings if lower levels of the key
3654 # aren't specified:
3655 item = (item,) + ("",) * (self.nlevels - 1)
3656 elif len(item) != self.nlevels:
3657 raise ValueError("Item must have length equal to number of levels.")
3658 return item
3659
    def putmask(self, mask, value: MultiIndex) -> MultiIndex:
        """
        Return a new MultiIndex of the values set with the mask.

        Parameters
        ----------
        mask : array like
        value : MultiIndex
            Must either be the same length as self or length one

        Returns
        -------
        MultiIndex
        """
        mask, noop = validate_putmask(self, mask)
        if noop:
            # nothing selected by the mask; return an unchanged copy
            return self.copy()

        if len(mask) == len(value):
            # value aligns with self: only the masked rows are taken from it
            subset = value[mask].remove_unused_levels()
        else:
            subset = value.remove_unused_levels()

        new_levels = []
        new_codes = []

        for i, (value_level, level, level_codes) in enumerate(
            zip(subset.levels, self.levels, self.codes)
        ):
            # merge the replacement labels into this level, then re-encode the
            # replacement rows against the merged level
            new_level = level.union(value_level, sort=False)
            value_codes = new_level.get_indexer_for(subset.get_level_values(i))
            # ensure_int64 widens the (read-only, small-dtype) codes to a
            # writeable int64 array before assigning into it
            new_code = ensure_int64(level_codes)
            new_code[mask] = value_codes
            new_levels.append(new_level)
            new_codes.append(new_code)

        return MultiIndex(
            levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
        )
3699
3700 def insert(self, loc: int, item) -> MultiIndex:
3701 """
3702 Make new MultiIndex inserting new item at location
3703
3704 Parameters
3705 ----------
3706 loc : int
3707 item : tuple
3708 Must be same length as number of levels in the MultiIndex
3709
3710 Returns
3711 -------
3712 new_index : Index
3713 """
3714 item = self._validate_fill_value(item)
3715
3716 new_levels = []
3717 new_codes = []
3718 for k, level, level_codes in zip(item, self.levels, self.codes):
3719 if k not in level:
3720 # have to insert into level
3721 # must insert at end otherwise you have to recompute all the
3722 # other codes
3723 lev_loc = len(level)
3724 level = level.insert(lev_loc, k)
3725 else:
3726 lev_loc = level.get_loc(k)
3727
3728 new_levels.append(level)
3729 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3730
3731 return MultiIndex(
3732 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3733 )
3734
3735 def delete(self, loc) -> MultiIndex:
3736 """
3737 Make new index with passed location deleted
3738
3739 Returns
3740 -------
3741 new_index : MultiIndex
3742 """
3743 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3744 return MultiIndex(
3745 levels=self.levels,
3746 codes=new_codes,
3747 names=self.names,
3748 verify_integrity=False,
3749 )
3750
3751 @doc(Index.isin)
3752 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3753 if isinstance(values, Generator):
3754 values = list(values)
3755
3756 if level is None:
3757 if len(values) == 0:
3758 return np.zeros((len(self),), dtype=np.bool_)
3759 if not isinstance(values, MultiIndex):
3760 values = MultiIndex.from_tuples(values)
3761 return values.unique().get_indexer_for(self) != -1
3762 else:
3763 num = self._get_level_number(level)
3764 levs = self.get_level_values(num)
3765
3766 if levs.size == 0:
3767 return np.zeros(len(levs), dtype=np.bool_)
3768 return levs.isin(values)
3769
    # error: Incompatible types in assignment (expression has type overloaded function,
    # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
    rename = Index.set_names  # type: ignore[assignment]

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # Each assignment below installs a stub that raises a TypeError naming
    # the invalid operation (see make_invalid_op).

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
3800
3801
def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
    """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
    as_int64 = [ensure_int64(level_codes) for level_codes in codes]
    # Probe from the deepest prefix down; the first sorted prefix wins.
    for depth in range(nlevels, 0, -1):
        if libalgos.is_lexsorted(as_int64[:depth]):
            return depth
    return 0
3809
3810
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out labels that repeat the previous row's leading labels,
    replacing them with `sentinel`; the innermost level is always shown.

    `label_list` is one sequence per level; the result has the same
    per-level layout.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev_row = rows[start]

    for row in rows[start + 1 :]:
        sparsified: list = []
        for lvl, (prev_val, val) in enumerate(zip(prev_row, row)):
            if lvl == nlevels - 1:
                # innermost level is never blanked
                sparsified.append(val)
                out.append(sparsified)
                break
            if prev_val == val:
                sparsified.append(sentinel)
            else:
                # first difference: emit the rest of the row verbatim
                sparsified.extend(row[lvl:])
                out.append(sparsified)
                break
        prev_row = row

    return list(zip(*out))
3837
3838
def _get_na_rep(dtype) -> str:
    """Return the display string for a missing value of the given dtype."""
    if is_extension_array_dtype(dtype):
        # Extension dtypes carry their own NA sentinel (e.g. pd.NA, NaT).
        return f"{dtype.na_value}"
    # numpy dtypes: datetime-likes display NaT, everything else NaN.
    return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype.type, "NaN")
3846
3847
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    if not isinstance(key, tuple):
        # Scalar key: drop one level, keeping the index as-is when that
        # would remove too many levels.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    fallback = index
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out entirely
            return fallback
    return index
3879
3880
def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
    """
    Coerce the array-like indexer to the smallest integer dtype that can encode all
    of the given categories.

    Parameters
    ----------
    array_like : array-like
    categories : array-like
    copy : bool
        If True, ensure the returned array owns its own memory.

    Returns
    -------
    np.ndarray
        Non-writeable.
    """
    coerced = coerce_indexer_dtype(array_like, categories)
    if copy:
        coerced = coerced.copy()
    # Freeze so the codes can be shared between indexes safely.
    coerced.flags.writeable = False
    return coerced
3902
3903
def _require_listlike(level, arr, arrname: str):
    """
    Ensure that level is either None or listlike, and arr is list-of-listlike.
    """
    if level is None or is_list_like(level):
        # Already in multi-level form: arr must be a list of list-likes.
        if not is_list_like(arr) or not is_list_like(arr[0]):
            raise TypeError(f"{arrname} must be list of lists-like")
        return level, arr

    # Scalar level: arr must then be a single flat list-like; wrap both so
    # callers can always iterate (level, arr) pairs.
    if not is_list_like(arr):
        raise TypeError(f"{arrname} must be list-like")
    if len(arr) > 0 and is_list_like(arr[0]):
        raise TypeError(f"{arrname} must be list-like")
    return [level], [arr]