1from __future__ import annotations
2
3from collections.abc import (
4 Collection,
5 Generator,
6 Hashable,
7 Iterable,
8 Sequence,
9)
10from functools import wraps
11from sys import getsizeof
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 Literal,
17 cast,
18)
19import warnings
20
21import numpy as np
22
23from pandas._config import get_option
24
25from pandas._libs import (
26 algos as libalgos,
27 index as libindex,
28 lib,
29)
30from pandas._libs.hashtable import duplicated
31from pandas._typing import (
32 AnyAll,
33 AnyArrayLike,
34 Axis,
35 DropKeep,
36 DtypeObj,
37 F,
38 IgnoreRaise,
39 IndexLabel,
40 Scalar,
41 Self,
42 Shape,
43 npt,
44)
45from pandas.compat.numpy import function as nv
46from pandas.errors import (
47 InvalidIndexError,
48 PerformanceWarning,
49 UnsortedIndexError,
50)
51from pandas.util._decorators import (
52 Appender,
53 cache_readonly,
54 doc,
55)
56from pandas.util._exceptions import find_stack_level
57
58from pandas.core.dtypes.cast import coerce_indexer_dtype
59from pandas.core.dtypes.common import (
60 ensure_int64,
61 ensure_platform_int,
62 is_hashable,
63 is_integer,
64 is_iterator,
65 is_list_like,
66 is_object_dtype,
67 is_scalar,
68 pandas_dtype,
69)
70from pandas.core.dtypes.dtypes import (
71 CategoricalDtype,
72 ExtensionDtype,
73)
74from pandas.core.dtypes.generic import (
75 ABCDataFrame,
76 ABCSeries,
77)
78from pandas.core.dtypes.inference import is_array_like
79from pandas.core.dtypes.missing import (
80 array_equivalent,
81 isna,
82)
83
84import pandas.core.algorithms as algos
85from pandas.core.array_algos.putmask import validate_putmask
86from pandas.core.arrays import (
87 Categorical,
88 ExtensionArray,
89)
90from pandas.core.arrays.categorical import (
91 factorize_from_iterables,
92 recode_for_categories,
93)
94import pandas.core.common as com
95from pandas.core.construction import sanitize_array
96import pandas.core.indexes.base as ibase
97from pandas.core.indexes.base import (
98 Index,
99 _index_shared_docs,
100 ensure_index,
101 get_unanimous_names,
102)
103from pandas.core.indexes.frozen import FrozenList
104from pandas.core.ops.invalid import make_invalid_op
105from pandas.core.sorting import (
106 get_group_index,
107 lexsort_indexer,
108)
109
110from pandas.io.formats.printing import (
111 get_adjustment,
112 pprint_thing,
113)
114
115if TYPE_CHECKING:
116 from pandas import (
117 CategoricalIndex,
118 DataFrame,
119 Series,
120 )
121
# Docstring-substitution values shared by this module: start from the
# base-Index defaults and override the class names that appear in the
# rendered docs for MultiIndex methods.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
)
126
127
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Manage a MultiIndex by mapping label combinations to positive integers.

    Used when the packed representation of all levels fits in a uint64.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift each level's codes into its pre-calculated, disjoint bit
        # range. NOTE: in-place, mutating the caller's array, as before.
        codes <<= self.offsets

        # With disjoint bit ranges, OR and sum are interchangeable; OR-ing
        # across the last axis composes the significant bits of each level
        # into a single positive integer. For a 1-d input (a single key),
        # axis 0 is the last axis, so both shapes are handled uniformly.
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
165
166
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Manage those (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers, using an ObjectEngine that holds
    arbitrary-precision Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python integer (each),
        in a strictly monotonic way (i.e. respecting the lexicographic order
        of integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shifting can overflow uint64 here, so first re-box as Python ints
        # (object dtype), then shift each level into its disjoint bit range.
        codes = codes.astype("object") << self.offsets

        # With disjoint bit ranges, OR and sum are interchangeable; OR-ing
        # across the last axis composes the levels' bits into one positive
        # integer per row. For a 1-d input (single key), axis 0 is the last
        # axis, so both shapes are handled uniformly.
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
206
207
def names_compat(meth: F) -> F:
    """
    Decorator allowing a method to accept either the `name` or the `names`
    keyword, but never both at once.

    This makes it easier to share code with the base class, which uses
    `name` where MultiIndex uses `names`.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        has_name = "name" in kwargs
        if has_name and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if has_name:
            # normalize: forward `name` to the method as `names`
            kwargs["names"] = kwargs.pop("name")
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)
225
226
227class MultiIndex(Index):
228 """
229 A multi-level, or hierarchical, index object for pandas objects.
230
231 Parameters
232 ----------
233 levels : sequence of arrays
234 The unique labels for each level.
235 codes : sequence of arrays
236 Integers for each level designating which label at each location.
237 sortorder : optional int
238 Level of sortedness (must be lexicographically sorted by that
239 level).
240 names : optional sequence of objects
241 Names for each of the index levels. (name is accepted for compat).
242 copy : bool, default False
243 Copy the meta-data.
244 verify_integrity : bool, default True
245 Check that the levels/codes are consistent and valid.
246
247 Attributes
248 ----------
249 names
250 levels
251 codes
252 nlevels
253 levshape
254 dtypes
255
256 Methods
257 -------
258 from_arrays
259 from_tuples
260 from_product
261 from_frame
262 set_levels
263 set_codes
264 to_frame
265 to_flat_index
266 sortlevel
267 droplevel
268 swaplevel
269 reorder_levels
270 remove_unused_levels
271 get_level_values
272 get_indexer
273 get_loc
274 get_locs
275 get_loc_level
276 drop
277
278 See Also
279 --------
280 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
281 MultiIndex.from_product : Create a MultiIndex from the cartesian product
282 of iterables.
283 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
284 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
285 Index : The base pandas Index type.
286
287 Notes
288 -----
289 See the `user guide
290 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
291 for more.
292
293 Examples
294 --------
295 A new ``MultiIndex`` is typically constructed using one of the helper
296 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
297 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
298
299 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
300 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
301 MultiIndex([(1, 'red'),
302 (1, 'blue'),
303 (2, 'red'),
304 (2, 'blue')],
305 names=['number', 'color'])
306
307 See further examples for how to construct a MultiIndex in the doc strings
308 of the mentioned helper methods.
309 """
310
311 _hidden_attrs = Index._hidden_attrs | frozenset()
312
313 # initialize to zero-length tuples to make everything work
314 _typ = "multiindex"
315 _names: list[Hashable | None] = []
316 _levels = FrozenList()
317 _codes = FrozenList()
318 _comparables = ["names"]
319
320 sortorder: int | None
321
322 # --------------------------------------------------------------------
323 # Constructors
324
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ) -> Self:
        # compat with Index: accept `name` as an alias for `names`
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # bypass Index.__new__; MultiIndex manages its own construction
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # default to unnamed levels; _set_names below validates and overrides
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # validates codes against levels; also re-masks codes that point
            # at NaN level values as -1
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()
        result._references = None

        return result
371
372 def _validate_codes(self, level: list, code: list):
373 """
374 Reassign code values as -1 if their corresponding levels are NaN.
375
376 Parameters
377 ----------
378 code : list
379 Code to reassign.
380 level : list
381 Level to check for missing values (NaN, NaT, None).
382
383 Returns
384 -------
385 new code where code value = -1 if it corresponds
386 to a level with missing values (NaN, NaT, None).
387 """
388 null_mask = isna(level)
389 if np.any(null_mask):
390 # error: Incompatible types in assignment
391 # (expression has type "ndarray[Any, dtype[Any]]",
392 # variable has type "List[Any]")
393 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
394 return code
395
    def _verify_integrity(
        self,
        codes: list | None = None,
        levels: list | None = None,
        levels_to_verify: list[int] | range | None = None,
    ):
        """
        Validate that codes are consistent with levels.

        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.
        levels_to_verify : optional list
            Specifies the levels to verify. Defaults to all levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actual sortorder.
        # NOTE: `or` (not an `is None` check) means empty lists also fall
        # back to the current attributes.
        codes = codes or self.codes
        levels = levels or self.levels
        if levels_to_verify is None:
            levels_to_verify = range(len(levels))

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # every level's code array must have the index's length
        codes_length = len(codes[0])
        for i in levels_to_verify:
            level = levels[i]
            level_codes = codes[i]

            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # codes index into the level, so max code must be in bounds
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            # -1 is the sentinel for missing values; anything below is invalid
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # sortorder may not claim more sortedness than is actually present
            if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
                )

        result_codes = []
        for i in range(len(levels)):
            if i in levels_to_verify:
                # re-mask codes that point at NaN level values as -1
                result_codes.append(self._validate_codes(levels[i], codes[i]))
            else:
                result_codes.append(codes[i])

        new_codes = FrozenList(result_codes)
        return new_codes
473
474 @classmethod
475 def from_arrays(
476 cls,
477 arrays,
478 sortorder: int | None = None,
479 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
480 ) -> MultiIndex:
481 """
482 Convert arrays to MultiIndex.
483
484 Parameters
485 ----------
486 arrays : list / sequence of array-likes
487 Each array-like gives one level's value for each data point.
488 len(arrays) is the number of levels.
489 sortorder : int or None
490 Level of sortedness (must be lexicographically sorted by that
491 level).
492 names : list / sequence of str, optional
493 Names for the levels in the index.
494
495 Returns
496 -------
497 MultiIndex
498
499 See Also
500 --------
501 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
502 MultiIndex.from_product : Make a MultiIndex from cartesian product
503 of iterables.
504 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
505
506 Examples
507 --------
508 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
509 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
510 MultiIndex([(1, 'red'),
511 (1, 'blue'),
512 (2, 'red'),
513 (2, 'blue')],
514 names=['number', 'color'])
515 """
516 error_msg = "Input must be a list / sequence of array-likes."
517 if not is_list_like(arrays):
518 raise TypeError(error_msg)
519 if is_iterator(arrays):
520 arrays = list(arrays)
521
522 # Check if elements of array are list-like
523 for array in arrays:
524 if not is_list_like(array):
525 raise TypeError(error_msg)
526
527 # Check if lengths of all arrays are equal or not,
528 # raise ValueError, if not
529 for i in range(1, len(arrays)):
530 if len(arrays[i]) != len(arrays[i - 1]):
531 raise ValueError("all arrays must be same length")
532
533 codes, levels = factorize_from_iterables(arrays)
534 if names is lib.no_default:
535 names = [getattr(arr, "name", None) for arr in arrays]
536
537 return cls(
538 levels=levels,
539 codes=codes,
540 sortorder=sortorder,
541 names=names,
542 verify_integrity=False,
543 )
544
    @classmethod
    @names_compat
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1, 'red'),
                    (1, 'blue'),
                    (2, 'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        if is_iterator(tuples):
            # materialize so we can take len() and iterate repeatedly
            tuples = list(tuples)
        tuples = cast(Collection[tuple[Hashable, ...]], tuples)

        # handling the empty tuple cases
        if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
            # all entries are the empty tuple (): represent them with a
            # single level whose codes all point at that one entry
            codes = [np.zeros(len(tuples))]
            levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
            return cls(
                levels=levels,
                codes=codes,
                sortorder=sortorder,
                names=names,
                verify_integrity=False,
            )

        arrays: list[Sequence[Hashable]]
        if len(tuples) == 0:
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # one empty column per requested level name
            # error: Argument 1 to "len" has incompatible type "Hashable";
            # expected "Sized"
            arrays = [[]] * len(names)  # type: ignore[arg-type]
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = np.asarray(tuples._values)

            # vectorized transpose: tuple rows -> one object array per level
            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # generic iterable of tuples: pure-Python transpose
            arrs = zip(*tuples)
            arrays = cast(list[Sequence[Hashable]], arrs)

        return cls.from_arrays(arrays, sortorder=sortorder, names=names)
625
626 @classmethod
627 def from_product(
628 cls,
629 iterables: Sequence[Iterable[Hashable]],
630 sortorder: int | None = None,
631 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
632 ) -> MultiIndex:
633 """
634 Make a MultiIndex from the cartesian product of multiple iterables.
635
636 Parameters
637 ----------
638 iterables : list / sequence of iterables
639 Each iterable has unique labels for each level of the index.
640 sortorder : int or None
641 Level of sortedness (must be lexicographically sorted by that
642 level).
643 names : list / sequence of str, optional
644 Names for the levels in the index.
645 If not explicitly provided, names will be inferred from the
646 elements of iterables if an element has a name attribute.
647
648 Returns
649 -------
650 MultiIndex
651
652 See Also
653 --------
654 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
655 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
656 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
657
658 Examples
659 --------
660 >>> numbers = [0, 1, 2]
661 >>> colors = ['green', 'purple']
662 >>> pd.MultiIndex.from_product([numbers, colors],
663 ... names=['number', 'color'])
664 MultiIndex([(0, 'green'),
665 (0, 'purple'),
666 (1, 'green'),
667 (1, 'purple'),
668 (2, 'green'),
669 (2, 'purple')],
670 names=['number', 'color'])
671 """
672 from pandas.core.reshape.util import cartesian_product
673
674 if not is_list_like(iterables):
675 raise TypeError("Input must be a list / sequence of iterables.")
676 if is_iterator(iterables):
677 iterables = list(iterables)
678
679 codes, levels = factorize_from_iterables(iterables)
680 if names is lib.no_default:
681 names = [getattr(it, "name", None) for it in iterables]
682
683 # codes are all ndarrays, so cartesian_product is lossless
684 codes = cartesian_product(codes)
685 return cls(levels, codes, sortorder=sortorder, names=names)
686
687 @classmethod
688 def from_frame(
689 cls,
690 df: DataFrame,
691 sortorder: int | None = None,
692 names: Sequence[Hashable] | Hashable | None = None,
693 ) -> MultiIndex:
694 """
695 Make a MultiIndex from a DataFrame.
696
697 Parameters
698 ----------
699 df : DataFrame
700 DataFrame to be converted to MultiIndex.
701 sortorder : int, optional
702 Level of sortedness (must be lexicographically sorted by that
703 level).
704 names : list-like, optional
705 If no names are provided, use the column names, or tuple of column
706 names if the columns is a MultiIndex. If a sequence, overwrite
707 names with the given sequence.
708
709 Returns
710 -------
711 MultiIndex
712 The MultiIndex representation of the given DataFrame.
713
714 See Also
715 --------
716 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
717 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
718 MultiIndex.from_product : Make a MultiIndex from cartesian product
719 of iterables.
720
721 Examples
722 --------
723 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
724 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
725 ... columns=['a', 'b'])
726 >>> df
727 a b
728 0 HI Temp
729 1 HI Precip
730 2 NJ Temp
731 3 NJ Precip
732
733 >>> pd.MultiIndex.from_frame(df)
734 MultiIndex([('HI', 'Temp'),
735 ('HI', 'Precip'),
736 ('NJ', 'Temp'),
737 ('NJ', 'Precip')],
738 names=['a', 'b'])
739
740 Using explicit names, instead of the column names
741
742 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
743 MultiIndex([('HI', 'Temp'),
744 ('HI', 'Precip'),
745 ('NJ', 'Temp'),
746 ('NJ', 'Precip')],
747 names=['state', 'observation'])
748 """
749 if not isinstance(df, ABCDataFrame):
750 raise TypeError("Input must be a DataFrame")
751
752 column_names, columns = zip(*df.items())
753 names = column_names if names is None else names
754 return cls.from_arrays(columns, sortorder=sortorder, names=names)
755
756 # --------------------------------------------------------------------
757
758 @cache_readonly
759 def _values(self) -> np.ndarray:
760 # We override here, since our parent uses _data, which we don't use.
761 values = []
762
763 for i in range(self.nlevels):
764 index = self.levels[i]
765 codes = self.codes[i]
766
767 vals = index
768 if isinstance(vals.dtype, CategoricalDtype):
769 vals = cast("CategoricalIndex", vals)
770 vals = vals._data._internal_get_values()
771
772 if isinstance(vals.dtype, ExtensionDtype) or lib.is_np_dtype(
773 vals.dtype, "mM"
774 ):
775 vals = vals.astype(object)
776
777 vals = np.asarray(vals)
778 vals = algos.take_nd(vals, codes, fill_value=index._na_value)
779 values.append(vals)
780
781 arr = lib.fast_zip(values)
782 return arr
783
    @property
    def values(self) -> np.ndarray:
        """
        Return the MultiIndex contents as an object-dtype ndarray of tuples.

        Delegates to the cached ``_values``.
        """
        return self._values
787
788 @property
789 def array(self):
790 """
791 Raises a ValueError for `MultiIndex` because there's no single
792 array backing a MultiIndex.
793
794 Raises
795 ------
796 ValueError
797 """
798 raise ValueError(
799 "MultiIndex has no single backing array. Use "
800 "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
801 )
802
803 @cache_readonly
804 def dtypes(self) -> Series:
805 """
806 Return the dtypes as a Series for the underlying MultiIndex.
807
808 Examples
809 --------
810 >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')],
811 ... names=['number', 'color'])
812 >>> idx
813 MultiIndex([(0, 'green'),
814 (0, 'purple'),
815 (1, 'green'),
816 (1, 'purple'),
817 (2, 'green'),
818 (2, 'purple')],
819 names=['number', 'color'])
820 >>> idx.dtypes
821 number int64
822 color object
823 dtype: object
824 """
825 from pandas import Series
826
827 names = com.fill_missing_names([level.name for level in self.levels])
828 return Series([level.dtype for level in self.levels], index=Index(names))
829
    def __len__(self) -> int:
        # All code arrays share the same length (the number of rows), so the
        # first one suffices; avoids materializing _values.
        return len(self.codes[0])
832
    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.

        Equivalent to ``len(self)``.
        """
        # override Index.size to avoid materializing _values
        return len(self)
840
841 # --------------------------------------------------------------------
842 # Levels Methods
843
844 @cache_readonly
845 def levels(self) -> FrozenList:
846 """
847 Levels of the MultiIndex.
848
849 Levels refer to the different hierarchical levels or layers in a MultiIndex.
850 In a MultiIndex, each level represents a distinct dimension or category of
851 the index.
852
853 To access the levels, you can use the levels attribute of the MultiIndex,
854 which returns a tuple of Index objects. Each Index object represents a
855 level in the MultiIndex and contains the unique values found in that
856 specific level.
857
858 If a MultiIndex is created with levels A, B, C, and the DataFrame using
859 it filters out all rows of the level C, MultiIndex.levels will still
860 return A, B, C.
861
862 Examples
863 --------
864 >>> index = pd.MultiIndex.from_product([['mammal'],
865 ... ('goat', 'human', 'cat', 'dog')],
866 ... names=['Category', 'Animals'])
867 >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
868 >>> leg_num
869 Legs
870 Category Animals
871 mammal goat 4
872 human 2
873 cat 4
874 dog 4
875
876 >>> leg_num.index.levels
877 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']])
878
879 MultiIndex levels will not change even if the DataFrame using the MultiIndex
880 does not contain all them anymore.
881 See how "human" is not in the DataFrame, but it is still in levels:
882
883 >>> large_leg_num = leg_num[leg_num.Legs > 2]
884 >>> large_leg_num
885 Legs
886 Category Animals
887 mammal goat 4
888 cat 4
889 dog 4
890
891 >>> large_leg_num.index.levels
892 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']])
893 """
894 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
895 # create new IndexEngine
896 # https://github.com/pandas-dev/pandas/issues/31648
897 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
898 for level in result:
899 # disallow midx.levels[0].name = "foo"
900 level._no_setting_name = True
901 return FrozenList(result)
902
    def _set_levels(
        self,
        levels,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # replace all levels
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._view() for lev in levels
            )
            level_numbers = list(range(len(new_levels)))
        else:
            # replace only the requested levels, keeping the rest
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels_list = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
            new_levels = FrozenList(new_levels_list)

        if verify_integrity:
            # may re-mask codes pointing at NaN level values as -1
            new_codes = self._verify_integrity(
                levels=new_levels, levels_to_verify=level_numbers
            )
            self._codes = new_codes

        names = self.names
        self._levels = new_levels
        if any(names):
            # re-attach names: the new level Indexes are unnamed views
            self._set_names(names)

        # cached properties (levels, _values, ...) are now stale
        self._reset_cache()
947
948 def set_levels(
949 self, levels, *, level=None, verify_integrity: bool = True
950 ) -> MultiIndex:
951 """
952 Set new levels on MultiIndex. Defaults to returning new index.
953
954 Parameters
955 ----------
956 levels : sequence or list of sequence
957 New level(s) to apply.
958 level : int, level name, or sequence of int/level names (default None)
959 Level(s) to set (None for all levels).
960 verify_integrity : bool, default True
961 If True, checks that levels and codes are compatible.
962
963 Returns
964 -------
965 MultiIndex
966
967 Examples
968 --------
969 >>> idx = pd.MultiIndex.from_tuples(
970 ... [
971 ... (1, "one"),
972 ... (1, "two"),
973 ... (2, "one"),
974 ... (2, "two"),
975 ... (3, "one"),
976 ... (3, "two")
977 ... ],
978 ... names=["foo", "bar"]
979 ... )
980 >>> idx
981 MultiIndex([(1, 'one'),
982 (1, 'two'),
983 (2, 'one'),
984 (2, 'two'),
985 (3, 'one'),
986 (3, 'two')],
987 names=['foo', 'bar'])
988
989 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
990 MultiIndex([('a', 1),
991 ('a', 2),
992 ('b', 1),
993 ('b', 2),
994 ('c', 1),
995 ('c', 2)],
996 names=['foo', 'bar'])
997 >>> idx.set_levels(['a', 'b', 'c'], level=0)
998 MultiIndex([('a', 'one'),
999 ('a', 'two'),
1000 ('b', 'one'),
1001 ('b', 'two'),
1002 ('c', 'one'),
1003 ('c', 'two')],
1004 names=['foo', 'bar'])
1005 >>> idx.set_levels(['a', 'b'], level='bar')
1006 MultiIndex([(1, 'a'),
1007 (1, 'b'),
1008 (2, 'a'),
1009 (2, 'b'),
1010 (3, 'a'),
1011 (3, 'b')],
1012 names=['foo', 'bar'])
1013
1014 If any of the levels passed to ``set_levels()`` exceeds the
1015 existing length, all of the values from that argument will
1016 be stored in the MultiIndex levels, though the values will
1017 be truncated in the MultiIndex output.
1018
1019 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
1020 MultiIndex([('a', 1),
1021 ('a', 2),
1022 ('b', 1),
1023 ('b', 2),
1024 ('c', 1),
1025 ('c', 2)],
1026 names=['foo', 'bar'])
1027 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
1028 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
1029 """
1030
1031 if isinstance(levels, Index):
1032 pass
1033 elif is_array_like(levels):
1034 levels = Index(levels)
1035 elif is_list_like(levels):
1036 levels = list(levels)
1037
1038 level, levels = _require_listlike(level, levels, "Levels")
1039 idx = self._view()
1040 idx._reset_identity()
1041 idx._set_levels(
1042 levels, level=level, validate=True, verify_integrity=verify_integrity
1043 )
1044 return idx
1045
    @property
    def nlevels(self) -> int:
        """
        Integer number of levels in this MultiIndex.

        Returns
        -------
        int
            Number of levels.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
        >>> mi
        MultiIndex([('a', 'b', 'c')],
                   )
        >>> mi.nlevels
        3
        """
        # one stored Index per level
        return len(self._levels)
1061
1062 @property
1063 def levshape(self) -> Shape:
1064 """
1065 A tuple with the length of each level.
1066
1067 Examples
1068 --------
1069 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1070 >>> mi
1071 MultiIndex([('a', 'b', 'c')],
1072 )
1073 >>> mi.levshape
1074 (1, 1, 1)
1075 """
1076 return tuple(len(x) for x in self.levels)
1077
1078 # --------------------------------------------------------------------
1079 # Codes Methods
1080
    @property
    def codes(self) -> FrozenList:
        """
        Codes of the MultiIndex: one integer array per level, where each
        entry indexes into that level's labels (-1 marks a missing value).
        """
        return self._codes
1084
    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        # In-place counterpart of set_codes; mutates self, so callers must
        # operate on a view/copy when a new index is expected.
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        level_numbers: list[int] | range
        if level is None:
            # replace all code arrays, coercing each to the smallest dtype
            # that can index its level
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
            level_numbers = range(len(new_codes))
        else:
            # replace only the requested levels' codes, keeping the rest
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            # validates against current levels; may re-mask NaN codes as -1
            new_codes = self._verify_integrity(
                codes=new_codes, levels_to_verify=level_numbers
            )

        self._codes = new_codes

        # cached properties (levels, _values, ...) are now stale
        self._reset_cache()
1125
    def set_codes(
        self, codes, *, level=None, verify_integrity: bool = True
    ) -> MultiIndex:
        """
        Set new codes on MultiIndex. Defaults to returning new index.

        Parameters
        ----------
        codes : sequence or list of sequence
            New codes to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        verify_integrity : bool, default True
            If True, checks that levels and codes are compatible.

        Returns
        -------
        MultiIndex
            A new MultiIndex with the updated codes; the caller is left
            unchanged.

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples(
        ...     [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
        ... )
        >>> idx
        MultiIndex([(1, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (2, 'two')],
                   names=['foo', 'bar'])

        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([1, 0, 1, 0], level=0)
        MultiIndex([(2, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([0, 0, 1, 1], level='bar')
        MultiIndex([(1, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (2, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        """

        level, codes = _require_listlike(level, codes, "Codes")
        # mutate a fresh view so the caller's index is untouched
        idx = self._view()
        idx._reset_identity()
        idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
        return idx
1189
1190 # --------------------------------------------------------------------
1191 # Index Internals
1192
    @cache_readonly
    def _engine(self):
        """
        Build the lookup engine: each row's codes are packed into a single
        integer key — a uint64 when the combined bit-width fits, otherwise
        an arbitrary-precision Python int.
        """
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes:
        # NaN values are shifted to 1 and missing values in other while
        # calculating the indexer are shifted to 0
        sizes = np.ceil(
            np.log2(
                [len(level) + libindex.multiindex_nulls_shift for level in self.levels]
            )
        )

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1220
1221 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1222 # type "Type[MultiIndex]" in supertype "Index"
    @property
    def _constructor(self) -> Callable[..., MultiIndex]:  # type: ignore[override]
        # from_tuples (not the class itself): reconstruction happens from
        # flat tuple data rather than from levels/codes.
        return type(self).from_tuples
1226
    @doc(Index._shallow_copy)
    def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
        # `values` is an array of tuples; rebuild via from_tuples, keeping
        # the existing names unless an explicit `name` was passed.
        names = name if name is not lib.no_default else self.names

        return type(self).from_tuples(values, sortorder=None, names=names)
1232
    def _view(self) -> MultiIndex:
        """
        Return a new MultiIndex sharing this one's levels/codes/names,
        carrying over the cache (minus the 'levels' entry, see GH32669).
        """
        result = type(self)(
            levels=self.levels,
            codes=self.codes,
            sortorder=self.sortorder,
            names=self.names,
            verify_integrity=False,
        )
        result._cache = self._cache.copy()
        result._cache.pop("levels", None)  # GH32669
        return result
1244
1245 # --------------------------------------------------------------------
1246
1247 # error: Signature of "copy" incompatible with supertype "Index"
1248 def copy( # type: ignore[override]
1249 self,
1250 names=None,
1251 deep: bool = False,
1252 name=None,
1253 ) -> Self:
1254 """
1255 Make a copy of this object.
1256
1257 Names, dtype, levels and codes can be passed and will be set on new copy.
1258
1259 Parameters
1260 ----------
1261 names : sequence, optional
1262 deep : bool, default False
1263 name : Label
1264 Kept for compatibility with 1-dimensional Index. Should not be used.
1265
1266 Returns
1267 -------
1268 MultiIndex
1269
1270 Notes
1271 -----
1272 In most cases, there should be no functional difference from using
1273 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1274 This could be potentially expensive on large MultiIndex objects.
1275
1276 Examples
1277 --------
1278 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1279 >>> mi
1280 MultiIndex([('a', 'b', 'c')],
1281 )
1282 >>> mi.copy()
1283 MultiIndex([('a', 'b', 'c')],
1284 )
1285 """
1286 names = self._validate_names(name=name, names=names, deep=deep)
1287 keep_id = not deep
1288 levels, codes = None, None
1289
1290 if deep:
1291 from copy import deepcopy
1292
1293 levels = deepcopy(self.levels)
1294 codes = deepcopy(self.codes)
1295
1296 levels = levels if levels is not None else self.levels
1297 codes = codes if codes is not None else self.codes
1298
1299 new_index = type(self)(
1300 levels=levels,
1301 codes=codes,
1302 sortorder=self.sortorder,
1303 names=names,
1304 verify_integrity=False,
1305 )
1306 new_index._cache = self._cache.copy()
1307 new_index._cache.pop("levels", None) # GH32669
1308 if keep_id:
1309 new_index._id = self._id
1310 return new_index
1311
    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        The numpy array interface; returns ``self.values``.

        ``dtype`` and ``copy`` are accepted for protocol compatibility
        but are ignored here.
        """
        return self.values
1315
1316 def view(self, cls=None) -> Self:
1317 """this is defined as a copy with the same identity"""
1318 result = self.copy()
1319 result._id = self._id
1320 return result
1321
    @doc(Index.__contains__)
    def __contains__(self, key: Any) -> bool:
        # raise TypeError for unhashable keys (dict-like membership
        # semantics) before attempting the lookup
        hash(key)
        try:
            self.get_loc(key)
            return True
        except (LookupError, TypeError, ValueError):
            return False
1330
    @cache_readonly
    def dtype(self) -> np.dtype:
        """Always object dtype: a MultiIndex's elements are tuples."""
        return np.dtype("O")
1334
    def _is_memory_usage_qualified(self) -> bool:
        """return a boolean if we need a qualified .info display"""

        def f(level) -> bool:
            # string-like levels make exact memory usage data-dependent
            return "mixed" in level or "string" in level or "unicode" in level

        return any(f(level) for level in self._inferred_type_levels)
1342
1343 # Cannot determine type of "memory_usage"
    @doc(Index.memory_usage)  # type: ignore[has-type]
    def memory_usage(self, deep: bool = False) -> int:
        # Overrides the base class to avoid computing .values here, which
        # could materialize a tuple representation unnecessarily; _nbytes
        # works directly from the levels and codes.
        return self._nbytes(deep)
1350
    @cache_readonly
    def nbytes(self) -> int:
        """return the number of bytes in the underlying data"""
        # shallow measurement; use memory_usage(deep=True) for a deep count
        return self._nbytes(False)
1355
    def _nbytes(self, deep: bool = False) -> int:
        """
        Return the number of bytes in the underlying data.

        Deeply introspects the level data if ``deep=True``; includes the
        engine hashtable.

        *This is an internal routine.*
        """
        # for implementations with no useful getsizeof (PyPy)
        objsize = 24

        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
        label_nbytes = sum(i.nbytes for i in self.codes)
        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
        result = level_nbytes + label_nbytes + names_nbytes

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result
1377
1378 # --------------------------------------------------------------------
1379 # Rendering Methods
1380
    def _formatter_func(self, tup):
        """
        Formats each item in tup according to its level's formatter function.
        """
        # one formatter per level, applied positionally to the tuple
        formatter_funcs = [level._formatter_func for level in self.levels]
        return tuple(func(val) for func, val in zip(formatter_funcs, tup))
1387
    def _get_values_for_csv(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Stringify the levels for CSV output, substituting ``na_rep`` where
        codes are -1 (missing), and return the resulting values.
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # append na_rep as an extra "level value" and point the
                # missing codes at it
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
1423
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the MultiIndex as a list of strings (deprecated).

        Use ``index.astype(str)`` or ``index.map(formatter)`` instead.
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # `name` is a legacy alias for `names`
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            # (the False case below is unreachable: falsy sparsify skips this branch)
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1504
    def _format_multi(
        self,
        *,
        include_names: bool,
        sparsify: bool | None | lib.NoDefault,
        formatter: Callable | None = None,
    ) -> list:
        """
        Internal renderer: return one list of formatted strings per level,
        optionally prefixed with the level name and sparsified.
        """
        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = _get_na_rep(lev.dtype)

            if len(lev) > 0:
                taken = formatted = lev.take(level_codes)
                formatted = taken._format_flat(include_name=False, formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if include_names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify is lib.no_default:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(include_names), sentinel=sentinel
            )

        return result_levels
1567
1568 # --------------------------------------------------------------------
1569 # Names Methods
1570
    def _get_names(self) -> FrozenList:
        """Return the level names wrapped in an immutable FrozenList."""
        return FrozenList(self._names)
1573
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
                self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
1631
    # Explicit property() call (rather than @property) so the fget/fset
    # pair defined above can be wired in; the setter mutates self._names
    # in place.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1650
1651 # --------------------------------------------------------------------
1652
    @cache_readonly
    def inferred_type(self) -> str:
        # a MultiIndex holds tuples of possibly-heterogeneous values
        return "mixed"
1656
    def _get_level_number(self, level) -> int:
        """
        Translate a level name or (possibly negative) integer position into
        a zero-based level number.

        Raises
        ------
        ValueError
            If ``level`` is a name that occurs more than once.
        KeyError
            If ``level`` is a name that does not exist.
        IndexError
            If an integer ``level`` is out of range.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            level = self.names.index(level)
        except ValueError as err:
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                # negative positions count from the end
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level
1683
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # a code of -1 marks a missing value; any NaN breaks monotonicity
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1714
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return a boolean if the values are equal or decreasing.
        """
        # monotonic decreasing if and only if reverse is monotonic increasing
        # (self[::-1] only reverses the codes; levels are shared)
        return self[::-1].is_monotonic_increasing
1722
    @cache_readonly
    def _inferred_type_levels(self) -> list[str]:
        """return a list of the inferred types, one for each level"""
        # in level order; consumed by _is_memory_usage_qualified
        return [i.inferred_type for i in self.levels]
1727
    @doc(Index.duplicated)
    def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        # Collapse each row's codes into a single group id, then run the
        # 1-D duplicated kernel over those ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1738
1739 def fillna(self, value=None, downcast=None):
1740 """
1741 fillna is not implemented for MultiIndex
1742 """
1743 raise NotImplementedError("isna is not defined for MultiIndex")
1744
1745 @doc(Index.dropna)
1746 def dropna(self, how: AnyAll = "any") -> MultiIndex:
1747 nans = [level_codes == -1 for level_codes in self.codes]
1748 if how == "any":
1749 indexer = np.any(nans, axis=0)
1750 elif how == "all":
1751 indexer = np.all(nans, axis=0)
1752 else:
1753 raise ValueError(f"invalid how option: {how}")
1754
1755 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1756 return self.set_codes(codes=new_codes)
1757
    def _get_level_values(self, level: int, unique: bool = False) -> Index:
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int
        unique : bool, default False
            if True, drop duplicated values

        Returns
        -------
        Index
        """
        lev = self.levels[level]
        level_codes = self.codes[level]
        name = self._names[level]
        if unique:
            level_codes = algos.unique(level_codes)
        # -1 codes map to the level's NA value via fill_value
        filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
        return lev._shallow_copy(filled, name=name)
1782
1783 # error: Signature of "get_level_values" incompatible with supertype "Index"
1784 def get_level_values(self, level) -> Index: # type: ignore[override]
1785 """
1786 Return vector of label values for requested level.
1787
1788 Length of returned vector is equal to the length of the index.
1789
1790 Parameters
1791 ----------
1792 level : int or str
1793 ``level`` is either the integer position of the level in the
1794 MultiIndex, or the name of the level.
1795
1796 Returns
1797 -------
1798 Index
1799 Values is a level of this MultiIndex converted to
1800 a single :class:`Index` (or subclass thereof).
1801
1802 Notes
1803 -----
1804 If the level contains missing values, the result may be casted to
1805 ``float`` with missing values specified as ``NaN``. This is because
1806 the level is converted to a regular ``Index``.
1807
1808 Examples
1809 --------
1810 Create a MultiIndex:
1811
1812 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1813 >>> mi.names = ['level_1', 'level_2']
1814
1815 Get level values by supplying level as either integer or name:
1816
1817 >>> mi.get_level_values(0)
1818 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1819 >>> mi.get_level_values('level_2')
1820 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1821
1822 If a level contains missing values, the return type of the level
1823 may be cast to ``float``.
1824
1825 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1826 level_0 int64
1827 level_1 int64
1828 dtype: object
1829 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1830 Index([1.0, nan, 2.0], dtype='float64')
1831 """
1832 level = self._get_level_number(level)
1833 values = self._get_level_values(level)
1834 return values
1835
1836 @doc(Index.unique)
1837 def unique(self, level=None):
1838 if level is None:
1839 return self.drop_duplicates()
1840 else:
1841 level = self._get_level_number(level)
1842 return self._get_level_values(level=level, unique=True)
1843
1844 def to_frame(
1845 self,
1846 index: bool = True,
1847 name=lib.no_default,
1848 allow_duplicates: bool = False,
1849 ) -> DataFrame:
1850 """
1851 Create a DataFrame with the levels of the MultiIndex as columns.
1852
1853 Column ordering is determined by the DataFrame constructor with data as
1854 a dict.
1855
1856 Parameters
1857 ----------
1858 index : bool, default True
1859 Set the index of the returned DataFrame as the original MultiIndex.
1860
1861 name : list / sequence of str, optional
1862 The passed names should substitute index level names.
1863
1864 allow_duplicates : bool, optional default False
1865 Allow duplicate column labels to be created.
1866
1867 .. versionadded:: 1.5.0
1868
1869 Returns
1870 -------
1871 DataFrame
1872
1873 See Also
1874 --------
1875 DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
1876 tabular data.
1877
1878 Examples
1879 --------
1880 >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
1881 >>> mi
1882 MultiIndex([('a', 'c'),
1883 ('b', 'd')],
1884 )
1885
1886 >>> df = mi.to_frame()
1887 >>> df
1888 0 1
1889 a c a c
1890 b d b d
1891
1892 >>> df = mi.to_frame(index=False)
1893 >>> df
1894 0 1
1895 0 a c
1896 1 b d
1897
1898 >>> df = mi.to_frame(name=['x', 'y'])
1899 >>> df
1900 x y
1901 a c a c
1902 b d b d
1903 """
1904 from pandas import DataFrame
1905
1906 if name is not lib.no_default:
1907 if not is_list_like(name):
1908 raise TypeError("'name' must be a list / sequence of column names.")
1909
1910 if len(name) != len(self.levels):
1911 raise ValueError(
1912 "'name' should have same length as number of levels on index."
1913 )
1914 idx_names = name
1915 else:
1916 idx_names = self._get_level_names()
1917
1918 if not allow_duplicates and len(set(idx_names)) != len(idx_names):
1919 raise ValueError(
1920 "Cannot create duplicate column labels if allow_duplicates is False"
1921 )
1922
1923 # Guarantee resulting column order - PY36+ dict maintains insertion order
1924 result = DataFrame(
1925 {level: self._get_level_values(level) for level in range(len(self.levels))},
1926 copy=False,
1927 )
1928 result.columns = idx_names
1929
1930 if index:
1931 result.index = self
1932 return result
1933
    # error: Return type "Index" of "to_flat_index" incompatible with return type
    # "MultiIndex" in supertype "Index"
    def to_flat_index(self) -> Index:  # type: ignore[override]
        """
        Convert a MultiIndex to an Index of Tuples containing the level values.

        Returns
        -------
        pd.Index
            Index with the MultiIndex data represented in Tuples.

        See Also
        --------
        MultiIndex.from_tuples : Convert flat index back to MultiIndex.

        Notes
        -----
        This method will simply return the caller if called by anything other
        than a MultiIndex.

        Examples
        --------
        >>> index = pd.MultiIndex.from_product(
        ...     [['foo', 'bar'], ['baz', 'qux']],
        ...     names=['a', 'b'])
        >>> index.to_flat_index()
        Index([('foo', 'baz'), ('foo', 'qux'),
               ('bar', 'baz'), ('bar', 'qux')],
              dtype='object')
        """
        # tupleize_cols=False keeps the tuples as scalar elements instead of
        # re-forming a MultiIndex
        return Index(self._values, tupleize_cols=False)
1965
    def _is_lexsorted(self) -> bool:
        """
        Return True if the codes are lexicographically sorted.

        Returns
        -------
        bool

        Examples
        --------
        In the below examples, the first level of the MultiIndex is sorted because
        a<b<c, so there is no need to look at the next level.

        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'e', 'f']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'f', 'e']])._is_lexsorted()
        True

        In case there is a tie, the lexicographical sorting looks
        at the next level of the MultiIndex.

        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
        False
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
        False
        """
        # lexsorted iff the sorted prefix covers every level
        return self._lexsort_depth == self.nlevels
2001
    @cache_readonly
    def _lexsort_depth(self) -> int:
        """
        Compute and return the lexsort_depth, the number of levels of the
        MultiIndex that are sorted lexically

        Returns
        -------
        int
        """
        # a sortorder recorded at construction time short-circuits the scan
        if self.sortorder is not None:
            return self.sortorder
        return _lexsort_depth(self.codes, self.nlevels)
2015
    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Parameters
        ----------
        raise_if_incomparable : bool, default False
            If True, re-raise the TypeError from sorting a level with
            incomparable elements; otherwise leave that level unsorted.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    if raise_if_incomparable:
                        raise
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    # remap codes so the same rows point at the sorted level
                    level_codes = algos.take_nd(ri, level_codes, fill_value=-1)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2082
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this `break` abandons the remaining
                    # levels rather than skipping just this one — confirm
                    # intended vs `continue`.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            # fresh identity plus the trimmed levels/codes
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
2167
2168 # --------------------------------------------------------------------
2169 # Pickling Methods
2170
    def __reduce__(self):
        """Necessary for making this object picklable"""
        # reconstructed via ibase._new_Index with this kwargs dict;
        # FrozenLists are converted to plain lists for the payload
        d = {
            "levels": list(self.levels),
            "codes": list(self.codes),
            "sortorder": self.sortorder,
            "names": list(self.names),
        }
        return ibase._new_Index, (type(self), d), None
2180
2181 # --------------------------------------------------------------------
2182
    def __getitem__(self, key):
        """
        Scalar keys return the tuple at that position; slice/array/bool
        keys return a new MultiIndex selecting those positions.
        """
        if is_scalar(key):
            key = com.cast_scalar_indexer(key)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # -1 code means missing: surface it as NaN in the tuple
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                sortorder = self.sortorder
            elif isinstance(key, slice):
                if key.step is None or key.step > 0:
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            # levels are shared; only the codes are sliced
            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2216
2217 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2218 """
2219 Fastpath for __getitem__ when we know we have a slice.
2220 """
2221 sortorder = None
2222 if slobj.step is None or slobj.step > 0:
2223 sortorder = self.sortorder
2224
2225 new_codes = [level_codes[slobj] for level_codes in self.codes]
2226
2227 return type(self)(
2228 levels=self.levels,
2229 codes=new_codes,
2230 names=self._names,
2231 sortorder=sortorder,
2232 verify_integrity=False,
2233 )
2234
2235 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        # Take rows by position; -1 entries in `indices` become missing
        # values (code -1 in every level) when filling is allowed.
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the sentinel code for missing values within each level
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            mask = indices == -1
            if mask.any():
                # overwrite the positions requested with -1 in every level
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
2266
    def append(self, other):
        """
        Append a collection of Index options together.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        Index
            The combined index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
        >>> mi
        MultiIndex([('a', 'b')],
                   )
        >>> mi.append(mi)
        MultiIndex([('a', 'b'), ('a', 'b')],
                   )
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # Fast path: every other index is a MultiIndex of at least our
            # depth, so combine level-by-level without materializing tuples.
            codes = []
            levels = []
            names = []
            for i in range(self.nlevels):
                # union this level's values across all indexes ...
                level_values = self.levels[i]
                for mi in other:
                    level_values = level_values.union(mi.levels[i])
                # ... then re-express every index's codes against the union
                level_codes = [
                    recode_for_categories(
                        mi.codes[i], mi.levels[i], level_values, copy=False
                    )
                    for mi in ([self, *other])
                ]
                # names must agree across all inputs, otherwise drop the name
                level_name = self.names[i]
                if any(mi.names[i] != level_name for mi in other):
                    level_name = None
                codes.append(np.concatenate(level_codes))
                levels.append(level_values)
                names.append(level_name)
            return MultiIndex(
                codes=codes, levels=levels, names=names, verify_integrity=False
            )

        # Slow path: concatenate materialized tuples and rebuild.
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            # We only get here if other contains at least one index with tuples,
            # setting names to None automatically
            return MultiIndex.from_tuples(new_tuples)
        except (TypeError, IndexError):
            # not all entries are tuples -> fall back to a flat object Index
            return Index(new_tuples)
2329
    def argsort(
        self, *args, na_position: str = "last", **kwargs
    ) -> npt.NDArray[np.intp]:
        """
        Return the indices that would lexicographically sort this MultiIndex.
        """
        # Make level order match value order first, then lexsort on the codes.
        target = self._sort_levels_monotonic(raise_if_incomparable=True)
        keys = [lev.codes for lev in target._get_codes_for_sorting()]
        return lexsort_indexer(keys, na_position=na_position, codes_given=True)
2336
2337 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        # Repeat each row `repeats` times; levels and names are unchanged,
        # only the per-level codes are repeated.
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2353
2354 # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make a new :class:`pandas.MultiIndex` with the passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when ``level`` is not specified.
        level : int or level name, default None
        errors : str, default 'raise'

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')],
        ...                                  names=["number", "color"])
        >>> idx
        MultiIndex([(0,  'green'),
                    (0, 'purple'),
                    (1,  'green'),
                    (1, 'purple'),
                    (2,  'green'),
                    (2, 'purple')],
                   names=['number', 'color'])
        >>> idx.drop([(1, 'green'), (2, 'purple')])
        MultiIndex([(0,  'green'),
                    (0, 'purple'),
                    (1, 'purple'),
                    (2,  'green')],
                   names=['number', 'color'])

        We can also drop from a specific level.

        >>> idx.drop('green', level='color')
        MultiIndex([(0, 'purple'),
                    (1, 'purple'),
                    (2, 'purple')],
                   names=['number', 'color'])

        >>> idx.drop([1, 2], level=0)
        MultiIndex([(0,  'green'),
                    (0, 'purple')],
                   names=['number', 'color'])
        """
        if level is not None:
            # label(s) refer to a single level -> delegate
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # collect the positional indices of every row to delete
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    # expand the slice into explicit positions
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # convert the boolean mask to positional indices
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
2445
    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop all rows whose value in ``level`` matches any of ``codes``.

        Raises KeyError if any label is missing from the level, unless
        ``errors == "ignore"``.
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        # (-2 marks "label not found", distinguishing it from a NaN match).
        nan_codes = isna(codes)
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # level has no unused entries, so a NaN label cannot match either
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep only rows whose code at this level is NOT one of the targets
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2466
2467 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2468 """
2469 Swap level i with level j.
2470
2471 Calling this method does not change the ordering of the values.
2472
2473 Parameters
2474 ----------
2475 i : int, str, default -2
2476 First level of index to be swapped. Can pass level name as string.
2477 Type of parameters can be mixed.
2478 j : int, str, default -1
2479 Second level of index to be swapped. Can pass level name as string.
2480 Type of parameters can be mixed.
2481
2482 Returns
2483 -------
2484 MultiIndex
2485 A new MultiIndex.
2486
2487 See Also
2488 --------
2489 Series.swaplevel : Swap levels i and j in a MultiIndex.
2490 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2491 particular axis.
2492
2493 Examples
2494 --------
2495 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2496 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2497 >>> mi
2498 MultiIndex([('a', 'bb'),
2499 ('a', 'aa'),
2500 ('b', 'bb'),
2501 ('b', 'aa')],
2502 )
2503 >>> mi.swaplevel(0, 1)
2504 MultiIndex([('bb', 'a'),
2505 ('aa', 'a'),
2506 ('bb', 'b'),
2507 ('aa', 'b')],
2508 )
2509 """
2510 new_levels = list(self.levels)
2511 new_codes = list(self.codes)
2512 new_names = list(self.names)
2513
2514 i = self._get_level_number(i)
2515 j = self._get_level_number(j)
2516
2517 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2518 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2519 new_names[i], new_names[j] = new_names[j], new_names[i]
2520
2521 return MultiIndex(
2522 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2523 )
2524
2525 def reorder_levels(self, order) -> MultiIndex:
2526 """
2527 Rearrange levels using input order. May not drop or duplicate levels.
2528
2529 Parameters
2530 ----------
2531 order : list of int or list of str
2532 List representing new level order. Reference level by number
2533 (position) or by key (label).
2534
2535 Returns
2536 -------
2537 MultiIndex
2538
2539 Examples
2540 --------
2541 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2542 >>> mi
2543 MultiIndex([(1, 3),
2544 (2, 4)],
2545 names=['x', 'y'])
2546
2547 >>> mi.reorder_levels(order=[1, 0])
2548 MultiIndex([(3, 1),
2549 (4, 2)],
2550 names=['y', 'x'])
2551
2552 >>> mi.reorder_levels(order=['y', 'x'])
2553 MultiIndex([(3, 1),
2554 (4, 2)],
2555 names=['y', 'x'])
2556 """
2557 order = [self._get_level_number(i) for i in order]
2558 result = self._reorder_ilevels(order)
2559 return result
2560
2561 def _reorder_ilevels(self, order) -> MultiIndex:
2562 if len(order) != self.nlevels:
2563 raise AssertionError(
2564 f"Length of order must be same as number of levels ({self.nlevels}), "
2565 f"got {len(order)}"
2566 )
2567 new_levels = [self.levels[i] for i in order]
2568 new_codes = [self.codes[i] for i in order]
2569 new_names = [self.names[i] for i in order]
2570
2571 return MultiIndex(
2572 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2573 )
2574
    def _recode_for_new_levels(
        self, new_levels, copy: bool = True
    ) -> Generator[np.ndarray, None, None]:
        """
        Yield each level's codes re-expressed against the corresponding entry
        of ``new_levels``.

        ``new_levels`` may cover only a prefix of the levels; one codes array
        is yielded per entry in ``new_levels``.
        """
        if len(new_levels) > self.nlevels:
            raise AssertionError(
                f"Length of new_levels ({len(new_levels)}) "
                f"must be <= self.nlevels ({self.nlevels})"
            )
        for i in range(len(new_levels)):
            yield recode_for_categories(
                self.codes[i], self.levels[i], new_levels[i], copy=copy
            )
2587
    def _get_codes_for_sorting(self) -> list[Categorical]:
        """
        we are categorizing our codes by using the
        available categories (all, not just observed)
        excluding any missing ones (-1); this is in preparation
        for sorting, where we need to disambiguate that -1 is not
        a valid value
        """

        def cats(level_codes):
            # categories 0..max(code); empty level -> no categories
            return np.arange(
                np.array(level_codes).max() + 1 if len(level_codes) else 0,
                dtype=level_codes.dtype,
            )

        return [
            Categorical.from_codes(level_codes, cats(level_codes), True, validate=False)
            for level_codes in self.codes
        ]
2607
    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
        na_position: str = "first",
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : bool, default True
            If True, also sort by the remaining levels after ``level``.
        na_position : {'first' or 'last'}, default 'first'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.

            .. versionadded:: 2.1.0

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # sort keys: the requested levels first (in requested order)
        codes = [self.codes[lev] for lev in level]
        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")
        elif sort_remaining:
            # tie-break by every remaining level
            codes.extend(
                [self.codes[lev] for lev in range(len(self.levels)) if lev not in level]
            )
        else:
            # only the first requested level is guaranteed sorted
            sortorder = level[0]

        indexer = lexsort_indexer(
            codes, orders=ascending, na_position=na_position, codes_given=True
        )

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2708
    def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
        # Reindexing may produce a non-MultiIndex target; re-wrap it as a
        # MultiIndex when possible so the level structure survives.
        if not isinstance(target, MultiIndex):
            if indexer is None:
                target = self
            elif (indexer >= 0).all():
                # every entry matched -> simple positional take
                target = self.take(indexer)
            else:
                try:
                    target = MultiIndex.from_tuples(target)
                except TypeError:
                    # not all tuples, see test_constructor_dict_multiindex_reindex_flat
                    return target

        target = self._maybe_preserve_names(target, preserve_names)
        return target
2724
2725 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2726 if (
2727 preserve_names
2728 and target.nlevels == self.nlevels
2729 and target.names != self.names
2730 ):
2731 target = target.copy(deep=False)
2732 target.names = self.names
2733 return target
2734
2735 # --------------------------------------------------------------------
2736 # Indexing Methods
2737
    def _check_indexing_error(self, key) -> None:
        """Raise ``InvalidIndexError`` for keys that can never be valid here."""
        if not is_hashable(key) or is_iterator(key):
            # We allow tuples if they are hashable, whereas other Index
            # subclasses require scalar.
            # We have to explicitly exclude generators, as these are hashable.
            raise InvalidIndexError(key)
2744
2745 @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        """
        Should integer key(s) be treated as positional?
        """
        # GH#33355: delegate to the first level, since a non-tuple key
        # indexes only level 0.
        return self.levels[0]._should_fallback_to_positional
2752
    def _get_indexer_strict(
        self, key, axis_name: str
    ) -> tuple[Index, npt.NDArray[np.intp]]:
        # Fast path: a list of non-tuple labels indexes only the first level.
        keyarr = key
        if not isinstance(keyarr, Index):
            keyarr = com.asarray_tuplesafe(keyarr)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            indexer = self._get_indexer_level_0(keyarr)

            self._raise_if_missing(key, indexer, axis_name)
            return self[indexer], indexer

        return super()._get_indexer_strict(key, axis_name)
2767
    def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
        """Raise ``KeyError`` when ``indexer`` shows entries of ``key`` are absent."""
        keyarr = key
        if not isinstance(key, Index):
            keyarr = com.asarray_tuplesafe(key)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            # i.e. same condition for special case in MultiIndex._get_indexer_strict

            mask = indexer == -1
            if mask.any():
                check = self.levels[0].get_indexer(keyarr)
                cmask = check == -1
                if cmask.any():
                    raise KeyError(f"{keyarr[cmask]} not in index")
                # We get here when levels still contain values which are not
                # actually in Index anymore
                raise KeyError(f"{keyarr} not in index")
        else:
            return super()._raise_if_missing(key, indexer, axis_name)
2787
    def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
        """
        Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
        """
        lev = self.levels[0]
        codes = self._codes[0]
        # a Categorical view avoids materializing the level-0 values
        cat = Categorical.from_codes(codes=codes, categories=lev, validate=False)
        ci = Index(cat)
        return ci.get_indexer_for(target)
2797
    def get_slice_bound(
        self,
        label: Hashable | Sequence[Hashable],
        side: Literal["left", "right"],
    ) -> int:
        """
        For an ordered MultiIndex, compute slice bound
        that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object or tuple of objects
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.

        Notes
        -----
        This method only works if level 0 index of the MultiIndex is lexsorted.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])

        Get the locations from the leftmost 'b' in the first level
        until the end of the multiindex:

        >>> mi.get_slice_bound('b', side="left")
        1

        Like above, but if you get the locations from the rightmost
        'b' in the first level and 'f' in the second level:

        >>> mi.get_slice_bound(('b','f'), side="right")
        3

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # normalize a scalar label to a 1-tuple, then defer to the
        # level-by-level bound search
        if not isinstance(label, tuple):
            label = (label,)
        return self._partial_tup_index(label, side=side)
2849
2850 # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
2905
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple on a
        lexsorted index, narrowing the [start, end) window one level at a time.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # only search within the window established by previous levels
            section = level_codes[start:end]

            loc: npt.NDArray[np.intp] | np.intp | int
            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # narrow the window to the rows matching this level's code
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)
2956
2957 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2958 """
2959 If key is NA value, location of index unify as -1.
2960
2961 Parameters
2962 ----------
2963 level_index: Index
2964 key : label
2965
2966 Returns
2967 -------
2968 loc : int
2969 If key is NA value, loc is -1
2970 Else, location of key in index.
2971
2972 See Also
2973 --------
2974 Index.get_loc : The get_loc method for (single-level) index.
2975 """
2976 if is_scalar(key) and isna(key):
2977 # TODO: need is_valid_na_for_dtype(key, level_index.dtype)
2978 return -1
2979 else:
2980 return level_index.get_loc(key)
2981
    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # non-tuple key indexes only level 0
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # complete key on a unique index -> hash-engine lookup
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except KeyError as err:
                raise KeyError(key) from err
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # linear filtering of the remaining (unsorted) levels
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
3103
    def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
        """
        Get location and sliced index for requested label(s)/level(s).

        Parameters
        ----------
        key : label or sequence of labels
        level : int/level name or list thereof, optional
        drop_level : bool, default True
            If ``False``, the resulting index will not drop any level.

        Returns
        -------
        tuple
            A 2-tuple where the elements :

            Element 0: int, slice object or boolean array.

            Element 1: The resulting sliced multiindex/index. If the key
            contains all levels, this will be ``None``.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
        ...                                names=['A', 'B'])

        >>> mi.get_loc_level('b')
        (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

        >>> mi.get_loc_level('e', level='B')
        (array([False,  True, False]), Index(['b'], dtype='object', name='A'))

        >>> mi.get_loc_level(['b', 'e'])
        (1, None)
        """
        # resolve level names to positional level numbers
        if not isinstance(level, (list, tuple)):
            level = self._get_level_number(level)
        else:
            level = [self._get_level_number(lev) for lev in level]

        loc, mi = self._get_loc_level(key, level=level)
        if not drop_level:
            # re-slice self so the returned index keeps all levels
            if lib.is_integer(loc):
                # Slice index must be an integer or None
                mi = self[loc : loc + 1]
            else:
                mi = self[loc]
        return loc, mi
3158
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # drop highest level numbers first so positions stay valid
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # multiple (key, level) pairs: intersect the per-level indexers
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # normalize to a boolean mask so it can be AND-ed
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                # levels actually constrained by the key (non-trivial entries)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # key contains slices: build a combined boolean indexer
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # scalar key against a single level
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                result_index = self[indexer]

            return indexer, result_index
3307
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate the positions matching ``key`` within a single level.

        Parameters
        ----------
        key : label, slice, or listlike of labels
        level : int, default 0
            Positional level number; the `level` kwarg is _always_ positional,
            never a level name.
        indexer : np.ndarray[bool] or None, default None
            If provided, restrict matching to positions where it is True.

        Returns
        -------
        slice or np.ndarray[bool]
            A slice when the index is sorted enough for a contiguous answer
            (cheap view), otherwise a boolean mask over the whole index.

        Raises
        ------
        KeyError
            If ``key`` exists in the level's values but matches no positions
            in the codes.
        """
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # scatter the sub-mask back onto a full-length boolean indexer
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.asarray(level_codes == idx, dtype=bool)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
3420
    def get_locs(self, seq) -> npt.NDArray[np.intp]:
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        Raises
        ------
        UnsortedIndexError
            If a non-trivial slice is requested on a level deeper than the
            index's lexsort depth.
        KeyError
            If combining the per-level selections leaves no matches even
            though the current level alone did match.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Normalize a slice result into a full-length boolean mask so it
            # can be combined with masks from other levels.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):
            # lvl_indexer: positions matched by this level's selection only
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)
                if indexer is None:
                    lvl_indexer = lvl_indexer.copy()

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3553
3554 # --------------------------------------------------------------------
3555
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    # on a lexsorted index only a reversed slice forces a reorder
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                if not isinstance(k, (np.ndarray, ExtensionArray, Index, ABCSeries)):
                    k = sanitize_array(k, None)
                k = algos.unique(k)
                # codes absent from k map to len(level), i.e. they sort last
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3643
3644 def truncate(self, before=None, after=None) -> MultiIndex:
3645 """
3646 Slice index between two labels / tuples, return new MultiIndex.
3647
3648 Parameters
3649 ----------
3650 before : label or tuple, can be partial. Default None
3651 None defaults to start.
3652 after : label or tuple, can be partial. Default None
3653 None defaults to end.
3654
3655 Returns
3656 -------
3657 MultiIndex
3658 The truncated MultiIndex.
3659
3660 Examples
3661 --------
3662 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
3663 >>> mi
3664 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
3665 )
3666 >>> mi.truncate(before='a', after='b')
3667 MultiIndex([('a', 'x'), ('b', 'y')],
3668 )
3669 """
3670 if after and before and after < before:
3671 raise ValueError("after < before")
3672
3673 i, j = self.levels[0].slice_locs(before, after)
3674 left, right = self.slice_locs(before, after)
3675
3676 new_levels = list(self.levels)
3677 new_levels[0] = new_levels[0][i:j]
3678
3679 new_codes = [level_codes[left:right] for level_codes in self.codes]
3680 new_codes[0] = new_codes[0] - i
3681
3682 return MultiIndex(
3683 levels=new_levels,
3684 codes=new_codes,
3685 names=self._names,
3686 verify_integrity=False,
3687 )
3688
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        Parameters
        ----------
        other : object
            Typically another Index; non-Index objects compare unequal.

        Returns
        -------
        bool

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level on the realized values so that two indexes
        # with different (but equivalent) level/code encodings compare equal.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            # code -1 marks a missing entry; the missing-value patterns must match
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3750
3751 def equal_levels(self, other: MultiIndex) -> bool:
3752 """
3753 Return True if the levels of both MultiIndex objects are the same
3754
3755 """
3756 if self.nlevels != other.nlevels:
3757 return False
3758
3759 for i in range(self.nlevels):
3760 if not self.levels[i].equals(other.levels[i]):
3761 return False
3762 return True
3763
3764 # --------------------------------------------------------------------
3765 # Set Methods
3766
3767 def _union(self, other, sort) -> MultiIndex:
3768 other, result_names = self._convert_can_do_setop(other)
3769 if other.has_duplicates:
3770 # This is only necessary if other has dupes,
3771 # otherwise difference is faster
3772 result = super()._union(other, sort)
3773
3774 if isinstance(result, MultiIndex):
3775 return result
3776 return MultiIndex.from_arrays(
3777 zip(*result), sortorder=None, names=result_names
3778 )
3779
3780 else:
3781 right_missing = other.difference(self, sort=False)
3782 if len(right_missing):
3783 result = self.append(right_missing)
3784 else:
3785 result = self._get_reconciled_name_object(other)
3786
3787 if sort is not False:
3788 try:
3789 result = result.sort_values()
3790 except TypeError:
3791 if sort is True:
3792 raise
3793 warnings.warn(
3794 "The values in the array are unorderable. "
3795 "Pass `sort=False` to suppress this warning.",
3796 RuntimeWarning,
3797 stacklevel=find_stack_level(),
3798 )
3799 return result
3800
3801 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
3802 return is_object_dtype(dtype)
3803
3804 def _get_reconciled_name_object(self, other) -> MultiIndex:
3805 """
3806 If the result of a set operation will be self,
3807 return self, unless the names change, in which
3808 case make a shallow copy of self.
3809 """
3810 names = self._maybe_match_names(other)
3811 if self.names != names:
3812 # error: Cannot determine type of "rename"
3813 return self.rename(names) # type: ignore[has-type]
3814 return self
3815
3816 def _maybe_match_names(self, other):
3817 """
3818 Try to find common names to attach to the result of an operation between
3819 a and b. Return a consensus list of names if they match at least partly
3820 or list of None if they have completely different names.
3821 """
3822 if len(self.names) != len(other.names):
3823 return [None] * len(self.names)
3824 names = []
3825 for a_name, b_name in zip(self.names, other.names):
3826 if a_name == b_name:
3827 names.append(a_name)
3828 else:
3829 # TODO: what if they both have np.nan for their names?
3830 names.append(None)
3831 return names
3832
3833 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3834 _, result_names = self._convert_can_do_setop(other)
3835 return result.set_names(result_names)
3836
3837 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex:
3838 _, result_names = self._convert_can_do_setop(other)
3839
3840 if len(result) == 0:
3841 return result.remove_unused_levels().set_names(result_names)
3842 else:
3843 return result.set_names(result_names)
3844
3845 def _convert_can_do_setop(self, other):
3846 result_names = self.names
3847
3848 if not isinstance(other, Index):
3849 if len(other) == 0:
3850 return self[:0], self.names
3851 else:
3852 msg = "other must be a MultiIndex or a list of tuples"
3853 try:
3854 other = MultiIndex.from_tuples(other, names=self.names)
3855 except (ValueError, TypeError) as err:
3856 # ValueError raised by tuples_to_object_array if we
3857 # have non-object dtype
3858 raise TypeError(msg) from err
3859 else:
3860 result_names = get_unanimous_names(self, other)
3861
3862 return other, result_names
3863
3864 # --------------------------------------------------------------------
3865
3866 @doc(Index.astype)
3867 def astype(self, dtype, copy: bool = True):
3868 dtype = pandas_dtype(dtype)
3869 if isinstance(dtype, CategoricalDtype):
3870 msg = "> 1 ndim Categorical are not supported at this time"
3871 raise NotImplementedError(msg)
3872 if not is_object_dtype(dtype):
3873 raise TypeError(
3874 "Setting a MultiIndex dtype to anything other than object "
3875 "is not supported"
3876 )
3877 if copy is True:
3878 return self._view()
3879 return self
3880
3881 def _validate_fill_value(self, item):
3882 if isinstance(item, MultiIndex):
3883 # GH#43212
3884 if item.nlevels != self.nlevels:
3885 raise ValueError("Item must have length equal to number of levels.")
3886 return item._values
3887 elif not isinstance(item, tuple):
3888 # Pad the key with empty strings if lower levels of the key
3889 # aren't specified:
3890 item = (item,) + ("",) * (self.nlevels - 1)
3891 elif len(item) != self.nlevels:
3892 raise ValueError("Item must have length equal to number of levels.")
3893 return item
3894
    def putmask(self, mask, value: MultiIndex) -> MultiIndex:
        """
        Return a new MultiIndex of the values set with the mask.

        Parameters
        ----------
        mask : array like
        value : MultiIndex
            Must either be the same length as self or length one

        Returns
        -------
        MultiIndex
        """
        mask, noop = validate_putmask(self, mask)
        if noop:
            # the mask selects nothing, so there is nothing to replace
            return self.copy()

        # restrict to the rows of `value` that will actually be used, and
        # drop their unused level values before unioning below
        if len(mask) == len(value):
            subset = value[mask].remove_unused_levels()
        else:
            subset = value.remove_unused_levels()

        new_levels = []
        new_codes = []

        for i, (value_level, level, level_codes) in enumerate(
            zip(subset.levels, self.levels, self.codes)
        ):
            # enlarge this level to cover the incoming values, then re-encode
            # the masked positions against the enlarged level
            new_level = level.union(value_level, sort=False)
            value_codes = new_level.get_indexer_for(subset.get_level_values(i))
            new_code = ensure_int64(level_codes)
            new_code[mask] = value_codes
            new_levels.append(new_level)
            new_codes.append(new_code)

        return MultiIndex(
            levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
        )
3934
3935 def insert(self, loc: int, item) -> MultiIndex:
3936 """
3937 Make new MultiIndex inserting new item at location
3938
3939 Parameters
3940 ----------
3941 loc : int
3942 item : tuple
3943 Must be same length as number of levels in the MultiIndex
3944
3945 Returns
3946 -------
3947 new_index : Index
3948 """
3949 item = self._validate_fill_value(item)
3950
3951 new_levels = []
3952 new_codes = []
3953 for k, level, level_codes in zip(item, self.levels, self.codes):
3954 if k not in level:
3955 # have to insert into level
3956 # must insert at end otherwise you have to recompute all the
3957 # other codes
3958 lev_loc = len(level)
3959 level = level.insert(lev_loc, k)
3960 else:
3961 lev_loc = level.get_loc(k)
3962
3963 new_levels.append(level)
3964 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3965
3966 return MultiIndex(
3967 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3968 )
3969
3970 def delete(self, loc) -> MultiIndex:
3971 """
3972 Make new index with passed location deleted
3973
3974 Returns
3975 -------
3976 new_index : MultiIndex
3977 """
3978 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3979 return MultiIndex(
3980 levels=self.levels,
3981 codes=new_codes,
3982 names=self.names,
3983 verify_integrity=False,
3984 )
3985
3986 @doc(Index.isin)
3987 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3988 if isinstance(values, Generator):
3989 values = list(values)
3990
3991 if level is None:
3992 if len(values) == 0:
3993 return np.zeros((len(self),), dtype=np.bool_)
3994 if not isinstance(values, MultiIndex):
3995 values = MultiIndex.from_tuples(values)
3996 return values.unique().get_indexer_for(self) != -1
3997 else:
3998 num = self._get_level_number(level)
3999 levs = self.get_level_values(num)
4000
4001 if levs.size == 0:
4002 return np.zeros(len(levs), dtype=np.bool_)
4003 return levs.isin(values)
4004
    # `rename` is an alias for `set_names`; a MultiIndex has no single `name`.
    # error: Incompatible types in assignment (expression has type overloaded function,
    # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
    rename = Index.set_names  # type: ignore[assignment]

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # Each dunder below is replaced by a stub from `make_invalid_op`;
    # NOTE(review): presumably each stub raises a TypeError naming the op
    # when invoked — confirm in pandas.core.ops.

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
4035
4036
4037def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
4038 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
4039 int64_codes = [ensure_int64(level_codes) for level_codes in codes]
4040 for k in range(nlevels, 0, -1):
4041 if libalgos.is_lexsorted(int64_codes[:k]):
4042 return k
4043 return 0
4044
4045
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out repeated prefix labels in successive rows, keeping the first
    occurrence; the innermost level is never blanked.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev = rows[start]

    for row in rows[start + 1 :]:
        sparse_row: list = []

        for lvl, (prev_val, val) in enumerate(zip(prev, row)):
            if lvl == nlevels - 1:
                # innermost level: always shown
                sparse_row.append(val)
                # error: Argument 1 to "append" of "list" has incompatible
                # type "list[Any]"; expected "tuple[Any, ...]"
                out.append(sparse_row)  # type: ignore[arg-type]
                break

            if prev_val == val:
                sparse_row.append(sentinel)
            else:
                # first difference: emit the rest of the row verbatim
                sparse_row.extend(row[lvl:])
                # error: Argument 1 to "append" of "list" has incompatible
                # type "list[Any]"; expected "tuple[Any, ...]"
                out.append(sparse_row)  # type: ignore[arg-type]
                break

        prev = row

    return list(zip(*out))
4076
4077
4078def _get_na_rep(dtype: DtypeObj) -> str:
4079 if isinstance(dtype, ExtensionDtype):
4080 return f"{dtype.na_value}"
4081 else:
4082 dtype_type = dtype.type
4083
4084 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype_type, "NaN")
4085
4086
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if not isinstance(key, tuple):
        # scalar key: drop one level, keeping the index as-is when that
        # would remove the only remaining level
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return original_index
    return index
4118
4119
4120def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
4121 """
4122 Coerce the array-like indexer to the smallest integer dtype that can encode all
4123 of the given categories.
4124
4125 Parameters
4126 ----------
4127 array_like : array-like
4128 categories : array-like
4129 copy : bool
4130
4131 Returns
4132 -------
4133 np.ndarray
4134 Non-writeable.
4135 """
4136 array_like = coerce_indexer_dtype(array_like, categories)
4137 if copy:
4138 array_like = array_like.copy()
4139 array_like.flags.writeable = False
4140 return array_like
4141
4142
4143def _require_listlike(level, arr, arrname: str):
4144 """
4145 Ensure that level is either None or listlike, and arr is list-of-listlike.
4146 """
4147 if level is not None and not is_list_like(level):
4148 if not is_list_like(arr):
4149 raise TypeError(f"{arrname} must be list-like")
4150 if len(arr) > 0 and is_list_like(arr[0]):
4151 raise TypeError(f"{arrname} must be list-like")
4152 level = [level]
4153 arr = [arr]
4154 elif level is None or is_list_like(level):
4155 if not is_list_like(arr) or not is_list_like(arr[0]):
4156 raise TypeError(f"{arrname} must be list of lists-like")
4157 return level, arr