1from __future__ import annotations
2
3from collections.abc import (
4 Collection,
5 Generator,
6 Hashable,
7 Iterable,
8 Sequence,
9)
10from functools import wraps
11from sys import getsizeof
12from typing import (
13 TYPE_CHECKING,
14 Any,
15 Callable,
16 Literal,
17 cast,
18)
19import warnings
20
21import numpy as np
22
23from pandas._config import get_option
24
25from pandas._libs import (
26 algos as libalgos,
27 index as libindex,
28 lib,
29)
30from pandas._libs.hashtable import duplicated
31from pandas._typing import (
32 AnyAll,
33 AnyArrayLike,
34 Axis,
35 DropKeep,
36 DtypeObj,
37 F,
38 IgnoreRaise,
39 IndexLabel,
40 Scalar,
41 Self,
42 Shape,
43 npt,
44)
45from pandas.compat.numpy import function as nv
46from pandas.errors import (
47 InvalidIndexError,
48 PerformanceWarning,
49 UnsortedIndexError,
50)
51from pandas.util._decorators import (
52 Appender,
53 cache_readonly,
54 doc,
55)
56from pandas.util._exceptions import find_stack_level
57
58from pandas.core.dtypes.cast import coerce_indexer_dtype
59from pandas.core.dtypes.common import (
60 ensure_int64,
61 ensure_platform_int,
62 is_hashable,
63 is_integer,
64 is_iterator,
65 is_list_like,
66 is_object_dtype,
67 is_scalar,
68 pandas_dtype,
69)
70from pandas.core.dtypes.dtypes import (
71 CategoricalDtype,
72 ExtensionDtype,
73)
74from pandas.core.dtypes.generic import (
75 ABCDataFrame,
76 ABCSeries,
77)
78from pandas.core.dtypes.inference import is_array_like
79from pandas.core.dtypes.missing import (
80 array_equivalent,
81 isna,
82)
83
84import pandas.core.algorithms as algos
85from pandas.core.array_algos.putmask import validate_putmask
86from pandas.core.arrays import (
87 Categorical,
88 ExtensionArray,
89)
90from pandas.core.arrays.categorical import (
91 factorize_from_iterables,
92 recode_for_categories,
93)
94import pandas.core.common as com
95from pandas.core.construction import sanitize_array
96import pandas.core.indexes.base as ibase
97from pandas.core.indexes.base import (
98 Index,
99 _index_shared_docs,
100 ensure_index,
101 get_unanimous_names,
102)
103from pandas.core.indexes.frozen import FrozenList
104from pandas.core.ops.invalid import make_invalid_op
105from pandas.core.sorting import (
106 get_group_index,
107 lexsort_indexer,
108)
109
110from pandas.io.formats.printing import (
111 get_adjustment,
112 pprint_thing,
113)
114
115if TYPE_CHECKING:
116 from pandas import (
117 CategoricalIndex,
118 DataFrame,
119 Series,
120 )
121
# Docstring-substitution values shared by this module: start from the
# base-Index defaults and override the class names that appear in the
# rendered docs for MultiIndex methods.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
)
126
127
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Manage a MultiIndex by mapping label combinations to positive integers.

    Used when the packed representation of all levels fits in a uint64.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift each level's codes into its pre-calculated, disjoint bit
        # range. NOTE: in-place, mutating the caller's array, as before.
        codes <<= self.offsets

        # With disjoint bit ranges, OR and sum are interchangeable; OR-ing
        # across the last axis composes the significant bits of each level
        # into a single positive integer. For a 1-d input (a single key),
        # axis 0 is the last axis, so both shapes are handled uniformly.
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
165
166
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Manage those (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers, using an ObjectEngine that holds
    arbitrary-precision Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python integer (each),
        in a strictly monotonic way (i.e. respecting the lexicographic order
        of integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shifting can overflow uint64 here, so first re-box as Python ints
        # (object dtype), then shift each level into its disjoint bit range.
        codes = codes.astype("object") << self.offsets

        # With disjoint bit ranges, OR and sum are interchangeable; OR-ing
        # across the last axis composes the levels' bits into one positive
        # integer per row. For a 1-d input (single key), axis 0 is the last
        # axis, so both shapes are handled uniformly.
        return np.bitwise_or.reduce(codes, axis=codes.ndim - 1)
206
207
def names_compat(meth: F) -> F:
    """
    Decorator allowing a method to accept either the `name` or the `names`
    keyword, but never both at once.

    This makes it easier to share code with the base class, which uses
    `name` where MultiIndex uses `names`.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        has_name = "name" in kwargs
        if has_name and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if has_name:
            # normalize: forward `name` to the method as `names`
            kwargs["names"] = kwargs.pop("name")
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)
225
226
227class MultiIndex(Index):
228 """
229 A multi-level, or hierarchical, index object for pandas objects.
230
231 Parameters
232 ----------
233 levels : sequence of arrays
234 The unique labels for each level.
235 codes : sequence of arrays
236 Integers for each level designating which label at each location.
237 sortorder : optional int
238 Level of sortedness (must be lexicographically sorted by that
239 level).
240 names : optional sequence of objects
241 Names for each of the index levels. (name is accepted for compat).
242 copy : bool, default False
243 Copy the meta-data.
244 verify_integrity : bool, default True
245 Check that the levels/codes are consistent and valid.
246
247 Attributes
248 ----------
249 names
250 levels
251 codes
252 nlevels
253 levshape
254 dtypes
255
256 Methods
257 -------
258 from_arrays
259 from_tuples
260 from_product
261 from_frame
262 set_levels
263 set_codes
264 to_frame
265 to_flat_index
266 sortlevel
267 droplevel
268 swaplevel
269 reorder_levels
270 remove_unused_levels
271 get_level_values
272 get_indexer
273 get_loc
274 get_locs
275 get_loc_level
276 drop
277
278 See Also
279 --------
280 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
281 MultiIndex.from_product : Create a MultiIndex from the cartesian product
282 of iterables.
283 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
284 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
285 Index : The base pandas Index type.
286
287 Notes
288 -----
289 See the `user guide
290 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
291 for more.
292
293 Examples
294 --------
295 A new ``MultiIndex`` is typically constructed using one of the helper
296 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
297 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
298
299 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
300 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
301 MultiIndex([(1, 'red'),
302 (1, 'blue'),
303 (2, 'red'),
304 (2, 'blue')],
305 names=['number', 'color'])
306
307 See further examples for how to construct a MultiIndex in the doc strings
308 of the mentioned helper methods.
309 """
310
311 _hidden_attrs = Index._hidden_attrs | frozenset()
312
313 # initialize to zero-length tuples to make everything work
314 _typ = "multiindex"
315 _names: list[Hashable | None] = []
316 _levels = FrozenList()
317 _codes = FrozenList()
318 _comparables = ["names"]
319
320 sortorder: int | None
321
322 # --------------------------------------------------------------------
323 # Constructors
324
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ) -> Self:
        # compat with Index: accept `name` as an alias for `names`
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # bypass Index.__new__; MultiIndex manages its own construction
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # default to unnamed levels; _set_names below validates and overrides
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # validates codes against levels; also re-masks codes that point
            # at NaN level values as -1
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()
        result._references = None

        return result
371
372 def _validate_codes(self, level: list, code: list):
373 """
374 Reassign code values as -1 if their corresponding levels are NaN.
375
376 Parameters
377 ----------
378 code : list
379 Code to reassign.
380 level : list
381 Level to check for missing values (NaN, NaT, None).
382
383 Returns
384 -------
385 new code where code value = -1 if it corresponds
386 to a level with missing values (NaN, NaT, None).
387 """
388 null_mask = isna(level)
389 if np.any(null_mask):
390 # error: Incompatible types in assignment
391 # (expression has type "ndarray[Any, dtype[Any]]",
392 # variable has type "List[Any]")
393 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
394 return code
395
    def _verify_integrity(
        self,
        codes: list | None = None,
        levels: list | None = None,
        levels_to_verify: list[int] | range | None = None,
    ):
        """
        Validate that codes are consistent with levels.

        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.
        levels_to_verify : optional list
            Specifies the levels to verify. Defaults to all levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actual sortorder.
        # NOTE: `or` (not an `is None` check) means empty lists also fall
        # back to the current attributes.
        codes = codes or self.codes
        levels = levels or self.levels
        if levels_to_verify is None:
            levels_to_verify = range(len(levels))

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # every level's code array must have the index's length
        codes_length = len(codes[0])
        for i in levels_to_verify:
            level = levels[i]
            level_codes = codes[i]

            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # codes index into the level, so max code must be in bounds
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            # -1 is the sentinel for missing values; anything below is invalid
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # sortorder may not claim more sortedness than is actually present
            if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
                )

        result_codes = []
        for i in range(len(levels)):
            if i in levels_to_verify:
                # re-mask codes that point at NaN level values as -1
                result_codes.append(self._validate_codes(levels[i], codes[i]))
            else:
                result_codes.append(codes[i])

        new_codes = FrozenList(result_codes)
        return new_codes
473
474 @classmethod
475 def from_arrays(
476 cls,
477 arrays,
478 sortorder: int | None = None,
479 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
480 ) -> MultiIndex:
481 """
482 Convert arrays to MultiIndex.
483
484 Parameters
485 ----------
486 arrays : list / sequence of array-likes
487 Each array-like gives one level's value for each data point.
488 len(arrays) is the number of levels.
489 sortorder : int or None
490 Level of sortedness (must be lexicographically sorted by that
491 level).
492 names : list / sequence of str, optional
493 Names for the levels in the index.
494
495 Returns
496 -------
497 MultiIndex
498
499 See Also
500 --------
501 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
502 MultiIndex.from_product : Make a MultiIndex from cartesian product
503 of iterables.
504 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
505
506 Examples
507 --------
508 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
509 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
510 MultiIndex([(1, 'red'),
511 (1, 'blue'),
512 (2, 'red'),
513 (2, 'blue')],
514 names=['number', 'color'])
515 """
516 error_msg = "Input must be a list / sequence of array-likes."
517 if not is_list_like(arrays):
518 raise TypeError(error_msg)
519 if is_iterator(arrays):
520 arrays = list(arrays)
521
522 # Check if elements of array are list-like
523 for array in arrays:
524 if not is_list_like(array):
525 raise TypeError(error_msg)
526
527 # Check if lengths of all arrays are equal or not,
528 # raise ValueError, if not
529 for i in range(1, len(arrays)):
530 if len(arrays[i]) != len(arrays[i - 1]):
531 raise ValueError("all arrays must be same length")
532
533 codes, levels = factorize_from_iterables(arrays)
534 if names is lib.no_default:
535 names = [getattr(arr, "name", None) for arr in arrays]
536
537 return cls(
538 levels=levels,
539 codes=codes,
540 sortorder=sortorder,
541 names=names,
542 verify_integrity=False,
543 )
544
    @classmethod
    @names_compat
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable | None = None,
    ) -> MultiIndex:
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1, 'red'),
                    (1, 'blue'),
                    (2, 'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        if is_iterator(tuples):
            # materialize so we can take len() and iterate repeatedly
            tuples = list(tuples)
        tuples = cast(Collection[tuple[Hashable, ...]], tuples)

        # handling the empty tuple cases
        if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
            # all entries are the empty tuple (): represent them with a
            # single level whose codes all point at that one entry
            codes = [np.zeros(len(tuples))]
            levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
            return cls(
                levels=levels,
                codes=codes,
                sortorder=sortorder,
                names=names,
                verify_integrity=False,
            )

        arrays: list[Sequence[Hashable]]
        if len(tuples) == 0:
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # one empty column per requested level name
            # error: Argument 1 to "len" has incompatible type "Hashable";
            # expected "Sized"
            arrays = [[]] * len(names)  # type: ignore[arg-type]
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = np.asarray(tuples._values)

            # vectorized transpose: tuple rows -> one object array per level
            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # generic iterable of tuples: pure-Python transpose
            arrs = zip(*tuples)
            arrays = cast(list[Sequence[Hashable]], arrs)

        return cls.from_arrays(arrays, sortorder=sortorder, names=names)
625
626 @classmethod
627 def from_product(
628 cls,
629 iterables: Sequence[Iterable[Hashable]],
630 sortorder: int | None = None,
631 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
632 ) -> MultiIndex:
633 """
634 Make a MultiIndex from the cartesian product of multiple iterables.
635
636 Parameters
637 ----------
638 iterables : list / sequence of iterables
639 Each iterable has unique labels for each level of the index.
640 sortorder : int or None
641 Level of sortedness (must be lexicographically sorted by that
642 level).
643 names : list / sequence of str, optional
644 Names for the levels in the index.
645 If not explicitly provided, names will be inferred from the
646 elements of iterables if an element has a name attribute.
647
648 Returns
649 -------
650 MultiIndex
651
652 See Also
653 --------
654 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
655 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
656 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
657
658 Examples
659 --------
660 >>> numbers = [0, 1, 2]
661 >>> colors = ['green', 'purple']
662 >>> pd.MultiIndex.from_product([numbers, colors],
663 ... names=['number', 'color'])
664 MultiIndex([(0, 'green'),
665 (0, 'purple'),
666 (1, 'green'),
667 (1, 'purple'),
668 (2, 'green'),
669 (2, 'purple')],
670 names=['number', 'color'])
671 """
672 from pandas.core.reshape.util import cartesian_product
673
674 if not is_list_like(iterables):
675 raise TypeError("Input must be a list / sequence of iterables.")
676 if is_iterator(iterables):
677 iterables = list(iterables)
678
679 codes, levels = factorize_from_iterables(iterables)
680 if names is lib.no_default:
681 names = [getattr(it, "name", None) for it in iterables]
682
683 # codes are all ndarrays, so cartesian_product is lossless
684 codes = cartesian_product(codes)
685 return cls(levels, codes, sortorder=sortorder, names=names)
686
687 @classmethod
688 def from_frame(
689 cls,
690 df: DataFrame,
691 sortorder: int | None = None,
692 names: Sequence[Hashable] | Hashable | None = None,
693 ) -> MultiIndex:
694 """
695 Make a MultiIndex from a DataFrame.
696
697 Parameters
698 ----------
699 df : DataFrame
700 DataFrame to be converted to MultiIndex.
701 sortorder : int, optional
702 Level of sortedness (must be lexicographically sorted by that
703 level).
704 names : list-like, optional
705 If no names are provided, use the column names, or tuple of column
706 names if the columns is a MultiIndex. If a sequence, overwrite
707 names with the given sequence.
708
709 Returns
710 -------
711 MultiIndex
712 The MultiIndex representation of the given DataFrame.
713
714 See Also
715 --------
716 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
717 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
718 MultiIndex.from_product : Make a MultiIndex from cartesian product
719 of iterables.
720
721 Examples
722 --------
723 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
724 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
725 ... columns=['a', 'b'])
726 >>> df
727 a b
728 0 HI Temp
729 1 HI Precip
730 2 NJ Temp
731 3 NJ Precip
732
733 >>> pd.MultiIndex.from_frame(df)
734 MultiIndex([('HI', 'Temp'),
735 ('HI', 'Precip'),
736 ('NJ', 'Temp'),
737 ('NJ', 'Precip')],
738 names=['a', 'b'])
739
740 Using explicit names, instead of the column names
741
742 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
743 MultiIndex([('HI', 'Temp'),
744 ('HI', 'Precip'),
745 ('NJ', 'Temp'),
746 ('NJ', 'Precip')],
747 names=['state', 'observation'])
748 """
749 if not isinstance(df, ABCDataFrame):
750 raise TypeError("Input must be a DataFrame")
751
752 column_names, columns = zip(*df.items())
753 names = column_names if names is None else names
754 return cls.from_arrays(columns, sortorder=sortorder, names=names)
755
756 # --------------------------------------------------------------------
757
758 @cache_readonly
759 def _values(self) -> np.ndarray:
760 # We override here, since our parent uses _data, which we don't use.
761 values = []
762
763 for i in range(self.nlevels):
764 index = self.levels[i]
765 codes = self.codes[i]
766
767 vals = index
768 if isinstance(vals.dtype, CategoricalDtype):
769 vals = cast("CategoricalIndex", vals)
770 vals = vals._data._internal_get_values()
771
772 if isinstance(vals.dtype, ExtensionDtype) or lib.is_np_dtype(
773 vals.dtype, "mM"
774 ):
775 vals = vals.astype(object)
776
777 vals = np.asarray(vals)
778 vals = algos.take_nd(vals, codes, fill_value=index._na_value)
779 values.append(vals)
780
781 arr = lib.fast_zip(values)
782 return arr
783
    @property
    def values(self) -> np.ndarray:
        """
        Return the MultiIndex contents as an object-dtype ndarray of tuples.

        Delegates to the cached ``_values``.
        """
        return self._values
787
788 @property
789 def array(self):
790 """
791 Raises a ValueError for `MultiIndex` because there's no single
792 array backing a MultiIndex.
793
794 Raises
795 ------
796 ValueError
797 """
798 raise ValueError(
799 "MultiIndex has no single backing array. Use "
800 "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
801 )
802
803 @cache_readonly
804 def dtypes(self) -> Series:
805 """
806 Return the dtypes as a Series for the underlying MultiIndex.
807
808 Examples
809 --------
810 >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')],
811 ... names=['number', 'color'])
812 >>> idx
813 MultiIndex([(0, 'green'),
814 (0, 'purple'),
815 (1, 'green'),
816 (1, 'purple'),
817 (2, 'green'),
818 (2, 'purple')],
819 names=['number', 'color'])
820 >>> idx.dtypes
821 number int64
822 color object
823 dtype: object
824 """
825 from pandas import Series
826
827 names = com.fill_missing_names([level.name for level in self.levels])
828 return Series([level.dtype for level in self.levels], index=Index(names))
829
    def __len__(self) -> int:
        # All code arrays share the same length (the number of rows), so the
        # first one suffices; avoids materializing _values.
        return len(self.codes[0])
832
    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.

        Equivalent to ``len(self)``.
        """
        # override Index.size to avoid materializing _values
        return len(self)
840
841 # --------------------------------------------------------------------
842 # Levels Methods
843
844 @cache_readonly
845 def levels(self) -> FrozenList:
846 """
847 Levels of the MultiIndex.
848
849 Levels refer to the different hierarchical levels or layers in a MultiIndex.
850 In a MultiIndex, each level represents a distinct dimension or category of
851 the index.
852
853 To access the levels, you can use the levels attribute of the MultiIndex,
854 which returns a tuple of Index objects. Each Index object represents a
855 level in the MultiIndex and contains the unique values found in that
856 specific level.
857
858 If a MultiIndex is created with levels A, B, C, and the DataFrame using
859 it filters out all rows of the level C, MultiIndex.levels will still
860 return A, B, C.
861
862 Examples
863 --------
864 >>> index = pd.MultiIndex.from_product([['mammal'],
865 ... ('goat', 'human', 'cat', 'dog')],
866 ... names=['Category', 'Animals'])
867 >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs'])
868 >>> leg_num
869 Legs
870 Category Animals
871 mammal goat 4
872 human 2
873 cat 4
874 dog 4
875
876 >>> leg_num.index.levels
877 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']])
878
879 MultiIndex levels will not change even if the DataFrame using the MultiIndex
880 does not contain all them anymore.
881 See how "human" is not in the DataFrame, but it is still in levels:
882
883 >>> large_leg_num = leg_num[leg_num.Legs > 2]
884 >>> large_leg_num
885 Legs
886 Category Animals
887 mammal goat 4
888 cat 4
889 dog 4
890
891 >>> large_leg_num.index.levels
892 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']])
893 """
894 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
895 # create new IndexEngine
896 # https://github.com/pandas-dev/pandas/issues/31648
897 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
898 for level in result:
899 # disallow midx.levels[0].name = "foo"
900 level._no_setting_name = True
901 return FrozenList(result)
902
    def _set_levels(
        self,
        levels,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            # replace all levels
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._view() for lev in levels
            )
            level_numbers = list(range(len(new_levels)))
        else:
            # replace only the requested levels, keeping the rest
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels_list = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
            new_levels = FrozenList(new_levels_list)

        if verify_integrity:
            # may re-mask codes pointing at NaN level values as -1
            new_codes = self._verify_integrity(
                levels=new_levels, levels_to_verify=level_numbers
            )
            self._codes = new_codes

        names = self.names
        self._levels = new_levels
        if any(names):
            # re-attach names: the new level Indexes are unnamed views
            self._set_names(names)

        # cached properties (levels, _values, ...) are now stale
        self._reset_cache()
947
948 def set_levels(
949 self, levels, *, level=None, verify_integrity: bool = True
950 ) -> MultiIndex:
951 """
952 Set new levels on MultiIndex. Defaults to returning new index.
953
954 Parameters
955 ----------
956 levels : sequence or list of sequence
957 New level(s) to apply.
958 level : int, level name, or sequence of int/level names (default None)
959 Level(s) to set (None for all levels).
960 verify_integrity : bool, default True
961 If True, checks that levels and codes are compatible.
962
963 Returns
964 -------
965 MultiIndex
966
967 Examples
968 --------
969 >>> idx = pd.MultiIndex.from_tuples(
970 ... [
971 ... (1, "one"),
972 ... (1, "two"),
973 ... (2, "one"),
974 ... (2, "two"),
975 ... (3, "one"),
976 ... (3, "two")
977 ... ],
978 ... names=["foo", "bar"]
979 ... )
980 >>> idx
981 MultiIndex([(1, 'one'),
982 (1, 'two'),
983 (2, 'one'),
984 (2, 'two'),
985 (3, 'one'),
986 (3, 'two')],
987 names=['foo', 'bar'])
988
989 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
990 MultiIndex([('a', 1),
991 ('a', 2),
992 ('b', 1),
993 ('b', 2),
994 ('c', 1),
995 ('c', 2)],
996 names=['foo', 'bar'])
997 >>> idx.set_levels(['a', 'b', 'c'], level=0)
998 MultiIndex([('a', 'one'),
999 ('a', 'two'),
1000 ('b', 'one'),
1001 ('b', 'two'),
1002 ('c', 'one'),
1003 ('c', 'two')],
1004 names=['foo', 'bar'])
1005 >>> idx.set_levels(['a', 'b'], level='bar')
1006 MultiIndex([(1, 'a'),
1007 (1, 'b'),
1008 (2, 'a'),
1009 (2, 'b'),
1010 (3, 'a'),
1011 (3, 'b')],
1012 names=['foo', 'bar'])
1013
1014 If any of the levels passed to ``set_levels()`` exceeds the
1015 existing length, all of the values from that argument will
1016 be stored in the MultiIndex levels, though the values will
1017 be truncated in the MultiIndex output.
1018
1019 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
1020 MultiIndex([('a', 1),
1021 ('a', 2),
1022 ('b', 1),
1023 ('b', 2),
1024 ('c', 1),
1025 ('c', 2)],
1026 names=['foo', 'bar'])
1027 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
1028 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
1029 """
1030
1031 if isinstance(levels, Index):
1032 pass
1033 elif is_array_like(levels):
1034 levels = Index(levels)
1035 elif is_list_like(levels):
1036 levels = list(levels)
1037
1038 level, levels = _require_listlike(level, levels, "Levels")
1039 idx = self._view()
1040 idx._reset_identity()
1041 idx._set_levels(
1042 levels, level=level, validate=True, verify_integrity=verify_integrity
1043 )
1044 return idx
1045
    @property
    def nlevels(self) -> int:
        """
        Integer number of levels in this MultiIndex.

        Returns
        -------
        int
            Number of levels.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
        >>> mi
        MultiIndex([('a', 'b', 'c')],
                   )
        >>> mi.nlevels
        3
        """
        # one stored Index per level
        return len(self._levels)
1061
1062 @property
1063 def levshape(self) -> Shape:
1064 """
1065 A tuple with the length of each level.
1066
1067 Examples
1068 --------
1069 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1070 >>> mi
1071 MultiIndex([('a', 'b', 'c')],
1072 )
1073 >>> mi.levshape
1074 (1, 1, 1)
1075 """
1076 return tuple(len(x) for x in self.levels)
1077
1078 # --------------------------------------------------------------------
1079 # Codes Methods
1080
    @property
    def codes(self) -> FrozenList:
        """
        Codes of the MultiIndex: one integer array per level, where each
        entry indexes into that level's labels (-1 marks a missing value).
        """
        return self._codes
1084
    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        # In-place counterpart of set_codes; mutates self, so callers must
        # operate on a view/copy when a new index is expected.
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        level_numbers: list[int] | range
        if level is None:
            # replace all code arrays, coercing each to the smallest dtype
            # that can index its level
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
            level_numbers = range(len(new_codes))
        else:
            # replace only the requested levels' codes, keeping the rest
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            # validates against current levels; may re-mask NaN codes as -1
            new_codes = self._verify_integrity(
                codes=new_codes, levels_to_verify=level_numbers
            )

        self._codes = new_codes

        # cached properties (levels, _values, ...) are now stale
        self._reset_cache()
1125
    def set_codes(
        self, codes, *, level=None, verify_integrity: bool = True
    ) -> MultiIndex:
        """
        Set new codes on MultiIndex. Defaults to returning new index.

        Parameters
        ----------
        codes : sequence or list of sequence
            New codes to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        verify_integrity : bool, default True
            If True, checks that levels and codes are compatible.

        Returns
        -------
        MultiIndex
            A new MultiIndex with the updated codes; the caller is left
            unchanged.

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples(
        ...     [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
        ... )
        >>> idx
        MultiIndex([(1, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (2, 'two')],
                   names=['foo', 'bar'])

        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([1, 0, 1, 0], level=0)
        MultiIndex([(2, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([0, 0, 1, 1], level='bar')
        MultiIndex([(1, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (2, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        """

        level, codes = _require_listlike(level, codes, "Codes")
        # mutate a fresh view so the caller's index is untouched
        idx = self._view()
        idx._reset_identity()
        idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
        return idx
1189
1190 # --------------------------------------------------------------------
1191 # Index Internals
1192
    @cache_readonly
    def _engine(self):
        """
        Build the lookup engine: each row's codes are packed into a single
        integer key — a uint64 when the combined bit-width fits, otherwise
        an arbitrary-precision Python int.
        """
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes:
        # NaN values are shifted to 1 and missing values in other while
        # calculating the indexer are shifted to 0
        sizes = np.ceil(
            np.log2(
                [len(level) + libindex.multiindex_nulls_shift for level in self.levels]
            )
        )

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1220
1221 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1222 # type "Type[MultiIndex]" in supertype "Index"
    @property
    def _constructor(self) -> Callable[..., MultiIndex]:  # type: ignore[override]
        # from_tuples (not the class itself): reconstruction happens from
        # flat tuple data rather than from levels/codes.
        return type(self).from_tuples
1226
    @doc(Index._shallow_copy)
    def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
        # `values` is an array of tuples; rebuild via from_tuples, keeping
        # the existing names unless an explicit `name` was passed.
        names = name if name is not lib.no_default else self.names

        return type(self).from_tuples(values, sortorder=None, names=names)
1232
    def _view(self) -> MultiIndex:
        """
        Return a new MultiIndex sharing this one's levels/codes/names,
        carrying over the cache (minus the 'levels' entry, see GH32669).
        """
        result = type(self)(
            levels=self.levels,
            codes=self.codes,
            sortorder=self.sortorder,
            names=self.names,
            verify_integrity=False,
        )
        result._cache = self._cache.copy()
        result._cache.pop("levels", None)  # GH32669
        return result
1244
1245 # --------------------------------------------------------------------
1246
1247 # error: Signature of "copy" incompatible with supertype "Index"
1248 def copy( # type: ignore[override]
1249 self,
1250 names=None,
1251 deep: bool = False,
1252 name=None,
1253 ) -> Self:
1254 """
1255 Make a copy of this object.
1256
1257 Names, dtype, levels and codes can be passed and will be set on new copy.
1258
1259 Parameters
1260 ----------
1261 names : sequence, optional
1262 deep : bool, default False
1263 name : Label
1264 Kept for compatibility with 1-dimensional Index. Should not be used.
1265
1266 Returns
1267 -------
1268 MultiIndex
1269
1270 Notes
1271 -----
1272 In most cases, there should be no functional difference from using
1273 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1274 This could be potentially expensive on large MultiIndex objects.
1275
1276 Examples
1277 --------
1278 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1279 >>> mi
1280 MultiIndex([('a', 'b', 'c')],
1281 )
1282 >>> mi.copy()
1283 MultiIndex([('a', 'b', 'c')],
1284 )
1285 """
1286 names = self._validate_names(name=name, names=names, deep=deep)
1287 keep_id = not deep
1288 levels, codes = None, None
1289
1290 if deep:
1291 from copy import deepcopy
1292
1293 levels = deepcopy(self.levels)
1294 codes = deepcopy(self.codes)
1295
1296 levels = levels if levels is not None else self.levels
1297 codes = codes if codes is not None else self.codes
1298
1299 new_index = type(self)(
1300 levels=levels,
1301 codes=codes,
1302 sortorder=self.sortorder,
1303 names=names,
1304 verify_integrity=False,
1305 )
1306 new_index._cache = self._cache.copy()
1307 new_index._cache.pop("levels", None) # GH32669
1308 if keep_id:
1309 new_index._id = self._id
1310 return new_index
1311
    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        The numpy array interface; returns ``self.values``.

        ``dtype`` and ``copy`` are accepted for protocol compatibility
        but are ignored here.
        """
        return self.values
1315
1316 def view(self, cls=None) -> Self:
1317 """this is defined as a copy with the same identity"""
1318 result = self.copy()
1319 result._id = self._id
1320 return result
1321
    @doc(Index.__contains__)
    def __contains__(self, key: Any) -> bool:
        # raise TypeError for unhashable keys (dict-like membership
        # semantics) before attempting the lookup
        hash(key)
        try:
            self.get_loc(key)
            return True
        except (LookupError, TypeError, ValueError):
            return False
1330
    @cache_readonly
    def dtype(self) -> np.dtype:
        """Always object dtype: a MultiIndex's elements are tuples."""
        return np.dtype("O")
1334
    def _is_memory_usage_qualified(self) -> bool:
        """return a boolean if we need a qualified .info display"""

        def f(level) -> bool:
            # string-like levels make exact memory usage data-dependent
            return "mixed" in level or "string" in level or "unicode" in level

        return any(f(level) for level in self._inferred_type_levels)
1342
1343 # Cannot determine type of "memory_usage"
    @doc(Index.memory_usage)  # type: ignore[has-type]
    def memory_usage(self, deep: bool = False) -> int:
        # Overrides the base class to avoid computing .values here, which
        # could materialize a tuple representation unnecessarily; _nbytes
        # works directly from the levels and codes.
        return self._nbytes(deep)
1350
    @cache_readonly
    def nbytes(self) -> int:
        """return the number of bytes in the underlying data"""
        # shallow measurement; use memory_usage(deep=True) for a deep count
        return self._nbytes(False)
1355
    def _nbytes(self, deep: bool = False) -> int:
        """
        Return the number of bytes in the underlying data.

        Deeply introspects the level data if ``deep=True``; includes the
        engine hashtable.

        *This is an internal routine.*
        """
        # for implementations with no useful getsizeof (PyPy)
        objsize = 24

        level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
        label_nbytes = sum(i.nbytes for i in self.codes)
        names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
        result = level_nbytes + label_nbytes + names_nbytes

        # include our engine hashtable
        result += self._engine.sizeof(deep=deep)
        return result
1377
1378 # --------------------------------------------------------------------
1379 # Rendering Methods
1380
    def _formatter_func(self, tup):
        """
        Formats each item in tup according to its level's formatter function.
        """
        # one formatter per level, applied positionally to the tuple
        formatter_funcs = [level._formatter_func for level in self.levels]
        return tuple(func(val) for func, val in zip(formatter_funcs, tup))
1387
    def _get_values_for_csv(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Stringify the levels for CSV output, substituting ``na_rep`` where
        codes are -1 (missing), and return the resulting values.
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # append na_rep as an extra "level value" and point the
                # missing codes at it
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
1423
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the MultiIndex as a list of strings (deprecated).

        Use ``index.astype(str)`` or ``index.map(formatter)`` instead.
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # `name` is a legacy alias for `names`
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            # (the False case below is unreachable: falsy sparsify skips this branch)
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1504
    def _format_multi(
        self,
        *,
        include_names: bool,
        sparsify: bool | None | lib.NoDefault,
        formatter: Callable | None = None,
    ) -> list:
        """
        Internal renderer: return one list of formatted strings per level,
        optionally prefixed with the level name and sparsified.
        """
        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = _get_na_rep(lev.dtype)

            if len(lev) > 0:
                taken = formatted = lev.take(level_codes)
                formatted = taken._format_flat(include_name=False, formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if include_names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify is lib.no_default:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(include_names), sentinel=sentinel
            )

        return result_levels
1567
1568 # --------------------------------------------------------------------
1569 # Names Methods
1570
    def _get_names(self) -> FrozenList:
        """Return the level names wrapped in an immutable FrozenList."""
        return FrozenList(self._names)
1573
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
                self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
1631
    # Explicit property() call (rather than @property) so the fget/fset
    # pair defined above can be wired in; the setter mutates self._names
    # in place.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1650
1651 # --------------------------------------------------------------------
1652
    @cache_readonly
    def inferred_type(self) -> str:
        # a MultiIndex holds tuples of possibly-heterogeneous values
        return "mixed"
1656
    def _get_level_number(self, level) -> int:
        """
        Translate a level name or (possibly negative) integer position into
        a zero-based level number.

        Raises
        ------
        ValueError
            If ``level`` is a name that occurs more than once.
        KeyError
            If ``level`` is a name that does not exist.
        IndexError
            If an integer ``level`` is out of range.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            level = self.names.index(level)
        except ValueError as err:
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                # negative positions count from the end
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level
1683
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # a code of -1 marks a missing value; any NaN breaks monotonicity
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1714
    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return a boolean if the values are equal or decreasing.
        """
        # monotonic decreasing if and only if reverse is monotonic increasing
        # (self[::-1] only reverses the codes; levels are shared)
        return self[::-1].is_monotonic_increasing
1722
    @cache_readonly
    def _inferred_type_levels(self) -> list[str]:
        """return a list of the inferred types, one for each level"""
        # in level order; consumed by _is_memory_usage_qualified
        return [i.inferred_type for i in self.levels]
1727
    @doc(Index.duplicated)
    def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        # Collapse each row's codes into a single group id, then run the
        # 1-D duplicated kernel over those ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1738
1739 def fillna(self, value=None, downcast=None):
1740 """
1741 fillna is not implemented for MultiIndex
1742 """
1743 raise NotImplementedError("isna is not defined for MultiIndex")
1744
1745 @doc(Index.dropna)
1746 def dropna(self, how: AnyAll = "any") -> MultiIndex:
1747 nans = [level_codes == -1 for level_codes in self.codes]
1748 if how == "any":
1749 indexer = np.any(nans, axis=0)
1750 elif how == "all":
1751 indexer = np.all(nans, axis=0)
1752 else:
1753 raise ValueError(f"invalid how option: {how}")
1754
1755 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1756 return self.set_codes(codes=new_codes)
1757
    def _get_level_values(self, level: int, unique: bool = False) -> Index:
        """
        Return vector of label values for requested level,
        equal to the length of the index

        **this is an internal method**

        Parameters
        ----------
        level : int
        unique : bool, default False
            if True, drop duplicated values

        Returns
        -------
        Index
        """
        lev = self.levels[level]
        level_codes = self.codes[level]
        name = self._names[level]
        if unique:
            level_codes = algos.unique(level_codes)
        # -1 codes map to the level's NA value via fill_value
        filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
        return lev._shallow_copy(filled, name=name)
1782
1783 # error: Signature of "get_level_values" incompatible with supertype "Index"
1784 def get_level_values(self, level) -> Index: # type: ignore[override]
1785 """
1786 Return vector of label values for requested level.
1787
1788 Length of returned vector is equal to the length of the index.
1789
1790 Parameters
1791 ----------
1792 level : int or str
1793 ``level`` is either the integer position of the level in the
1794 MultiIndex, or the name of the level.
1795
1796 Returns
1797 -------
1798 Index
1799 Values is a level of this MultiIndex converted to
1800 a single :class:`Index` (or subclass thereof).
1801
1802 Notes
1803 -----
1804 If the level contains missing values, the result may be casted to
1805 ``float`` with missing values specified as ``NaN``. This is because
1806 the level is converted to a regular ``Index``.
1807
1808 Examples
1809 --------
1810 Create a MultiIndex:
1811
1812 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1813 >>> mi.names = ['level_1', 'level_2']
1814
1815 Get level values by supplying level as either integer or name:
1816
1817 >>> mi.get_level_values(0)
1818 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1819 >>> mi.get_level_values('level_2')
1820 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1821
1822 If a level contains missing values, the return type of the level
1823 may be cast to ``float``.
1824
1825 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1826 level_0 int64
1827 level_1 int64
1828 dtype: object
1829 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1830 Index([1.0, nan, 2.0], dtype='float64')
1831 """
1832 level = self._get_level_number(level)
1833 values = self._get_level_values(level)
1834 return values
1835
1836 @doc(Index.unique)
1837 def unique(self, level=None):
1838 if level is None:
1839 return self.drop_duplicates()
1840 else:
1841 level = self._get_level_number(level)
1842 return self._get_level_values(level=level, unique=True)
1843
1844 def to_frame(
1845 self,
1846 index: bool = True,
1847 name=lib.no_default,
1848 allow_duplicates: bool = False,
1849 ) -> DataFrame:
1850 """
1851 Create a DataFrame with the levels of the MultiIndex as columns.
1852
1853 Column ordering is determined by the DataFrame constructor with data as
1854 a dict.
1855
1856 Parameters
1857 ----------
1858 index : bool, default True
1859 Set the index of the returned DataFrame as the original MultiIndex.
1860
1861 name : list / sequence of str, optional
1862 The passed names should substitute index level names.
1863
1864 allow_duplicates : bool, optional default False
1865 Allow duplicate column labels to be created.
1866
1867 .. versionadded:: 1.5.0
1868
1869 Returns
1870 -------
1871 DataFrame
1872
1873 See Also
1874 --------
1875 DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
1876 tabular data.
1877
1878 Examples
1879 --------
1880 >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
1881 >>> mi
1882 MultiIndex([('a', 'c'),
1883 ('b', 'd')],
1884 )
1885
1886 >>> df = mi.to_frame()
1887 >>> df
1888 0 1
1889 a c a c
1890 b d b d
1891
1892 >>> df = mi.to_frame(index=False)
1893 >>> df
1894 0 1
1895 0 a c
1896 1 b d
1897
1898 >>> df = mi.to_frame(name=['x', 'y'])
1899 >>> df
1900 x y
1901 a c a c
1902 b d b d
1903 """
1904 from pandas import DataFrame
1905
1906 if name is not lib.no_default:
1907 if not is_list_like(name):
1908 raise TypeError("'name' must be a list / sequence of column names.")
1909
1910 if len(name) != len(self.levels):
1911 raise ValueError(
1912 "'name' should have same length as number of levels on index."
1913 )
1914 idx_names = name
1915 else:
1916 idx_names = self._get_level_names()
1917
1918 if not allow_duplicates and len(set(idx_names)) != len(idx_names):
1919 raise ValueError(
1920 "Cannot create duplicate column labels if allow_duplicates is False"
1921 )
1922
1923 # Guarantee resulting column order - PY36+ dict maintains insertion order
1924 result = DataFrame(
1925 {level: self._get_level_values(level) for level in range(len(self.levels))},
1926 copy=False,
1927 )
1928 result.columns = idx_names
1929
1930 if index:
1931 result.index = self
1932 return result
1933
    # error: Return type "Index" of "to_flat_index" incompatible with return type
    # "MultiIndex" in supertype "Index"
    def to_flat_index(self) -> Index:  # type: ignore[override]
        """
        Convert a MultiIndex to an Index of Tuples containing the level values.

        Returns
        -------
        pd.Index
            Index with the MultiIndex data represented in Tuples.

        See Also
        --------
        MultiIndex.from_tuples : Convert flat index back to MultiIndex.

        Notes
        -----
        This method will simply return the caller if called by anything other
        than a MultiIndex.

        Examples
        --------
        >>> index = pd.MultiIndex.from_product(
        ...     [['foo', 'bar'], ['baz', 'qux']],
        ...     names=['a', 'b'])
        >>> index.to_flat_index()
        Index([('foo', 'baz'), ('foo', 'qux'),
               ('bar', 'baz'), ('bar', 'qux')],
              dtype='object')
        """
        # tupleize_cols=False keeps the tuples as scalar elements instead of
        # re-forming a MultiIndex
        return Index(self._values, tupleize_cols=False)
1965
    def _is_lexsorted(self) -> bool:
        """
        Return True if the codes are lexicographically sorted.

        Returns
        -------
        bool

        Examples
        --------
        In the below examples, the first level of the MultiIndex is sorted because
        a<b<c, so there is no need to look at the next level.

        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'e', 'f']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'f', 'e']])._is_lexsorted()
        True

        In case there is a tie, the lexicographical sorting looks
        at the next level of the MultiIndex.

        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
        False
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
        False
        """
        # lexsorted iff the sorted prefix covers every level
        return self._lexsort_depth == self.nlevels
2001
    @cache_readonly
    def _lexsort_depth(self) -> int:
        """
        Compute and return the lexsort_depth, the number of levels of the
        MultiIndex that are sorted lexically

        Returns
        -------
        int
        """
        # a sortorder recorded at construction time short-circuits the scan
        if self.sortorder is not None:
            return self.sortorder
        return _lexsort_depth(self.codes, self.nlevels)
2015
    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Parameters
        ----------
        raise_if_incomparable : bool, default False
            If True, re-raise the TypeError from sorting a level with
            incomparable elements; otherwise leave that level unsorted.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    if raise_if_incomparable:
                        raise
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    # remap codes so the same rows point at the sorted level
                    level_codes = algos.take_nd(ri, level_codes, fill_value=-1)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2082
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this `break` abandons the remaining
                    # levels rather than skipping just this one — confirm
                    # intended vs `continue`.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            # fresh identity plus the trimmed levels/codes
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
2167
2168 # --------------------------------------------------------------------
2169 # Pickling Methods
2170
    def __reduce__(self):
        """Necessary for making this object picklable"""
        # reconstructed via ibase._new_Index with this kwargs dict;
        # FrozenLists are converted to plain lists for the payload
        d = {
            "levels": list(self.levels),
            "codes": list(self.codes),
            "sortorder": self.sortorder,
            "names": list(self.names),
        }
        return ibase._new_Index, (type(self), d), None
2180
2181 # --------------------------------------------------------------------
2182
    def __getitem__(self, key):
        """
        Scalar keys return the tuple at that position; slice/array/bool
        keys return a new MultiIndex selecting those positions.
        """
        if is_scalar(key):
            key = com.cast_scalar_indexer(key)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # -1 code means missing: surface it as NaN in the tuple
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                sortorder = self.sortorder
            elif isinstance(key, slice):
                if key.step is None or key.step > 0:
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            # levels are shared; only the codes are sliced
            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2216
2217 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2218 """
2219 Fastpath for __getitem__ when we know we have a slice.
2220 """
2221 sortorder = None
2222 if slobj.step is None or slobj.step > 0:
2223 sortorder = self.sortorder
2224
2225 new_codes = [level_codes[slobj] for level_codes in self.codes]
2226
2227 return type(self)(
2228 levels=self.levels,
2229 codes=new_codes,
2230 names=self._names,
2231 sortorder=sortorder,
2232 verify_integrity=False,
2233 )
2234
2235 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        # Take rows by position; -1 entries in `indices` become missing
        # values (code -1 in every level) when filling is allowed.
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the sentinel code for missing values within each level
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            mask = indices == -1
            if mask.any():
                # overwrite the positions requested with -1 in every level
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
2266
    def append(self, other):
        """
        Append a collection of Index options together.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        Index
            The combined index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
        >>> mi
        MultiIndex([('a', 'b')],
                   )
        >>> mi.append(mi)
        MultiIndex([('a', 'b'), ('a', 'b')],
                   )
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # Fast path: every other index is a MultiIndex of at least our
            # depth, so combine level-by-level without materializing tuples.
            codes = []
            levels = []
            names = []
            for i in range(self.nlevels):
                # union this level's values across all indexes ...
                level_values = self.levels[i]
                for mi in other:
                    level_values = level_values.union(mi.levels[i])
                # ... then re-express every index's codes against the union
                level_codes = [
                    recode_for_categories(
                        mi.codes[i], mi.levels[i], level_values, copy=False
                    )
                    for mi in ([self, *other])
                ]
                # names must agree across all inputs, otherwise drop the name
                level_name = self.names[i]
                if any(mi.names[i] != level_name for mi in other):
                    level_name = None
                codes.append(np.concatenate(level_codes))
                levels.append(level_values)
                names.append(level_name)
            return MultiIndex(
                codes=codes, levels=levels, names=names, verify_integrity=False
            )

        # Slow path: concatenate materialized tuples and rebuild.
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            # We only get here if other contains at least one index with tuples,
            # setting names to None automatically
            return MultiIndex.from_tuples(new_tuples)
        except (TypeError, IndexError):
            # not all entries are tuples -> fall back to a flat object Index
            return Index(new_tuples)
2329
    def argsort(
        self, *args, na_position: str = "last", **kwargs
    ) -> npt.NDArray[np.intp]:
        """
        Return the indices that would lexicographically sort this MultiIndex.
        """
        # Make level order match value order first, then lexsort on the codes.
        target = self._sort_levels_monotonic(raise_if_incomparable=True)
        keys = [lev.codes for lev in target._get_codes_for_sorting()]
        return lexsort_indexer(keys, na_position=na_position, codes_given=True)
2336
2337 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        # Repeat each row `repeats` times; levels and names are unchanged,
        # only the per-level codes are repeated.
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2353
2354 # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make a new :class:`pandas.MultiIndex` with the passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when ``level`` is not specified.
        level : int or level name, default None
        errors : str, default 'raise'

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')],
        ...                                  names=["number", "color"])
        >>> idx
        MultiIndex([(0,  'green'),
                    (0, 'purple'),
                    (1,  'green'),
                    (1, 'purple'),
                    (2,  'green'),
                    (2, 'purple')],
                   names=['number', 'color'])
        >>> idx.drop([(1, 'green'), (2, 'purple')])
        MultiIndex([(0,  'green'),
                    (0, 'purple'),
                    (1, 'purple'),
                    (2,  'green')],
                   names=['number', 'color'])

        We can also drop from a specific level.

        >>> idx.drop('green', level='color')
        MultiIndex([(0, 'purple'),
                    (1, 'purple'),
                    (2, 'purple')],
                   names=['number', 'color'])

        >>> idx.drop([1, 2], level=0)
        MultiIndex([(0,  'green'),
                    (0, 'purple')],
                   names=['number', 'color'])
        """
        if level is not None:
            # label(s) refer to a single level -> delegate
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # collect the positional indices of every row to delete
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    # expand the slice into explicit positions
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # convert the boolean mask to positional indices
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
2445
    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop all rows whose value in ``level`` matches any of ``codes``.

        Raises KeyError if any label is missing from the level, unless
        ``errors == "ignore"``.
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        # (-2 marks "label not found", distinguishing it from a NaN match).
        nan_codes = isna(codes)
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # level has no unused entries, so a NaN label cannot match either
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep only rows whose code at this level is NOT one of the targets
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2466
2467 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2468 """
2469 Swap level i with level j.
2470
2471 Calling this method does not change the ordering of the values.
2472
2473 Parameters
2474 ----------
2475 i : int, str, default -2
2476 First level of index to be swapped. Can pass level name as string.
2477 Type of parameters can be mixed.
2478 j : int, str, default -1
2479 Second level of index to be swapped. Can pass level name as string.
2480 Type of parameters can be mixed.
2481
2482 Returns
2483 -------
2484 MultiIndex
2485 A new MultiIndex.
2486
2487 See Also
2488 --------
2489 Series.swaplevel : Swap levels i and j in a MultiIndex.
2490 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2491 particular axis.
2492
2493 Examples
2494 --------
2495 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2496 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2497 >>> mi
2498 MultiIndex([('a', 'bb'),
2499 ('a', 'aa'),
2500 ('b', 'bb'),
2501 ('b', 'aa')],
2502 )
2503 >>> mi.swaplevel(0, 1)
2504 MultiIndex([('bb', 'a'),
2505 ('aa', 'a'),
2506 ('bb', 'b'),
2507 ('aa', 'b')],
2508 )
2509 """
2510 new_levels = list(self.levels)
2511 new_codes = list(self.codes)
2512 new_names = list(self.names)
2513
2514 i = self._get_level_number(i)
2515 j = self._get_level_number(j)
2516
2517 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2518 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2519 new_names[i], new_names[j] = new_names[j], new_names[i]
2520
2521 return MultiIndex(
2522 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2523 )
2524
2525 def reorder_levels(self, order) -> MultiIndex:
2526 """
2527 Rearrange levels using input order. May not drop or duplicate levels.
2528
2529 Parameters
2530 ----------
2531 order : list of int or list of str
2532 List representing new level order. Reference level by number
2533 (position) or by key (label).
2534
2535 Returns
2536 -------
2537 MultiIndex
2538
2539 Examples
2540 --------
2541 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2542 >>> mi
2543 MultiIndex([(1, 3),
2544 (2, 4)],
2545 names=['x', 'y'])
2546
2547 >>> mi.reorder_levels(order=[1, 0])
2548 MultiIndex([(3, 1),
2549 (4, 2)],
2550 names=['y', 'x'])
2551
2552 >>> mi.reorder_levels(order=['y', 'x'])
2553 MultiIndex([(3, 1),
2554 (4, 2)],
2555 names=['y', 'x'])
2556 """
2557 order = [self._get_level_number(i) for i in order]
2558 result = self._reorder_ilevels(order)
2559 return result
2560
2561 def _reorder_ilevels(self, order) -> MultiIndex:
2562 if len(order) != self.nlevels:
2563 raise AssertionError(
2564 f"Length of order must be same as number of levels ({self.nlevels}), "
2565 f"got {len(order)}"
2566 )
2567 new_levels = [self.levels[i] for i in order]
2568 new_codes = [self.codes[i] for i in order]
2569 new_names = [self.names[i] for i in order]
2570
2571 return MultiIndex(
2572 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2573 )
2574
    def _recode_for_new_levels(
        self, new_levels, copy: bool = True
    ) -> Generator[np.ndarray, None, None]:
        """
        Yield each level's codes re-expressed against the corresponding entry
        of ``new_levels``.

        ``new_levels`` may cover only a prefix of the levels; one codes array
        is yielded per entry in ``new_levels``.
        """
        if len(new_levels) > self.nlevels:
            raise AssertionError(
                f"Length of new_levels ({len(new_levels)}) "
                f"must be <= self.nlevels ({self.nlevels})"
            )
        for i in range(len(new_levels)):
            yield recode_for_categories(
                self.codes[i], self.levels[i], new_levels[i], copy=copy
            )
2587
    def _get_codes_for_sorting(self) -> list[Categorical]:
        """
        we are categorizing our codes by using the
        available categories (all, not just observed)
        excluding any missing ones (-1); this is in preparation
        for sorting, where we need to disambiguate that -1 is not
        a valid value
        """

        def cats(level_codes):
            # categories 0..max(code); empty level -> no categories
            return np.arange(
                np.array(level_codes).max() + 1 if len(level_codes) else 0,
                dtype=level_codes.dtype,
            )

        return [
            Categorical.from_codes(level_codes, cats(level_codes), True, validate=False)
            for level_codes in self.codes
        ]
2607
    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
        na_position: str = "first",
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : bool, default True
            If True, also sort by the remaining levels after ``level``.
        na_position : {'first' or 'last'}, default 'first'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.

            .. versionadded:: 2.1.0

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # sort keys: the requested levels first (in requested order)
        codes = [self.codes[lev] for lev in level]
        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")
        elif sort_remaining:
            # tie-break by every remaining level
            codes.extend(
                [self.codes[lev] for lev in range(len(self.levels)) if lev not in level]
            )
        else:
            # only the first requested level is guaranteed sorted
            sortorder = level[0]

        indexer = lexsort_indexer(
            codes, orders=ascending, na_position=na_position, codes_given=True
        )

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2708
    def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
        # Reindexing may produce a non-MultiIndex target; re-wrap it as a
        # MultiIndex when possible so the level structure survives.
        if not isinstance(target, MultiIndex):
            if indexer is None:
                target = self
            elif (indexer >= 0).all():
                # every entry matched -> simple positional take
                target = self.take(indexer)
            else:
                try:
                    target = MultiIndex.from_tuples(target)
                except TypeError:
                    # not all tuples, see test_constructor_dict_multiindex_reindex_flat
                    return target

        target = self._maybe_preserve_names(target, preserve_names)
        return target
2724
2725 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2726 if (
2727 preserve_names
2728 and target.nlevels == self.nlevels
2729 and target.names != self.names
2730 ):
2731 target = target.copy(deep=False)
2732 target.names = self.names
2733 return target
2734
2735 # --------------------------------------------------------------------
2736 # Indexing Methods
2737
    def _check_indexing_error(self, key) -> None:
        """Raise ``InvalidIndexError`` for keys that can never be valid here."""
        if not is_hashable(key) or is_iterator(key):
            # We allow tuples if they are hashable, whereas other Index
            # subclasses require scalar.
            # We have to explicitly exclude generators, as these are hashable.
            raise InvalidIndexError(key)
2744
2745 @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        """
        Should integer key(s) be treated as positional?
        """
        # GH#33355: delegate to the first level, since a non-tuple key
        # indexes only level 0.
        return self.levels[0]._should_fallback_to_positional
2752
    def _get_indexer_strict(
        self, key, axis_name: str
    ) -> tuple[Index, npt.NDArray[np.intp]]:
        # Fast path: a list of non-tuple labels indexes only the first level.
        keyarr = key
        if not isinstance(keyarr, Index):
            keyarr = com.asarray_tuplesafe(keyarr)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            indexer = self._get_indexer_level_0(keyarr)

            self._raise_if_missing(key, indexer, axis_name)
            return self[indexer], indexer

        return super()._get_indexer_strict(key, axis_name)
2767
    def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
        """Raise ``KeyError`` when ``indexer`` shows entries of ``key`` are absent."""
        keyarr = key
        if not isinstance(key, Index):
            keyarr = com.asarray_tuplesafe(key)

        if len(keyarr) and not isinstance(keyarr[0], tuple):
            # i.e. same condition for special case in MultiIndex._get_indexer_strict

            mask = indexer == -1
            if mask.any():
                check = self.levels[0].get_indexer(keyarr)
                cmask = check == -1
                if cmask.any():
                    raise KeyError(f"{keyarr[cmask]} not in index")
                # We get here when levels still contain values which are not
                # actually in Index anymore
                raise KeyError(f"{keyarr} not in index")
        else:
            return super()._raise_if_missing(key, indexer, axis_name)
2787
    def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
        """
        Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
        """
        lev = self.levels[0]
        codes = self._codes[0]
        # a Categorical view avoids materializing the level-0 values
        cat = Categorical.from_codes(codes=codes, categories=lev, validate=False)
        ci = Index(cat)
        return ci.get_indexer_for(target)
2797
    def get_slice_bound(
        self,
        label: Hashable | Sequence[Hashable],
        side: Literal["left", "right"],
    ) -> int:
        """
        For an ordered MultiIndex, compute slice bound
        that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object or tuple of objects
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.

        Notes
        -----
        This method only works if level 0 index of the MultiIndex is lexsorted.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])

        Get the locations from the leftmost 'b' in the first level
        until the end of the multiindex:

        >>> mi.get_slice_bound('b', side="left")
        1

        Like above, but if you get the locations from the rightmost
        'b' in the first level and 'f' in the second level:

        >>> mi.get_slice_bound(('b','f'), side="right")
        3

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # normalize a scalar label to a 1-tuple, then defer to the
        # level-by-level bound search
        if not isinstance(label, tuple):
            label = (label,)
        return self._partial_tup_index(label, side=side)
2849
2850 # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
2905
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple on a
        lexsorted index, narrowing the [start, end) window one level at a time.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # only search within the window established by previous levels
            section = level_codes[start:end]

            loc: npt.NDArray[np.intp] | np.intp | int
            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # narrow the window to the rows matching this level's code
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)
2956
2957 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2958 """
2959 If key is NA value, location of index unify as -1.
2960
2961 Parameters
2962 ----------
2963 level_index: Index
2964 key : label
2965
2966 Returns
2967 -------
2968 loc : int
2969 If key is NA value, loc is -1
2970 Else, location of key in index.
2971
2972 See Also
2973 --------
2974 Index.get_loc : The get_loc method for (single-level) index.
2975 """
2976 if is_scalar(key) and isna(key):
2977 # TODO: need is_valid_na_for_dtype(key, level_index.dtype)
2978 return -1
2979 else:
2980 return level_index.get_loc(key)
2981
    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # non-tuple key indexes only level 0
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # complete key on a unique index -> hash-engine lookup
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except KeyError as err:
                raise KeyError(key) from err
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # linear filtering of the remaining (unsorted) levels
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
3103
    def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
        """
        Get location and sliced index for requested label(s)/level(s).

        Parameters
        ----------
        key : label or sequence of labels
        level : int/level name or list thereof, optional
        drop_level : bool, default True
            If ``False``, the resulting index will not drop any level.

        Returns
        -------
        tuple
            A 2-tuple where the elements :

            Element 0: int, slice object or boolean array.

            Element 1: The resulting sliced multiindex/index. If the key
            contains all levels, this will be ``None``.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
        ...                                names=['A', 'B'])

        >>> mi.get_loc_level('b')
        (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))

        >>> mi.get_loc_level('e', level='B')
        (array([False,  True, False]), Index(['b'], dtype='object', name='A'))

        >>> mi.get_loc_level(['b', 'e'])
        (1, None)
        """
        # resolve level names to positional level numbers
        if not isinstance(level, (list, tuple)):
            level = self._get_level_number(level)
        else:
            level = [self._get_level_number(lev) for lev in level]

        loc, mi = self._get_loc_level(key, level=level)
        if not drop_level:
            # re-slice self so the returned index keeps all levels
            if lib.is_integer(loc):
                # Slice index must be an integer or None
                mi = self[loc : loc + 1]
            else:
                mi = self[loc]
        return loc, mi
3158
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # drop highest level numbers first so positions stay valid
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # multiple (key, level) pairs: intersect the per-level indexers
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # normalize to a boolean mask so it can be AND-ed
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                # levels actually constrained by the key (non-trivial entries)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # key contains slices: build a combined boolean indexer
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # scalar key against a single level
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                result_index = self[indexer]

            return indexer, result_index
3307
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate the positions matching ``key`` within a single level.

        Parameters
        ----------
        key : label, slice, or listlike of labels
        level : int, default 0
            Positional level number; the `level` kwarg is _always_ positional,
            never a level name.
        indexer : np.ndarray[bool] or None, default None
            If provided, restrict matching to positions where it is True.

        Returns
        -------
        slice or np.ndarray[bool]
            A slice when the index is sorted enough for a contiguous answer
            (cheap view), otherwise a boolean mask over the whole index.

        Raises
        ------
        KeyError
            If ``key`` exists in the level's values but matches no positions
            in the codes.
        """
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # scatter the sub-mask back onto a full-length boolean indexer
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.asarray(level_codes == idx, dtype=bool)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
3420
    def get_locs(self, seq) -> npt.NDArray[np.intp]:
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        Raises
        ------
        UnsortedIndexError
            If a non-trivial slice is requested on a level deeper than the
            index's lexsort depth.
        KeyError
            If combining the per-level selections leaves no matches even
            though the current level alone did match.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Normalize a slice result into a full-length boolean mask so it
            # can be combined with masks from other levels.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):
            # lvl_indexer: positions matched by this level's selection only
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)
                if indexer is None:
                    lvl_indexer = lvl_indexer.copy()

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3553
3554 # --------------------------------------------------------------------
3555
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    # on a lexsorted index only a reversed slice forces a reorder
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                if not isinstance(k, (np.ndarray, ExtensionArray, Index, ABCSeries)):
                    k = sanitize_array(k, None)
                k = algos.unique(k)
                # codes absent from k map to len(level), i.e. they sort last
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3643
3644 def truncate(self, before=None, after=None) -> MultiIndex:
3645 """
3646 Slice index between two labels / tuples, return new MultiIndex.
3647
3648 Parameters
3649 ----------
3650 before : label or tuple, can be partial. Default None
3651 None defaults to start.
3652 after : label or tuple, can be partial. Default None
3653 None defaults to end.
3654
3655 Returns
3656 -------
3657 MultiIndex
3658 The truncated MultiIndex.
3659
3660 Examples
3661 --------
3662 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
3663 >>> mi
3664 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
3665 )
3666 >>> mi.truncate(before='a', after='b')
3667 MultiIndex([('a', 'x'), ('b', 'y')],
3668 )
3669 """
3670 if after and before and after < before:
3671 raise ValueError("after < before")
3672
3673 i, j = self.levels[0].slice_locs(before, after)
3674 left, right = self.slice_locs(before, after)
3675
3676 new_levels = list(self.levels)
3677 new_levels[0] = new_levels[0][i:j]
3678
3679 new_codes = [level_codes[left:right] for level_codes in self.codes]
3680 new_codes[0] = new_codes[0] - i
3681
3682 return MultiIndex(
3683 levels=new_levels,
3684 codes=new_codes,
3685 names=self._names,
3686 verify_integrity=False,
3687 )
3688
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        Parameters
        ----------
        other : object
            Typically another Index; non-Index objects compare unequal.

        Returns
        -------
        bool

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level on the realized values so that two indexes
        # with different (but equivalent) level/code encodings compare equal.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            # code -1 marks a missing entry; the missing-value patterns must match
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3750
3751 def equal_levels(self, other: MultiIndex) -> bool:
3752 """
3753 Return True if the levels of both MultiIndex objects are the same
3754
3755 """
3756 if self.nlevels != other.nlevels:
3757 return False
3758
3759 for i in range(self.nlevels):
3760 if not self.levels[i].equals(other.levels[i]):
3761 return False
3762 return True
3763
3764 # --------------------------------------------------------------------
3765 # Set Methods
3766
3767 def _union(self, other, sort) -> MultiIndex:
3768 other, result_names = self._convert_can_do_setop(other)
3769 if other.has_duplicates:
3770 # This is only necessary if other has dupes,
3771 # otherwise difference is faster
3772 result = super()._union(other, sort)
3773
3774 if isinstance(result, MultiIndex):
3775 return result
3776 return MultiIndex.from_arrays(
3777 zip(*result), sortorder=None, names=result_names
3778 )
3779
3780 else:
3781 right_missing = other.difference(self, sort=False)
3782 if len(right_missing):
3783 result = self.append(right_missing)
3784 else:
3785 result = self._get_reconciled_name_object(other)
3786
3787 if sort is not False:
3788 try:
3789 result = result.sort_values()
3790 except TypeError:
3791 if sort is True:
3792 raise
3793 warnings.warn(
3794 "The values in the array are unorderable. "
3795 "Pass `sort=False` to suppress this warning.",
3796 RuntimeWarning,
3797 stacklevel=find_stack_level(),
3798 )
3799 return result
3800
3801 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
3802 return is_object_dtype(dtype)
3803
3804 def _get_reconciled_name_object(self, other) -> MultiIndex:
3805 """
3806 If the result of a set operation will be self,
3807 return self, unless the names change, in which
3808 case make a shallow copy of self.
3809 """
3810 names = self._maybe_match_names(other)
3811 if self.names != names:
3812 # error: Cannot determine type of "rename"
3813 return self.rename(names) # type: ignore[has-type]
3814 return self
3815
3816 def _maybe_match_names(self, other):
3817 """
3818 Try to find common names to attach to the result of an operation between
3819 a and b. Return a consensus list of names if they match at least partly
3820 or list of None if they have completely different names.
3821 """
3822 if len(self.names) != len(other.names):
3823 return [None] * len(self.names)
3824 names = []
3825 for a_name, b_name in zip(self.names, other.names):
3826 if a_name == b_name:
3827 names.append(a_name)
3828 else:
3829 # TODO: what if they both have np.nan for their names?
3830 names.append(None)
3831 return names
3832
3833 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3834 _, result_names = self._convert_can_do_setop(other)
3835 return result.set_names(result_names)
3836
3837 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex:
3838 _, result_names = self._convert_can_do_setop(other)
3839
3840 if len(result) == 0:
3841 return result.remove_unused_levels().set_names(result_names)
3842 else:
3843 return result.set_names(result_names)
3844
3845 def _convert_can_do_setop(self, other):
3846 result_names = self.names
3847
3848 if not isinstance(other, Index):
3849 if len(other) == 0:
3850 return self[:0], self.names
3851 else:
3852 msg = "other must be a MultiIndex or a list of tuples"
3853 try:
3854 other = MultiIndex.from_tuples(other, names=self.names)
3855 except (ValueError, TypeError) as err:
3856 # ValueError raised by tuples_to_object_array if we
3857 # have non-object dtype
3858 raise TypeError(msg) from err
3859 else:
3860 result_names = get_unanimous_names(self, other)
3861
3862 return other, result_names
3863
3864 # --------------------------------------------------------------------
3865
3866 @doc(Index.astype)
3867 def astype(self, dtype, copy: bool = True):
3868 dtype = pandas_dtype(dtype)
3869 if isinstance(dtype, CategoricalDtype):
3870 msg = "> 1 ndim Categorical are not supported at this time"
3871 raise NotImplementedError(msg)
3872 if not is_object_dtype(dtype):
3873 raise TypeError(
3874 "Setting a MultiIndex dtype to anything other than object "
3875 "is not supported"
3876 )
3877 if copy is True:
3878 return self._view()
3879 return self
3880
3881 def _validate_fill_value(self, item):
3882 if isinstance(item, MultiIndex):
3883 # GH#43212
3884 if item.nlevels != self.nlevels:
3885 raise ValueError("Item must have length equal to number of levels.")
3886 return item._values
3887 elif not isinstance(item, tuple):
3888 # Pad the key with empty strings if lower levels of the key
3889 # aren't specified:
3890 item = (item,) + ("",) * (self.nlevels - 1)
3891 elif len(item) != self.nlevels:
3892 raise ValueError("Item must have length equal to number of levels.")
3893 return item
3894
    def putmask(self, mask, value: MultiIndex) -> MultiIndex:
        """
        Return a new MultiIndex of the values set with the mask.

        Parameters
        ----------
        mask : array like
        value : MultiIndex
            Must either be the same length as self or length one

        Returns
        -------
        MultiIndex
        """
        mask, noop = validate_putmask(self, mask)
        if noop:
            # the mask selects nothing, so there is nothing to replace
            return self.copy()

        # restrict to the rows of `value` that will actually be used, and
        # drop their unused level values before unioning below
        if len(mask) == len(value):
            subset = value[mask].remove_unused_levels()
        else:
            subset = value.remove_unused_levels()

        new_levels = []
        new_codes = []

        for i, (value_level, level, level_codes) in enumerate(
            zip(subset.levels, self.levels, self.codes)
        ):
            # enlarge this level to cover the incoming values, then re-encode
            # the masked positions against the enlarged level
            new_level = level.union(value_level, sort=False)
            value_codes = new_level.get_indexer_for(subset.get_level_values(i))
            new_code = ensure_int64(level_codes)
            new_code[mask] = value_codes
            new_levels.append(new_level)
            new_codes.append(new_code)

        return MultiIndex(
            levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
        )
3934
3935 def insert(self, loc: int, item) -> MultiIndex:
3936 """
3937 Make new MultiIndex inserting new item at location
3938
3939 Parameters
3940 ----------
3941 loc : int
3942 item : tuple
3943 Must be same length as number of levels in the MultiIndex
3944
3945 Returns
3946 -------
3947 new_index : Index
3948 """
3949 item = self._validate_fill_value(item)
3950
3951 new_levels = []
3952 new_codes = []
3953 for k, level, level_codes in zip(item, self.levels, self.codes):
3954 if k not in level:
3955 # have to insert into level
3956 # must insert at end otherwise you have to recompute all the
3957 # other codes
3958 lev_loc = len(level)
3959 level = level.insert(lev_loc, k)
3960 else:
3961 lev_loc = level.get_loc(k)
3962
3963 new_levels.append(level)
3964 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3965
3966 return MultiIndex(
3967 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3968 )
3969
3970 def delete(self, loc) -> MultiIndex:
3971 """
3972 Make new index with passed location deleted
3973
3974 Returns
3975 -------
3976 new_index : MultiIndex
3977 """
3978 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3979 return MultiIndex(
3980 levels=self.levels,
3981 codes=new_codes,
3982 names=self.names,
3983 verify_integrity=False,
3984 )
3985
3986 @doc(Index.isin)
3987 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3988 if isinstance(values, Generator):
3989 values = list(values)
3990
3991 if level is None:
3992 if len(values) == 0:
3993 return np.zeros((len(self),), dtype=np.bool_)
3994 if not isinstance(values, MultiIndex):
3995 values = MultiIndex.from_tuples(values)
3996 return values.unique().get_indexer_for(self) != -1
3997 else:
3998 num = self._get_level_number(level)
3999 levs = self.get_level_values(num)
4000
4001 if levs.size == 0:
4002 return np.zeros(len(levs), dtype=np.bool_)
4003 return levs.isin(values)
4004
    # `rename` is an alias for `set_names`; a MultiIndex has no single `name`.
    # error: Incompatible types in assignment (expression has type overloaded function,
    # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
    rename = Index.set_names  # type: ignore[assignment]

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # Each dunder below is replaced by a stub from `make_invalid_op`;
    # NOTE(review): presumably each stub raises a TypeError naming the op
    # when invoked — confirm in pandas.core.ops.

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
4035
4036
4037def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
4038 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
4039 int64_codes = [ensure_int64(level_codes) for level_codes in codes]
4040 for k in range(nlevels, 0, -1):
4041 if libalgos.is_lexsorted(int64_codes[:k]):
4042 return k
4043 return 0
4044
4045
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out repeated prefix labels in successive rows, keeping the first
    occurrence; the innermost level is never blanked.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev = rows[start]

    for row in rows[start + 1 :]:
        sparse_row: list = []

        for lvl, (prev_val, val) in enumerate(zip(prev, row)):
            if lvl == nlevels - 1:
                # innermost level: always shown
                sparse_row.append(val)
                # error: Argument 1 to "append" of "list" has incompatible
                # type "list[Any]"; expected "tuple[Any, ...]"
                out.append(sparse_row)  # type: ignore[arg-type]
                break

            if prev_val == val:
                sparse_row.append(sentinel)
            else:
                # first difference: emit the rest of the row verbatim
                sparse_row.extend(row[lvl:])
                # error: Argument 1 to "append" of "list" has incompatible
                # type "list[Any]"; expected "tuple[Any, ...]"
                out.append(sparse_row)  # type: ignore[arg-type]
                break

        prev = row

    return list(zip(*out))
4076
4077
4078def _get_na_rep(dtype: DtypeObj) -> str:
4079 if isinstance(dtype, ExtensionDtype):
4080 return f"{dtype.na_value}"
4081 else:
4082 dtype_type = dtype.type
4083
4084 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype_type, "NaN")
4085
4086
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if not isinstance(key, tuple):
        # scalar key: drop one level, keeping the index as-is when that
        # would remove the only remaining level
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return original_index
    return index
4118
4119
4120def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
4121 """
4122 Coerce the array-like indexer to the smallest integer dtype that can encode all
4123 of the given categories.
4124
4125 Parameters
4126 ----------
4127 array_like : array-like
4128 categories : array-like
4129 copy : bool
4130
4131 Returns
4132 -------
4133 np.ndarray
4134 Non-writeable.
4135 """
4136 array_like = coerce_indexer_dtype(array_like, categories)
4137 if copy:
4138 array_like = array_like.copy()
4139 array_like.flags.writeable = False
4140 return array_like
4141
4142
4143def _require_listlike(level, arr, arrname: str):
4144 """
4145 Ensure that level is either None or listlike, and arr is list-of-listlike.
4146 """
4147 if level is not None and not is_list_like(level):
4148 if not is_list_like(arr):
4149 raise TypeError(f"{arrname} must be list-like")
4150 if len(arr) > 0 and is_list_like(arr[0]):
4151 raise TypeError(f"{arrname} must be list-like")
4152 level = [level]
4153 arr = [arr]
4154 elif level is None or is_list_like(level):
4155 if not is_list_like(arr) or not is_list_like(arr[0]):
4156 raise TypeError(f"{arrname} must be list of lists-like")
4157 return level, arr