Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/multi.py: 16%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1380 statements  

1from __future__ import annotations 

2 

3from functools import wraps 

4from sys import getsizeof 

5from typing import ( 

6 TYPE_CHECKING, 

7 Any, 

8 Callable, 

9 Collection, 

10 Generator, 

11 Hashable, 

12 Iterable, 

13 List, 

14 Literal, 

15 Sequence, 

16 Tuple, 

17 cast, 

18) 

19import warnings 

20 

21import numpy as np 

22 

23from pandas._config import get_option 

24 

25from pandas._libs import ( 

26 algos as libalgos, 

27 index as libindex, 

28 lib, 

29) 

30from pandas._libs.hashtable import duplicated 

31from pandas._typing import ( 

32 AnyAll, 

33 AnyArrayLike, 

34 Axis, 

35 DropKeep, 

36 DtypeObj, 

37 F, 

38 IgnoreRaise, 

39 IndexLabel, 

40 Scalar, 

41 Shape, 

42 npt, 

43) 

44from pandas.compat.numpy import function as nv 

45from pandas.errors import ( 

46 InvalidIndexError, 

47 PerformanceWarning, 

48 UnsortedIndexError, 

49) 

50from pandas.util._decorators import ( 

51 Appender, 

52 cache_readonly, 

53 doc, 

54) 

55from pandas.util._exceptions import find_stack_level 

56 

57from pandas.core.dtypes.cast import coerce_indexer_dtype 

58from pandas.core.dtypes.common import ( 

59 ensure_int64, 

60 ensure_platform_int, 

61 is_categorical_dtype, 

62 is_extension_array_dtype, 

63 is_hashable, 

64 is_integer, 

65 is_iterator, 

66 is_list_like, 

67 is_object_dtype, 

68 is_scalar, 

69 pandas_dtype, 

70) 

71from pandas.core.dtypes.dtypes import ExtensionDtype 

72from pandas.core.dtypes.generic import ( 

73 ABCDataFrame, 

74 ABCDatetimeIndex, 

75 ABCTimedeltaIndex, 

76) 

77from pandas.core.dtypes.missing import ( 

78 array_equivalent, 

79 isna, 

80) 

81 

82import pandas.core.algorithms as algos 

83from pandas.core.array_algos.putmask import validate_putmask 

84from pandas.core.arrays import Categorical 

85from pandas.core.arrays.categorical import factorize_from_iterables 

86import pandas.core.common as com 

87import pandas.core.indexes.base as ibase 

88from pandas.core.indexes.base import ( 

89 Index, 

90 _index_shared_docs, 

91 ensure_index, 

92 get_unanimous_names, 

93) 

94from pandas.core.indexes.frozen import FrozenList 

95from pandas.core.ops.invalid import make_invalid_op 

96from pandas.core.sorting import ( 

97 get_group_index, 

98 indexer_from_factorized, 

99 lexsort_indexer, 

100) 

101 

102from pandas.io.formats.printing import pprint_thing 

103 

104if TYPE_CHECKING: 

105 from pandas import ( 

106 CategoricalIndex, 

107 DataFrame, 

108 Series, 

109 ) 

110 

111_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

112_index_doc_kwargs.update( 

113 {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"} 

114) 

115 

116 

class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    Engine for a MultiIndex whose label combinations fit in a single uint64.

    Each combination of per-level codes is packed into one positive integer
    so the base uint64 engine can index it directly.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one uint64 each, in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Move each level's codes into its own pre-computed bit range.
        codes <<= self.offsets

        # The shifted bit ranges are disjoint, so OR-ing the columns is
        # interchangeable with summing them; the result is one positive
        # integer per combination.
        if codes.ndim != 1:
            # Multiple keys
            return np.bitwise_or.reduce(codes, axis=1)

        # Single key
        return np.bitwise_or.reduce(codes)

154 

155 

class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    Engine for the (extreme) cases in which the number of possible label
    combinations overflows 64-bit integers; codes are packed into arbitrary
    precision Python ints held by an ObjectEngine.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Pack combination(s) of uint64 codes into one Python integer each, in
        a strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row).

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shift can overflow uint64, so switch to Python ints (object
        # dtype) before moving each level's codes into its bit range.
        shifted = codes.astype("object") << self.offsets

        # The bit ranges are disjoint, so OR-ing the columns is
        # interchangeable with summing them (one positive integer per row).
        if shifted.ndim != 1:
            # Multiple keys
            return np.bitwise_or.reduce(shifted, axis=1)

        # Single key
        return np.bitwise_or.reduce(shifted)

195 

196 

def names_compat(meth: F) -> F:
    """
    A decorator to allow either `name` or `names` keyword but not both.

    This makes it easier to share code with base class.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        if "name" in kwargs:
            if "names" in kwargs:
                raise TypeError("Can only provide one of `names` and `name`")
            # Normalize the singular alias onto the canonical keyword.
            kwargs["names"] = kwargs.pop("name")

        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)

214 

215 

class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape
    dtypes

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels
    get_level_values
    get_indexer
    get_loc
    get_locs
    get_loc_level
    drop

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
       of iterables.
    MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Notes
    -----
    See the `user guide
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
    for more.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex([(1, 'red'),
                (1, 'blue'),
                (2, 'red'),
                (2, 'blue')],
               names=['number', 'color'])

    See further examples for how to construct a MultiIndex in the doc strings
    of the mentioned helper methods.
    """

    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    # Per-level names; None marks an unnamed level.
    _names: list[Hashable | None] = []
    # Per-level Index objects (see the ``levels`` property).
    _levels = FrozenList()
    # Per-level integer codes (see the ``codes`` property).
    _codes = FrozenList()
    _comparables = ["names"]

    # Lexsort depth claimed at construction time; None when unknown.
    sortorder: int | None

    # --------------------------------------------------------------------
    # Constructors

313 

    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ) -> MultiIndex:
        """
        Construct a new MultiIndex from ``levels`` and ``codes``.

        ``name`` is accepted as an alias for ``names`` for compatibility
        with the ``Index`` constructor; ``dtype`` is accepted for signature
        compatibility but is not used here.
        """
        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # Allocate directly; a MultiIndex has no single backing array to
        # hand to a parent constructor.
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # Default to unnamed levels; _set_names performs name validation.
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # _verify_integrity may rewrite codes (e.g. -1 for codes that
            # point at NaN level values), so store its result.
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()
        result._references = None

        return result

360 

361 def _validate_codes(self, level: list, code: list): 

362 """ 

363 Reassign code values as -1 if their corresponding levels are NaN. 

364 

365 Parameters 

366 ---------- 

367 code : list 

368 Code to reassign. 

369 level : list 

370 Level to check for missing values (NaN, NaT, None). 

371 

372 Returns 

373 ------- 

374 new code where code value = -1 if it corresponds 

375 to a level with missing values (NaN, NaT, None). 

376 """ 

377 null_mask = isna(level) 

378 if np.any(null_mask): 

379 # error: Incompatible types in assignment 

380 # (expression has type "ndarray[Any, dtype[Any]]", 

381 # variable has type "List[Any]") 

382 code = np.where(null_mask[code], -1, code) # type: ignore[assignment] 

383 return code 

384 

    def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
        """
        Parameters
        ----------
        codes : optional list
            Codes to check for validity. Defaults to current codes.
        levels : optional list
            Levels to check for validity. Defaults to current levels.

        Raises
        ------
        ValueError
            If length of levels and codes don't match, if the codes for any
            level would exceed level bounds, or there are any duplicate levels.

        Returns
        -------
        new codes where code value = -1 if it corresponds to a
        NaN level.
        """
        # NOTE: Currently does not check, among other things, that cached
        # nlevels matches nor that sortorder matches actually sortorder.
        # Falsy arguments (None or empty) fall back to the stored values.
        codes = codes or self.codes
        levels = levels or self.levels

        if len(levels) != len(codes):
            raise ValueError(
                "Length of levels and codes must match. NOTE: "
                "this index is in an inconsistent state."
            )
        # All per-level code arrays must describe the same number of rows.
        codes_length = len(codes[0])
        for i, (level, level_codes) in enumerate(zip(levels, codes)):
            if len(level_codes) != codes_length:
                raise ValueError(
                    f"Unequal code lengths: {[len(code_) for code_ in codes]}"
                )
            # Codes index into the level's values, so they must be
            # strictly less than the level's length ...
            if len(level_codes) and level_codes.max() >= len(level):
                raise ValueError(
                    f"On level {i}, code max ({level_codes.max()}) >= length of "
                    f"level ({len(level)}). NOTE: this index is in an "
                    "inconsistent state"
                )
            # ... and no smaller than -1 (the missing-value sentinel).
            if len(level_codes) and level_codes.min() < -1:
                raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
            if not level.is_unique:
                raise ValueError(
                    f"Level values must be unique: {list(level)} on level {i}"
                )
        if self.sortorder is not None:
            # The claimed sortorder may not exceed the actual lexsort depth.
            if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
                raise ValueError(
                    "Value for sortorder must be inferior or equal to actual "
                    f"lexsort_depth: sortorder {self.sortorder} "
                    f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
                )

        # Re-point codes at -1 wherever they reference a missing level value.
        codes = [
            self._validate_codes(level, code) for level, code in zip(levels, codes)
        ]
        new_codes = FrozenList(codes)
        return new_codes

446 

447 @classmethod 

448 def from_arrays( 

449 cls, 

450 arrays, 

451 sortorder=None, 

452 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default, 

453 ) -> MultiIndex: 

454 """ 

455 Convert arrays to MultiIndex. 

456 

457 Parameters 

458 ---------- 

459 arrays : list / sequence of array-likes 

460 Each array-like gives one level's value for each data point. 

461 len(arrays) is the number of levels. 

462 sortorder : int or None 

463 Level of sortedness (must be lexicographically sorted by that 

464 level). 

465 names : list / sequence of str, optional 

466 Names for the levels in the index. 

467 

468 Returns 

469 ------- 

470 MultiIndex 

471 

472 See Also 

473 -------- 

474 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

475 MultiIndex.from_product : Make a MultiIndex from cartesian product 

476 of iterables. 

477 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

478 

479 Examples 

480 -------- 

481 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] 

482 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) 

483 MultiIndex([(1, 'red'), 

484 (1, 'blue'), 

485 (2, 'red'), 

486 (2, 'blue')], 

487 names=['number', 'color']) 

488 """ 

489 error_msg = "Input must be a list / sequence of array-likes." 

490 if not is_list_like(arrays): 

491 raise TypeError(error_msg) 

492 if is_iterator(arrays): 

493 arrays = list(arrays) 

494 

495 # Check if elements of array are list-like 

496 for array in arrays: 

497 if not is_list_like(array): 

498 raise TypeError(error_msg) 

499 

500 # Check if lengths of all arrays are equal or not, 

501 # raise ValueError, if not 

502 for i in range(1, len(arrays)): 

503 if len(arrays[i]) != len(arrays[i - 1]): 

504 raise ValueError("all arrays must be same length") 

505 

506 codes, levels = factorize_from_iterables(arrays) 

507 if names is lib.no_default: 

508 names = [getattr(arr, "name", None) for arr in arrays] 

509 

510 return cls( 

511 levels=levels, 

512 codes=codes, 

513 sortorder=sortorder, 

514 names=names, 

515 verify_integrity=False, 

516 ) 

517 

    @classmethod
    @names_compat
    def from_tuples(
        cls,
        tuples: Iterable[tuple[Hashable, ...]],
        sortorder: int | None = None,
        names: Sequence[Hashable] | Hashable = None,
    ) -> MultiIndex:
        """
        Convert list of tuples to MultiIndex.

        Parameters
        ----------
        tuples : list / sequence of tuple-likes
            Each tuple is the index of one row/column.
        sortorder : int or None
            Level of sortedness (must be lexicographically sorted by that
            level).
        names : list / sequence of str, optional
            Names for the levels in the index.

        Returns
        -------
        MultiIndex

        See Also
        --------
        MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
        MultiIndex.from_product : Make a MultiIndex from cartesian product
            of iterables.
        MultiIndex.from_frame : Make a MultiIndex from a DataFrame.

        Examples
        --------
        >>> tuples = [(1, 'red'), (1, 'blue'),
        ...           (2, 'red'), (2, 'blue')]
        >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
        MultiIndex([(1, 'red'),
                    (1, 'blue'),
                    (2, 'red'),
                    (2, 'blue')],
                   names=['number', 'color'])
        """
        if not is_list_like(tuples):
            raise TypeError("Input must be a list / sequence of tuple-likes.")
        if is_iterator(tuples):
            tuples = list(tuples)
        tuples = cast(Collection[Tuple[Hashable, ...]], tuples)

        # handling the empty tuple cases
        if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
            # All entries are the empty tuple: build a single level of empty
            # tuples with every row pointing at the first level value.
            codes = [np.zeros(len(tuples))]
            levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
            return cls(
                levels=levels,
                codes=codes,
                sortorder=sortorder,
                names=names,
                verify_integrity=False,
            )

        arrays: list[Sequence[Hashable]]
        if len(tuples) == 0:
            if names is None:
                raise TypeError("Cannot infer number of levels from empty list")
            # Empty input: one empty array per requested level name.
            # error: Argument 1 to "len" has incompatible type "Hashable";
            # expected "Sized"
            arrays = [[]] * len(names)  # type: ignore[arg-type]
        elif isinstance(tuples, (np.ndarray, Index)):
            if isinstance(tuples, Index):
                tuples = np.asarray(tuples._values)

            arrays = list(lib.tuples_to_object_array(tuples).T)
        elif isinstance(tuples, list):
            arrays = list(lib.to_object_array_tuples(tuples).T)
        else:
            # Generic iterable of tuples: transpose rows to columns via zip.
            arrs = zip(*tuples)
            arrays = cast(List[Sequence[Hashable]], arrs)

        return cls.from_arrays(arrays, sortorder=sortorder, names=names)

598 

599 @classmethod 

600 def from_product( 

601 cls, 

602 iterables: Sequence[Iterable[Hashable]], 

603 sortorder: int | None = None, 

604 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default, 

605 ) -> MultiIndex: 

606 """ 

607 Make a MultiIndex from the cartesian product of multiple iterables. 

608 

609 Parameters 

610 ---------- 

611 iterables : list / sequence of iterables 

612 Each iterable has unique labels for each level of the index. 

613 sortorder : int or None 

614 Level of sortedness (must be lexicographically sorted by that 

615 level). 

616 names : list / sequence of str, optional 

617 Names for the levels in the index. 

618 If not explicitly provided, names will be inferred from the 

619 elements of iterables if an element has a name attribute. 

620 

621 Returns 

622 ------- 

623 MultiIndex 

624 

625 See Also 

626 -------- 

627 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

628 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

629 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

630 

631 Examples 

632 -------- 

633 >>> numbers = [0, 1, 2] 

634 >>> colors = ['green', 'purple'] 

635 >>> pd.MultiIndex.from_product([numbers, colors], 

636 ... names=['number', 'color']) 

637 MultiIndex([(0, 'green'), 

638 (0, 'purple'), 

639 (1, 'green'), 

640 (1, 'purple'), 

641 (2, 'green'), 

642 (2, 'purple')], 

643 names=['number', 'color']) 

644 """ 

645 from pandas.core.reshape.util import cartesian_product 

646 

647 if not is_list_like(iterables): 

648 raise TypeError("Input must be a list / sequence of iterables.") 

649 if is_iterator(iterables): 

650 iterables = list(iterables) 

651 

652 codes, levels = factorize_from_iterables(iterables) 

653 if names is lib.no_default: 

654 names = [getattr(it, "name", None) for it in iterables] 

655 

656 # codes are all ndarrays, so cartesian_product is lossless 

657 codes = cartesian_product(codes) 

658 return cls(levels, codes, sortorder=sortorder, names=names) 

659 

660 @classmethod 

661 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex: 

662 """ 

663 Make a MultiIndex from a DataFrame. 

664 

665 Parameters 

666 ---------- 

667 df : DataFrame 

668 DataFrame to be converted to MultiIndex. 

669 sortorder : int, optional 

670 Level of sortedness (must be lexicographically sorted by that 

671 level). 

672 names : list-like, optional 

673 If no names are provided, use the column names, or tuple of column 

674 names if the columns is a MultiIndex. If a sequence, overwrite 

675 names with the given sequence. 

676 

677 Returns 

678 ------- 

679 MultiIndex 

680 The MultiIndex representation of the given DataFrame. 

681 

682 See Also 

683 -------- 

684 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

685 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

686 MultiIndex.from_product : Make a MultiIndex from cartesian product 

687 of iterables. 

688 

689 Examples 

690 -------- 

691 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], 

692 ... ['NJ', 'Temp'], ['NJ', 'Precip']], 

693 ... columns=['a', 'b']) 

694 >>> df 

695 a b 

696 0 HI Temp 

697 1 HI Precip 

698 2 NJ Temp 

699 3 NJ Precip 

700 

701 >>> pd.MultiIndex.from_frame(df) 

702 MultiIndex([('HI', 'Temp'), 

703 ('HI', 'Precip'), 

704 ('NJ', 'Temp'), 

705 ('NJ', 'Precip')], 

706 names=['a', 'b']) 

707 

708 Using explicit names, instead of the column names 

709 

710 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) 

711 MultiIndex([('HI', 'Temp'), 

712 ('HI', 'Precip'), 

713 ('NJ', 'Temp'), 

714 ('NJ', 'Precip')], 

715 names=['state', 'observation']) 

716 """ 

717 if not isinstance(df, ABCDataFrame): 

718 raise TypeError("Input must be a DataFrame") 

719 

720 column_names, columns = zip(*df.items()) 

721 names = column_names if names is None else names 

722 return cls.from_arrays(columns, sortorder=sortorder, names=names) 

723 

724 # -------------------------------------------------------------------- 

725 

    @cache_readonly
    def _values(self) -> np.ndarray:
        # We override here, since our parent uses _data, which we don't use.
        # Build one dense column per level, then zip them into row tuples.
        values = []

        for i in range(self.nlevels):
            index = self.levels[i]
            codes = self.codes[i]

            vals = index
            if is_categorical_dtype(vals.dtype):
                vals = cast("CategoricalIndex", vals)
                vals = vals._data._internal_get_values()

            if isinstance(vals.dtype, ExtensionDtype) or isinstance(
                vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
            ):
                # Convert EA-backed / datetimelike levels to object dtype so
                # the ndarray conversion below doesn't lose information.
                vals = vals.astype(object)

            vals = np.array(vals, copy=False)
            # Expand level values to one entry per row; fill_value supplies
            # the level's NA value for missing codes.
            vals = algos.take_nd(vals, codes, fill_value=index._na_value)
            values.append(vals)

        # Combine the per-level columns into a single array of row tuples.
        arr = lib.fast_zip(values)
        return arr

751 

    @property
    def values(self) -> np.ndarray:
        """
        Return the materialized values for this index (see ``_values``).
        """
        return self._values

755 

756 @property 

757 def array(self): 

758 """ 

759 Raises a ValueError for `MultiIndex` because there's no single 

760 array backing a MultiIndex. 

761 

762 Raises 

763 ------ 

764 ValueError 

765 """ 

766 raise ValueError( 

767 "MultiIndex has no single backing array. Use " 

768 "'MultiIndex.to_numpy()' to get a NumPy array of tuples." 

769 ) 

770 

771 @cache_readonly 

772 def dtypes(self) -> Series: 

773 """ 

774 Return the dtypes as a Series for the underlying MultiIndex. 

775 """ 

776 from pandas import Series 

777 

778 names = com.fill_missing_names([level.name for level in self.levels]) 

779 return Series([level.dtype for level in self.levels], index=Index(names)) 

780 

781 def __len__(self) -> int: 

782 return len(self.codes[0]) 

783 

    @property
    def size(self) -> int:
        """
        Return the number of elements in the underlying data.
        """
        # override Index.size to avoid materializing _values
        return len(self)

791 

792 # -------------------------------------------------------------------- 

793 # Levels Methods 

794 

    @cache_readonly
    def levels(self) -> FrozenList:
        """
        Per-level Index objects, carrying the MultiIndex's current names.
        """
        # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
        # create new IndexEngine
        # https://github.com/pandas-dev/pandas/issues/31648
        result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
        for level in result:
            # disallow midx.levels[0].name = "foo"
            level._no_setting_name = True
        return FrozenList(result)

805 

    def _set_levels(
        self,
        levels,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """
        Set ``self._levels`` in place (all levels, or only those in ``level``).

        Parameters
        ----------
        levels : sequence of array-likes
            Replacement level values.
        level : sequence of int or level names, optional
            Positions to replace; None replaces all levels.
        copy : bool, default False
            Whether to copy the incoming level data.
        validate : bool, default True
            Check that ``levels`` matches ``level`` (or nlevels) in length.
        verify_integrity : bool, default False
            Re-run ``_verify_integrity`` against the new levels and store
            the resulting codes on ``self._codes``.
        """
        # This is NOT part of the levels property because it should be
        # externally not allowed to set levels. User beware if you change
        # _levels directly
        if validate:
            if len(levels) == 0:
                raise ValueError("Must set non-zero number of levels.")
            if level is None and len(levels) != self.nlevels:
                raise ValueError("Length of levels must match number of levels.")
            if level is not None and len(levels) != len(level):
                raise ValueError("Length of levels must match length of level.")

        if level is None:
            new_levels = FrozenList(
                ensure_index(lev, copy=copy)._view() for lev in levels
            )
        else:
            # Replace only the requested positions, keeping the rest.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_levels_list = list(self._levels)
            for lev_num, lev in zip(level_numbers, levels):
                new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
            new_levels = FrozenList(new_levels_list)

        if verify_integrity:
            new_codes = self._verify_integrity(levels=new_levels)
            self._codes = new_codes

        names = self.names
        self._levels = new_levels
        if any(names):
            # Re-attach names, since the level objects were rebuilt.
            self._set_names(names)

        # Cached properties (levels, _values, ...) depend on _levels.
        self._reset_cache()

847 

    def set_levels(
        self, levels, *, level=None, verify_integrity: bool = True
    ) -> MultiIndex:
        """
        Set new levels on MultiIndex. Defaults to returning new index.

        Parameters
        ----------
        levels : sequence or list of sequence
            New level(s) to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        verify_integrity : bool, default True
            If True, checks that levels and codes are compatible.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples(
        ...     [
        ...         (1, "one"),
        ...         (1, "two"),
        ...         (2, "one"),
        ...         (2, "two"),
        ...         (3, "one"),
        ...         (3, "two")
        ...     ],
        ...     names=["foo", "bar"]
        ... )
        >>> idx
        MultiIndex([(1, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (2, 'two'),
                    (3, 'one'),
                    (3, 'two')],
                   names=['foo', 'bar'])

        >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
        MultiIndex([('a', 1),
                    ('a', 2),
                    ('b', 1),
                    ('b', 2),
                    ('c', 1),
                    ('c', 2)],
                   names=['foo', 'bar'])
        >>> idx.set_levels(['a', 'b', 'c'], level=0)
        MultiIndex([('a', 'one'),
                    ('a', 'two'),
                    ('b', 'one'),
                    ('b', 'two'),
                    ('c', 'one'),
                    ('c', 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_levels(['a', 'b'], level='bar')
        MultiIndex([(1, 'a'),
                    (1, 'b'),
                    (2, 'a'),
                    (2, 'b'),
                    (3, 'a'),
                    (3, 'b')],
                   names=['foo', 'bar'])

        If any of the levels passed to ``set_levels()`` exceeds the
        existing length, all of the values from that argument will
        be stored in the MultiIndex levels, though the values will
        be truncated in the MultiIndex output.

        >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
        MultiIndex([('a', 1),
                    ('a', 2),
                    ('b', 1),
                    ('b', 2),
                    ('c', 1),
                    ('c', 2)],
                   names=['foo', 'bar'])
        >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
        FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
        """

        # Materialize generic list-likes so _require_listlike can inspect them.
        if is_list_like(levels) and not isinstance(levels, Index):
            levels = list(levels)

        level, levels = _require_listlike(level, levels, "Levels")
        # Mutate a fresh view rather than self, so the original is unchanged.
        idx = self._view()
        idx._reset_identity()
        idx._set_levels(
            levels, level=level, validate=True, verify_integrity=verify_integrity
        )
        return idx

941 

    @property
    def nlevels(self) -> int:
        """
        Integer number of levels in this MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
        >>> mi
        MultiIndex([('a', 'b', 'c')],
                   )
        >>> mi.nlevels
        3
        """
        return len(self._levels)

957 

958 @property 

959 def levshape(self) -> Shape: 

960 """ 

961 A tuple with the length of each level. 

962 

963 Examples 

964 -------- 

965 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

966 >>> mi 

967 MultiIndex([('a', 'b', 'c')], 

968 ) 

969 >>> mi.levshape 

970 (1, 1, 1) 

971 """ 

972 return tuple(len(x) for x in self.levels) 

973 

974 # -------------------------------------------------------------------- 

975 # Codes Methods 

976 

    @property
    def codes(self):
        """
        The per-level integer codes (positions into each level's values).
        """
        return self._codes

980 

    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """
        Set ``self._codes`` in place (all levels, or only those in ``level``).

        Parameters
        ----------
        codes : sequence of array-likes
            Replacement codes.
        level : sequence of int or level names, optional
            Positions to replace; None replaces all codes.
        copy : bool, default False
            Whether to copy the incoming codes.
        validate : bool, default True
            Check that ``codes`` matches ``level`` (or nlevels) in length.
        verify_integrity : bool, default False
            Re-run ``_verify_integrity`` on the new codes before storing.
        """
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        if level is None:
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
        else:
            # Replace only the requested positions, keeping the rest.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            new_codes = self._verify_integrity(codes=new_codes)

        self._codes = new_codes

        # Cached properties (_values, etc.) depend on the codes.
        self._reset_cache()

1017 

    def set_codes(self, codes, *, level=None, verify_integrity: bool = True):
        """
        Set new codes on MultiIndex. Defaults to returning new index.

        Parameters
        ----------
        codes : sequence or list of sequence
            New codes to apply.
        level : int, level name, or sequence of int/level names (default None)
            Level(s) to set (None for all levels).
        verify_integrity : bool, default True
            If True, checks that levels and codes are compatible.

        Returns
        -------
        new index (of same type and class...etc) or None
            The same type as the caller or None if ``inplace=True``.

        Examples
        --------
        >>> idx = pd.MultiIndex.from_tuples(
        ...     [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
        ... )
        >>> idx
        MultiIndex([(1, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (2, 'two')],
                   names=['foo', 'bar'])

        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([1, 0, 1, 0], level=0)
        MultiIndex([(2, 'one'),
                    (1, 'two'),
                    (2, 'one'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([0, 0, 1, 1], level='bar')
        MultiIndex([(1, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (2, 'two')],
                   names=['foo', 'bar'])
        >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
        MultiIndex([(2, 'one'),
                    (1, 'one'),
                    (2, 'two'),
                    (1, 'two')],
                   names=['foo', 'bar'])
        """

        level, codes = _require_listlike(level, codes, "Codes")
        # Mutate a fresh view rather than self, so the original is unchanged.
        idx = self._view()
        idx._reset_identity()
        idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
        return idx

1079 

1080 # -------------------------------------------------------------------- 

1081 # Index Internals 

1082 

    @cache_readonly
    def _engine(self):
        """
        Build the lookup engine, packing each tuple's level codes into a
        single integer key when they fit in 64 bits.
        """
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes:
        # NaN values are shifted to 1 and missing values in other while
        # calculating the indexer are shifted to 0
        sizes = np.ceil(
            np.log2(
                [
                    len(level)
                    + libindex.multiindex_nulls_shift  # type: ignore[attr-defined]
                    for level in self.levels
                ]
            )
        )

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)

1114 

    # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
    # type "Type[MultiIndex]" in supertype "Index"
    @property
    def _constructor(self) -> Callable[..., MultiIndex]:  # type: ignore[override]
        # The plain MultiIndex(...) constructor takes levels/codes, not
        # values, so construction-from-values goes through from_tuples.
        return type(self).from_tuples

1120 

1121 @doc(Index._shallow_copy) 

1122 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex: 

1123 names = name if name is not lib.no_default else self.names 

1124 

1125 return type(self).from_tuples(values, sortorder=None, names=names) 

1126 

1127 def _view(self) -> MultiIndex: 

1128 result = type(self)( 

1129 levels=self.levels, 

1130 codes=self.codes, 

1131 sortorder=self.sortorder, 

1132 names=self.names, 

1133 verify_integrity=False, 

1134 ) 

1135 result._cache = self._cache.copy() 

1136 result._cache.pop("levels", None) # GH32669 

1137 return result 

1138 

1139 # -------------------------------------------------------------------- 

1140 

1141 # error: Signature of "copy" incompatible with supertype "Index" 

1142 def copy( # type: ignore[override] 

1143 self, 

1144 names=None, 

1145 deep: bool = False, 

1146 name=None, 

1147 ): 

1148 """ 

1149 Make a copy of this object. 

1150 

1151 Names, dtype, levels and codes can be passed and will be set on new copy. 

1152 

1153 Parameters 

1154 ---------- 

1155 names : sequence, optional 

1156 deep : bool, default False 

1157 name : Label 

1158 Kept for compatibility with 1-dimensional Index. Should not be used. 

1159 

1160 Returns 

1161 ------- 

1162 MultiIndex 

1163 

1164 Notes 

1165 ----- 

1166 In most cases, there should be no functional difference from using 

1167 ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 

1168 This could be potentially expensive on large MultiIndex objects. 

1169 

1170 Examples 

1171 -------- 

1172 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

1173 >>> mi 

1174 MultiIndex([('a', 'b', 'c')], 

1175 ) 

1176 >>> mi.copy() 

1177 MultiIndex([('a', 'b', 'c')], 

1178 ) 

1179 """ 

1180 names = self._validate_names(name=name, names=names, deep=deep) 

1181 keep_id = not deep 

1182 levels, codes = None, None 

1183 

1184 if deep: 

1185 from copy import deepcopy 

1186 

1187 levels = deepcopy(self.levels) 

1188 codes = deepcopy(self.codes) 

1189 

1190 levels = levels if levels is not None else self.levels 

1191 codes = codes if codes is not None else self.codes 

1192 

1193 new_index = type(self)( 

1194 levels=levels, 

1195 codes=codes, 

1196 sortorder=self.sortorder, 

1197 names=names, 

1198 verify_integrity=False, 

1199 ) 

1200 new_index._cache = self._cache.copy() 

1201 new_index._cache.pop("levels", None) # GH32669 

1202 if keep_id: 

1203 new_index._id = self._id 

1204 return new_index 

1205 

    def __array__(self, dtype=None) -> np.ndarray:
        """the array interface, return my values"""
        # ``dtype`` is accepted for numpy-protocol compatibility but is not
        # applied here; the materialized tuple array is returned as-is.
        return self.values

1209 

1210 def view(self, cls=None): 

1211 """this is defined as a copy with the same identity""" 

1212 result = self.copy() 

1213 result._id = self._id 

1214 return result 

1215 

1216 @doc(Index.__contains__) 

1217 def __contains__(self, key: Any) -> bool: 

1218 hash(key) 

1219 try: 

1220 self.get_loc(key) 

1221 return True 

1222 except (LookupError, TypeError, ValueError): 

1223 return False 

1224 

    @cache_readonly
    def dtype(self) -> np.dtype:
        # A MultiIndex is always reported as object dtype; its materialized
        # elements are Python tuples.
        return np.dtype("O")

1228 

1229 def _is_memory_usage_qualified(self) -> bool: 

1230 """return a boolean if we need a qualified .info display""" 

1231 

1232 def f(level) -> bool: 

1233 return "mixed" in level or "string" in level or "unicode" in level 

1234 

1235 return any(f(level) for level in self._inferred_type_levels) 

1236 

    # Cannot determine type of "memory_usage"
    @doc(Index.memory_usage)  # type: ignore[has-type]
    def memory_usage(self, deep: bool = False) -> int:
        # we are overwriting our base class to avoid
        # computing .values here which could materialize
        # a tuple representation unnecessarily
        return self._nbytes(deep)

1244 

    @cache_readonly
    def nbytes(self) -> int:
        """return the number of bytes in the underlying data"""
        # Shallow count only; pass deep=True via memory_usage for a deep count.
        return self._nbytes(False)

1249 

1250 def _nbytes(self, deep: bool = False) -> int: 

1251 """ 

1252 return the number of bytes in the underlying data 

1253 deeply introspect the level data if deep=True 

1254 

1255 include the engine hashtable 

1256 

1257 *this is in internal routine* 

1258 

1259 """ 

1260 # for implementations with no useful getsizeof (PyPy) 

1261 objsize = 24 

1262 

1263 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) 

1264 label_nbytes = sum(i.nbytes for i in self.codes) 

1265 names_nbytes = sum(getsizeof(i, objsize) for i in self.names) 

1266 result = level_nbytes + label_nbytes + names_nbytes 

1267 

1268 # include our engine hashtable 

1269 result += self._engine.sizeof(deep=deep) 

1270 return result 

1271 

1272 # -------------------------------------------------------------------- 

1273 # Rendering Methods 

1274 

1275 def _formatter_func(self, tup): 

1276 """ 

1277 Formats each item in tup according to its level's formatter function. 

1278 """ 

1279 formatter_funcs = [level._formatter_func for level in self.levels] 

1280 return tuple(func(val) for func, val in zip(formatter_funcs, tup)) 

1281 

    def _format_native_types(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Render the index values as an object ndarray of formatted strings,
        substituting ``na_rep`` for missing entries (code -1).
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # Append na_rep as an extra "level value" and point the
                # missing codes at it.
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values

1317 

    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the index as lists of strings.

        When ``adjoin`` is True the per-level columns are joined into display
        lines (a list of strings); otherwise a list of per-level string lists
        is returned. ``name``/``names`` prepend the level names; ``sparsify``
        blanks out repeated values within a level.
        """
        # ``name`` is a legacy alias that overrides ``names`` when given.
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                # Level name (or "" when unnamed) heads each column.
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            from pandas.io.formats.format import get_adjustment

            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels

1391 

1392 # -------------------------------------------------------------------- 

1393 # Names Methods 

1394 

    def _get_names(self) -> FrozenList:
        # Wrap in FrozenList so callers cannot mutate the names in place.
        return FrozenList(self._names)

1397 

    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        # NOTE(review): a None ``names`` falls through to list(None) and
        # raises TypeError here — confirm callers never pass None.
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
                # None entries leave the existing name for that level alone.
                self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()

1455 

    # ``names`` delegates reads to _get_names and writes to _set_names; the
    # ``doc`` argument below is what ``MultiIndex.names?`` displays.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
            (2, 4, 6)],
           names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )

1474 

1475 # -------------------------------------------------------------------- 

1476 

    @cache_readonly
    def inferred_type(self) -> str:
        # A MultiIndex always reports "mixed"; per-level inferred types are
        # available via _inferred_type_levels.
        return "mixed"

1480 

    def _get_level_number(self, level) -> int:
        """
        Translate a level name or (possibly negative) integer position into a
        zero-based level number.

        Raises
        ------
        ValueError
            If ``level`` is a name shared by multiple levels.
        KeyError
            If ``level`` is a name that does not exist.
        IndexError
            If ``level`` is an integer outside ``[-nlevels, nlevels)``.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            # Name lookup first; integers fall through to positional handling.
            level = self.names.index(level)
        except ValueError as err:
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level

1507 

    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # Code -1 marks a missing value; any NaN makes the index non-monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing

1538 

    @cache_readonly
    def is_monotonic_decreasing(self) -> bool:
        """
        Return a boolean if the values are equal or decreasing.
        """
        # monotonic decreasing if and only if reverse is monotonic increasing
        return self[::-1].is_monotonic_increasing

1546 

    @cache_readonly
    def _inferred_type_levels(self) -> list[str]:
        """return a list of the inferred types, one for each level"""
        return [i.inferred_type for i in self.levels]

1551 

1552 @doc(Index.duplicated) 

1553 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: 

1554 shape = tuple(len(lev) for lev in self.levels) 

1555 ids = get_group_index(self.codes, shape, sort=False, xnull=False) 

1556 

1557 return duplicated(ids, keep) 

1558 

1559 # error: Cannot override final attribute "_duplicated" 

1560 # (previously declared in base class "IndexOpsMixin") 

1561 _duplicated = duplicated # type: ignore[misc] 

1562 

1563 def fillna(self, value=None, downcast=None): 

1564 """ 

1565 fillna is not implemented for MultiIndex 

1566 """ 

1567 raise NotImplementedError("isna is not defined for MultiIndex") 

1568 

1569 @doc(Index.dropna) 

1570 def dropna(self, how: AnyAll = "any") -> MultiIndex: 

1571 nans = [level_codes == -1 for level_codes in self.codes] 

1572 if how == "any": 

1573 indexer = np.any(nans, axis=0) 

1574 elif how == "all": 

1575 indexer = np.all(nans, axis=0) 

1576 else: 

1577 raise ValueError(f"invalid how option: {how}") 

1578 

1579 new_codes = [level_codes[~indexer] for level_codes in self.codes] 

1580 return self.set_codes(codes=new_codes) 

1581 

1582 def _get_level_values(self, level: int, unique: bool = False) -> Index: 

1583 """ 

1584 Return vector of label values for requested level, 

1585 equal to the length of the index 

1586 

1587 **this is an internal method** 

1588 

1589 Parameters 

1590 ---------- 

1591 level : int 

1592 unique : bool, default False 

1593 if True, drop duplicated values 

1594 

1595 Returns 

1596 ------- 

1597 Index 

1598 """ 

1599 lev = self.levels[level] 

1600 level_codes = self.codes[level] 

1601 name = self._names[level] 

1602 if unique: 

1603 level_codes = algos.unique(level_codes) 

1604 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value) 

1605 return lev._shallow_copy(filled, name=name) 

1606 

    def get_level_values(self, level):
        """
        Return vector of label values for requested level.

        Length of returned vector is equal to the length of the index.

        Parameters
        ----------
        level : int or str
            ``level`` is either the integer position of the level in the
            MultiIndex, or the name of the level.

        Returns
        -------
        Index
            Values is a level of this MultiIndex converted to
            a single :class:`Index` (or subclass thereof).

        Notes
        -----
        If the level contains missing values, the result may be casted to
        ``float`` with missing values specified as ``NaN``. This is because
        the level is converted to a regular ``Index``.

        Examples
        --------
        Create a MultiIndex:

        >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
        >>> mi.names = ['level_1', 'level_2']

        Get level values by supplying level as either integer or name:

        >>> mi.get_level_values(0)
        Index(['a', 'b', 'c'], dtype='object', name='level_1')
        >>> mi.get_level_values('level_2')
        Index(['d', 'e', 'f'], dtype='object', name='level_2')

        If a level contains missing values, the return type of the level
        may be cast to ``float``.

        >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
        level_0    int64
        level_1    int64
        dtype: object
        >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
        Index([1.0, nan, 2.0], dtype='float64')
        """
        # Resolve a name or negative position to a zero-based level number,
        # then materialize that level's values.
        level = self._get_level_number(level)
        values = self._get_level_values(level)
        return values

1658 

1659 @doc(Index.unique) 

1660 def unique(self, level=None): 

1661 if level is None: 

1662 return self.drop_duplicates() 

1663 else: 

1664 level = self._get_level_number(level) 

1665 return self._get_level_values(level=level, unique=True) 

1666 

    def to_frame(
        self,
        index: bool = True,
        name=lib.no_default,
        allow_duplicates: bool = False,
    ) -> DataFrame:
        """
        Create a DataFrame with the levels of the MultiIndex as columns.

        Column ordering is determined by the DataFrame constructor with data as
        a dict.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original MultiIndex.

        name : list / sequence of str, optional
            The passed names should substitute index level names.

        allow_duplicates : bool, optional default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        DataFrame

        See Also
        --------
        DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
            tabular data.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
        >>> mi
        MultiIndex([('a', 'c'),
                    ('b', 'd')],
                   )

        >>> df = mi.to_frame()
        >>> df
             0  1
        a c  a  c
        b d  b  d

        >>> df = mi.to_frame(index=False)
        >>> df
           0  1
        0  a  c
        1  b  d

        >>> df = mi.to_frame(name=['x', 'y'])
        >>> df
             x  y
        a c  a  c
        b d  b  d
        """
        from pandas import DataFrame

        if name is not lib.no_default:
            if not is_list_like(name):
                raise TypeError("'name' must be a list / sequence of column names.")

            if len(name) != len(self.levels):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )
            idx_names = name
        else:
            idx_names = self._get_level_names()

        if not allow_duplicates and len(set(idx_names)) != len(idx_names):
            raise ValueError(
                "Cannot create duplicate column labels if allow_duplicates is False"
            )

        # Guarantee resulting column order - PY36+ dict maintains insertion order
        result = DataFrame(
            {level: self._get_level_values(level) for level in range(len(self.levels))},
            copy=False,
        )
        # Rename after construction so the dict keys above stay the simple
        # positional integers.
        result.columns = idx_names

        if index:
            result.index = self
        return result

1756 

    # error: Return type "Index" of "to_flat_index" incompatible with return type
    # "MultiIndex" in supertype "Index"
    def to_flat_index(self) -> Index:  # type: ignore[override]
        """
        Convert a MultiIndex to an Index of Tuples containing the level values.

        Returns
        -------
        pd.Index
            Index with the MultiIndex data represented in Tuples.

        See Also
        --------
        MultiIndex.from_tuples : Convert flat index back to MultiIndex.

        Notes
        -----
        This method will simply return the caller if called by anything other
        than a MultiIndex.

        Examples
        --------
        >>> index = pd.MultiIndex.from_product(
        ...     [['foo', 'bar'], ['baz', 'qux']],
        ...     names=['a', 'b'])
        >>> index.to_flat_index()
        Index([('foo', 'baz'), ('foo', 'qux'),
               ('bar', 'baz'), ('bar', 'qux')],
              dtype='object')
        """
        # tupleize_cols=False stops the constructor from re-interpreting the
        # tuples as a MultiIndex.
        return Index(self._values, tupleize_cols=False)

1788 

    def _is_lexsorted(self) -> bool:
        """
        Return True if the codes are lexicographically sorted.

        Returns
        -------
        bool

        Examples
        --------
        In the below examples, the first level of the MultiIndex is sorted because
        a<b<c, so there is no need to look at the next level.

        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'e', 'f']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
        ...                            ['d', 'f', 'e']])._is_lexsorted()
        True

        In case there is a tie, the lexicographical sorting looks
        at the next level of the MultiIndex.

        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
        False
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
        True
        >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
        ...                            ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
        False
        """
        # Fully lexsorted means every level participates in the sort order.
        return self._lexsort_depth == self.nlevels

1824 

    @cache_readonly
    def _lexsort_depth(self) -> int:
        """
        Compute and return the lexsort_depth, the number of levels of the
        MultiIndex that are sorted lexically

        Returns
        -------
        int
        """
        # A known sortorder short-circuits the per-code computation.
        if self.sortorder is not None:
            return self.sortorder
        return _lexsort_depth(self.codes, self.nlevels)

1838 

    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
            ('a', 'aa'),
            ('b', 'bb'),
            ('b', 'aa')],
           )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
            ('a', 'bb'),
            ('b', 'aa'),
            ('b', 'bb')],
           )
        """
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    # Incomparable entries: leave this level unchanged unless
                    # the caller asked us to surface the error.
                    if raise_if_incomparable:
                        raise
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    # Remap old codes through the reverse indexer so each row
                    # still points at the same level value.
                    level_codes = algos.take_nd(ri, level_codes)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )

1905 

    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this exits the whole loop, leaving any
                    # remaining levels unprocessed — appears to special-case
                    # levels that themselves contain NA; confirm intent.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result

1990 

1991 # -------------------------------------------------------------------- 

1992 # Pickling Methods 

1993 

1994 def __reduce__(self): 

1995 """Necessary for making this object picklable""" 

1996 d = { 

1997 "levels": list(self.levels), 

1998 "codes": list(self.codes), 

1999 "sortorder": self.sortorder, 

2000 "names": list(self.names), 

2001 } 

2002 return ibase._new_Index, (type(self), d), None 

2003 

2004 # -------------------------------------------------------------------- 

2005 

    def __getitem__(self, key):
        """
        Return the tuple at an integer position, or a new MultiIndex for
        slice/array-like keys.
        """
        if is_scalar(key):
            key = com.cast_scalar_indexer(key)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # Code -1 marks a missing value at this level.
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                sortorder = self.sortorder
            elif isinstance(key, slice):
                # Only a forward slice preserves the known sort order.
                if key.step is None or key.step > 0:
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )

2039 

2040 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: 

2041 """ 

2042 Fastpath for __getitem__ when we know we have a slice. 

2043 """ 

2044 sortorder = None 

2045 if slobj.step is None or slobj.step > 0: 

2046 sortorder = self.sortorder 

2047 

2048 new_codes = [level_codes[slobj] for level_codes in self.codes] 

2049 

2050 return type(self)( 

2051 levels=self.levels, 

2052 codes=new_codes, 

2053 names=self._names, 

2054 sortorder=sortorder, 

2055 verify_integrity=False, 

2056 ) 

2057 

2058 @Appender(_index_shared_docs["take"] % _index_doc_kwargs) 

2059 def take( 

2060 self: MultiIndex, 

2061 indices, 

2062 axis: Axis = 0, 

2063 allow_fill: bool = True, 

2064 fill_value=None, 

2065 **kwargs, 

2066 ) -> MultiIndex: 

2067 nv.validate_take((), kwargs) 

2068 indices = ensure_platform_int(indices) 

2069 

2070 # only fill if we are passing a non-None fill_value 

2071 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) 

2072 

2073 na_value = -1 

2074 

2075 taken = [lab.take(indices) for lab in self.codes] 

2076 if allow_fill: 

2077 mask = indices == -1 

2078 if mask.any(): 

2079 masked = [] 

2080 for new_label in taken: 

2081 label_values = new_label 

2082 label_values[mask] = na_value 

2083 masked.append(np.asarray(label_values)) 

2084 taken = masked 

2085 

2086 return MultiIndex( 

2087 levels=self.levels, codes=taken, names=self.names, verify_integrity=False 

2088 ) 

2089 

2090 def append(self, other): 

2091 """ 

2092 Append a collection of Index options together. 

2093 

2094 Parameters 

2095 ---------- 

2096 other : Index or list/tuple of indices 

2097 

2098 Returns 

2099 ------- 

2100 Index 

2101 The combined index. 

2102 

2103 Examples 

2104 -------- 

2105 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']]) 

2106 >>> mi 

2107 MultiIndex([('a', 'b')], 

2108 ) 

2109 >>> mi.append(mi) 

2110 MultiIndex([('a', 'b'), ('a', 'b')], 

2111 ) 

2112 """ 

2113 if not isinstance(other, (list, tuple)): 

2114 other = [other] 

2115 

2116 if all( 

2117 (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other 

2118 ): 

2119 arrays, names = [], [] 

2120 for i in range(self.nlevels): 

2121 label = self._get_level_values(i) 

2122 appended = [o._get_level_values(i) for o in other] 

2123 arrays.append(label.append(appended)) 

2124 single_label_name = all(label.name == x.name for x in appended) 

2125 names.append(label.name if single_label_name else None) 

2126 return MultiIndex.from_arrays(arrays, names=names) 

2127 

2128 to_concat = (self._values,) + tuple(k._values for k in other) 

2129 new_tuples = np.concatenate(to_concat) 

2130 

2131 # if all(isinstance(x, MultiIndex) for x in other): 

2132 try: 

2133 # We only get here if other contains at least one index with tuples, 

2134 # setting names to None automatically 

2135 return MultiIndex.from_tuples(new_tuples) 

2136 except (TypeError, IndexError): 

2137 return Index(new_tuples) 

2138 

2139 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: 

2140 if len(args) == 0 and len(kwargs) == 0: 

2141 # lexsort is significantly faster than self._values.argsort() 

2142 target = self._sort_levels_monotonic(raise_if_incomparable=True) 

2143 return lexsort_indexer(target._get_codes_for_sorting()) 

2144 return self._values.argsort(*args, **kwargs) 

2145 

2146 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) 

2147 def repeat(self, repeats: int, axis=None) -> MultiIndex: 

2148 nv.validate_repeat((), {"axis": axis}) 

2149 # error: Incompatible types in assignment (expression has type "ndarray", 

2150 # variable has type "int") 

2151 repeats = ensure_platform_int(repeats) # type: ignore[assignment] 

2152 return MultiIndex( 

2153 levels=self.levels, 

2154 codes=[ 

2155 level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats) 

2156 for level_codes in self.codes 

2157 ], 

2158 names=self.names, 

2159 sortorder=self.sortorder, 

2160 verify_integrity=False, 

2161 ) 

2162 

    # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make new MultiIndex with passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified.
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', labels that are not found are silently skipped.

        Returns
        -------
        MultiIndex
        """
        if level is not None:
            # single-level drop is handled by a dedicated helper
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # accumulate the positional indices to delete
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # convert boolean mask to positional indices
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)

2222 

    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop the given labels from a single level, returning the filtered
        MultiIndex. Raises KeyError for labels missing from the level unless
        ``errors='ignore'``.
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        # -2 marks "label not present in the level" (distinct from -1 = NaN code)
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep the rows whose code in level i is NOT among the dropped values
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]

2243 

2244 def swaplevel(self, i=-2, j=-1) -> MultiIndex: 

2245 """ 

2246 Swap level i with level j. 

2247 

2248 Calling this method does not change the ordering of the values. 

2249 

2250 Parameters 

2251 ---------- 

2252 i : int, str, default -2 

2253 First level of index to be swapped. Can pass level name as string. 

2254 Type of parameters can be mixed. 

2255 j : int, str, default -1 

2256 Second level of index to be swapped. Can pass level name as string. 

2257 Type of parameters can be mixed. 

2258 

2259 Returns 

2260 ------- 

2261 MultiIndex 

2262 A new MultiIndex. 

2263 

2264 See Also 

2265 -------- 

2266 Series.swaplevel : Swap levels i and j in a MultiIndex. 

2267 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a 

2268 particular axis. 

2269 

2270 Examples 

2271 -------- 

2272 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], 

2273 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) 

2274 >>> mi 

2275 MultiIndex([('a', 'bb'), 

2276 ('a', 'aa'), 

2277 ('b', 'bb'), 

2278 ('b', 'aa')], 

2279 ) 

2280 >>> mi.swaplevel(0, 1) 

2281 MultiIndex([('bb', 'a'), 

2282 ('aa', 'a'), 

2283 ('bb', 'b'), 

2284 ('aa', 'b')], 

2285 ) 

2286 """ 

2287 new_levels = list(self.levels) 

2288 new_codes = list(self.codes) 

2289 new_names = list(self.names) 

2290 

2291 i = self._get_level_number(i) 

2292 j = self._get_level_number(j) 

2293 

2294 new_levels[i], new_levels[j] = new_levels[j], new_levels[i] 

2295 new_codes[i], new_codes[j] = new_codes[j], new_codes[i] 

2296 new_names[i], new_names[j] = new_names[j], new_names[i] 

2297 

2298 return MultiIndex( 

2299 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2300 ) 

2301 

2302 def reorder_levels(self, order) -> MultiIndex: 

2303 """ 

2304 Rearrange levels using input order. May not drop or duplicate levels. 

2305 

2306 Parameters 

2307 ---------- 

2308 order : list of int or list of str 

2309 List representing new level order. Reference level by number 

2310 (position) or by key (label). 

2311 

2312 Returns 

2313 ------- 

2314 MultiIndex 

2315 

2316 Examples 

2317 -------- 

2318 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) 

2319 >>> mi 

2320 MultiIndex([(1, 3), 

2321 (2, 4)], 

2322 names=['x', 'y']) 

2323 

2324 >>> mi.reorder_levels(order=[1, 0]) 

2325 MultiIndex([(3, 1), 

2326 (4, 2)], 

2327 names=['y', 'x']) 

2328 

2329 >>> mi.reorder_levels(order=['y', 'x']) 

2330 MultiIndex([(3, 1), 

2331 (4, 2)], 

2332 names=['y', 'x']) 

2333 """ 

2334 order = [self._get_level_number(i) for i in order] 

2335 if len(order) != self.nlevels: 

2336 raise AssertionError( 

2337 f"Length of order must be same as number of levels ({self.nlevels}), " 

2338 f"got {len(order)}" 

2339 ) 

2340 new_levels = [self.levels[i] for i in order] 

2341 new_codes = [self.codes[i] for i in order] 

2342 new_names = [self.names[i] for i in order] 

2343 

2344 return MultiIndex( 

2345 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2346 ) 

2347 

2348 def _get_codes_for_sorting(self) -> list[Categorical]: 

2349 """ 

2350 we are categorizing our codes by using the 

2351 available categories (all, not just observed) 

2352 excluding any missing ones (-1); this is in preparation 

2353 for sorting, where we need to disambiguate that -1 is not 

2354 a valid valid 

2355 """ 

2356 

2357 def cats(level_codes): 

2358 return np.arange( 

2359 np.array(level_codes).max() + 1 if len(level_codes) else 0, 

2360 dtype=level_codes.dtype, 

2361 ) 

2362 

2363 return [ 

2364 Categorical.from_codes(level_codes, cats(level_codes), ordered=True) 

2365 for level_codes in self.codes 

2366 ] 

2367 

    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            # per-level sort directions: delegate straight to lexsort
            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:
            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # remaining levels act as secondary sort keys
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                # only sorted by one level: record it as the sortorder
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer

2481 

2482 def _wrap_reindex_result(self, target, indexer, preserve_names: bool): 

2483 if not isinstance(target, MultiIndex): 

2484 if indexer is None: 

2485 target = self 

2486 elif (indexer >= 0).all(): 

2487 target = self.take(indexer) 

2488 else: 

2489 try: 

2490 target = MultiIndex.from_tuples(target) 

2491 except TypeError: 

2492 # not all tuples, see test_constructor_dict_multiindex_reindex_flat 

2493 return target 

2494 

2495 target = self._maybe_preserve_names(target, preserve_names) 

2496 return target 

2497 

2498 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: 

2499 if ( 

2500 preserve_names 

2501 and target.nlevels == self.nlevels 

2502 and target.names != self.names 

2503 ): 

2504 target = target.copy(deep=False) 

2505 target.names = self.names 

2506 return target 

2507 

2508 # -------------------------------------------------------------------- 

2509 # Indexing Methods 

2510 

2511 def _check_indexing_error(self, key) -> None: 

2512 if not is_hashable(key) or is_iterator(key): 

2513 # We allow tuples if they are hashable, whereas other Index 

2514 # subclasses require scalar. 

2515 # We have to explicitly exclude generators, as these are hashable. 

2516 raise InvalidIndexError(key) 

2517 

    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        """
        Should integer key(s) be treated as positional?
        """
        # GH#33355 -- delegate the decision to the first level's index
        return self.levels[0]._should_fallback_to_positional

2525 

2526 def _get_indexer_strict( 

2527 self, key, axis_name: str 

2528 ) -> tuple[Index, npt.NDArray[np.intp]]: 

2529 keyarr = key 

2530 if not isinstance(keyarr, Index): 

2531 keyarr = com.asarray_tuplesafe(keyarr) 

2532 

2533 if len(keyarr) and not isinstance(keyarr[0], tuple): 

2534 indexer = self._get_indexer_level_0(keyarr) 

2535 

2536 self._raise_if_missing(key, indexer, axis_name) 

2537 return self[indexer], indexer 

2538 

2539 return super()._get_indexer_strict(key, axis_name) 

2540 

2541 def _raise_if_missing(self, key, indexer, axis_name: str) -> None: 

2542 keyarr = key 

2543 if not isinstance(key, Index): 

2544 keyarr = com.asarray_tuplesafe(key) 

2545 

2546 if len(keyarr) and not isinstance(keyarr[0], tuple): 

2547 # i.e. same condition for special case in MultiIndex._get_indexer_strict 

2548 

2549 mask = indexer == -1 

2550 if mask.any(): 

2551 check = self.levels[0].get_indexer(keyarr) 

2552 cmask = check == -1 

2553 if cmask.any(): 

2554 raise KeyError(f"{keyarr[cmask]} not in index") 

2555 # We get here when levels still contain values which are not 

2556 # actually in Index anymore 

2557 raise KeyError(f"{keyarr} not in index") 

2558 else: 

2559 return super()._raise_if_missing(key, indexer, axis_name) 

2560 

2561 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: 

2562 """ 

2563 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. 

2564 """ 

2565 lev = self.levels[0] 

2566 codes = self._codes[0] 

2567 cat = Categorical.from_codes(codes=codes, categories=lev) 

2568 ci = Index(cat) 

2569 return ci.get_indexer_for(target) 

2570 

2571 def get_slice_bound( 

2572 self, 

2573 label: Hashable | Sequence[Hashable], 

2574 side: Literal["left", "right"], 

2575 ) -> int: 

2576 """ 

2577 For an ordered MultiIndex, compute slice bound 

2578 that corresponds to given label. 

2579 

2580 Returns leftmost (one-past-the-rightmost if `side=='right') position 

2581 of given label. 

2582 

2583 Parameters 

2584 ---------- 

2585 label : object or tuple of objects 

2586 side : {'left', 'right'} 

2587 

2588 Returns 

2589 ------- 

2590 int 

2591 Index of label. 

2592 

2593 Notes 

2594 ----- 

2595 This method only works if level 0 index of the MultiIndex is lexsorted. 

2596 

2597 Examples 

2598 -------- 

2599 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) 

2600 

2601 Get the locations from the leftmost 'b' in the first level 

2602 until the end of the multiindex: 

2603 

2604 >>> mi.get_slice_bound('b', side="left") 

2605 1 

2606 

2607 Like above, but if you get the locations from the rightmost 

2608 'b' in the first level and 'f' in the second level: 

2609 

2610 >>> mi.get_slice_bound(('b','f'), side="right") 

2611 3 

2612 

2613 See Also 

2614 -------- 

2615 MultiIndex.get_loc : Get location for a label or a tuple of labels. 

2616 MultiIndex.get_locs : Get location for a label/slice/list/mask or a 

2617 sequence of such. 

2618 """ 

2619 if not isinstance(label, tuple): 

2620 label = (label,) 

2621 return self._partial_tup_index(label, side=side) 

2622 

    # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)

2678 

    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple `tup`,
        narrowing [start, end) one level at a time via searchsorted on each
        level's codes. Requires the index to be lexsorted at least as deep as
        len(tup).
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # only the codes inside the current [start, end) window matter
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # narrow the window to the run of rows matching this level's code
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)

2728 

2729 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: 

2730 """ 

2731 If key is NA value, location of index unify as -1. 

2732 

2733 Parameters 

2734 ---------- 

2735 level_index: Index 

2736 key : label 

2737 

2738 Returns 

2739 ------- 

2740 loc : int 

2741 If key is NA value, loc is -1 

2742 Else, location of key in index. 

2743 

2744 See Also 

2745 -------- 

2746 Index.get_loc : The get_loc method for (single-level) index. 

2747 """ 

2748 if is_scalar(key) and isna(key): 

2749 # TODO: need is_valid_na_for_dtype(key, level_index.dtype) 

2750 return -1 

2751 else: 

2752 return level_index.get_loc(key) 

2753 

    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # fall back to a full-length boolean mask
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # scalar key: match against level 0 only
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # complete key on a unique index: the hash engine can answer directly
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # linear scan: keep only positions whose codes match the trailing key
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)

2873 

2874 def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): 

2875 """ 

2876 Get location and sliced index for requested label(s)/level(s). 

2877 

2878 Parameters 

2879 ---------- 

2880 key : label or sequence of labels 

2881 level : int/level name or list thereof, optional 

2882 drop_level : bool, default True 

2883 If ``False``, the resulting index will not drop any level. 

2884 

2885 Returns 

2886 ------- 

2887 tuple 

2888 A 2-tuple where the elements : 

2889 

2890 Element 0: int, slice object or boolean array. 

2891 

2892 Element 1: The resulting sliced multiindex/index. If the key 

2893 contains all levels, this will be ``None``. 

2894 

2895 See Also 

2896 -------- 

2897 MultiIndex.get_loc : Get location for a label or a tuple of labels. 

2898 MultiIndex.get_locs : Get location for a label/slice/list/mask or a 

2899 sequence of such. 

2900 

2901 Examples 

2902 -------- 

2903 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], 

2904 ... names=['A', 'B']) 

2905 

2906 >>> mi.get_loc_level('b') 

2907 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) 

2908 

2909 >>> mi.get_loc_level('e', level='B') 

2910 (array([False, True, False]), Index(['b'], dtype='object', name='A')) 

2911 

2912 >>> mi.get_loc_level(['b', 'e']) 

2913 (1, None) 

2914 """ 

2915 if not isinstance(level, (list, tuple)): 

2916 level = self._get_level_number(level) 

2917 else: 

2918 level = [self._get_level_number(lev) for lev in level] 

2919 

2920 loc, mi = self._get_loc_level(key, level=level) 

2921 if not drop_level: 

2922 if lib.is_integer(loc): 

2923 mi = self[loc : loc + 1] 

2924 else: 

2925 mi = self[loc] 

2926 return loc, mi 

2927 

    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # drop from highest level number down so positions stay valid
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # one sub-key per requested level; intersect the per-level indexers
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # normalize to a boolean mask so it can be AND-ed below
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # tuple containing slices: combine per-level boolean indexers
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # scalar key against a single (possibly non-zero) level
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                result_index = self[indexer]

            return indexer, result_index

3076 

    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        # Locate `key` within a single level, returning either a slice (when
        # lexsort depth allows taking a view) or a boolean array over the full
        # length of self. Raises KeyError when the label exists in the level
        # but is unused by the codes.
        #
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                # stepped slice: match only codes hit by range(start, stop, step)
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # scatter the sub-result back into a full-length boolean mask
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)

3189 

    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Normalize a per-level slice result to a full-length boolean mask.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take
        indexer: npt.NDArray[np.bool_] | None = None

        # Levels are AND-ed together; within a list-like level key the
        # individual labels are OR-ed.
        for i, k in enumerate(seq):
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)

3320 

3321 # -------------------------------------------------------------------- 

3322 

    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    # only a reversed slice forces a reorder on a sorted index
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend: np.lexsort sorts by the LAST key first
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]

3408 

3409 def truncate(self, before=None, after=None) -> MultiIndex: 

3410 """ 

3411 Slice index between two labels / tuples, return new MultiIndex. 

3412 

3413 Parameters 

3414 ---------- 

3415 before : label or tuple, can be partial. Default None 

3416 None defaults to start. 

3417 after : label or tuple, can be partial. Default None 

3418 None defaults to end. 

3419 

3420 Returns 

3421 ------- 

3422 MultiIndex 

3423 The truncated MultiIndex. 

3424 

3425 Examples 

3426 -------- 

3427 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']]) 

3428 >>> mi 

3429 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')], 

3430 ) 

3431 >>> mi.truncate(before='a', after='b') 

3432 MultiIndex([('a', 'x'), ('b', 'y')], 

3433 ) 

3434 """ 

3435 if after and before and after < before: 

3436 raise ValueError("after < before") 

3437 

3438 i, j = self.levels[0].slice_locs(before, after) 

3439 left, right = self.slice_locs(before, after) 

3440 

3441 new_levels = list(self.levels) 

3442 new_levels[0] = new_levels[0][i:j] 

3443 

3444 new_codes = [level_codes[left:right] for level_codes in self.codes] 

3445 new_codes[0] = new_codes[0] - i 

3446 

3447 return MultiIndex( 

3448 levels=new_levels, 

3449 codes=new_codes, 

3450 names=self._names, 

3451 verify_integrity=False, 

3452 ) 

3453 

    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        Returns
        -------
        bool
            True if both indexes label the same positions with equal values.

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            # -1 codes mark missing values; the NA positions must line up.
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                if not array_equivalent(self_values, other_values):
                    return False

        return True

3515 

3516 def equal_levels(self, other: MultiIndex) -> bool: 

3517 """ 

3518 Return True if the levels of both MultiIndex objects are the same 

3519 

3520 """ 

3521 if self.nlevels != other.nlevels: 

3522 return False 

3523 

3524 for i in range(self.nlevels): 

3525 if not self.levels[i].equals(other.levels[i]): 

3526 return False 

3527 return True 

3528 

3529 # -------------------------------------------------------------------- 

3530 # Set Methods 

3531 

    def _union(self, other, sort) -> MultiIndex:
        # Union of self and other. Fast path appends only the rows of `other`
        # missing from self; falls back to the generic base-class union when
        # `other` has duplicates.
        other, result_names = self._convert_can_do_setop(other)
        if other.has_duplicates:
            # This is only necessary if other has dupes,
            # otherwise difference is faster
            result = super()._union(other, sort)

            if isinstance(result, MultiIndex):
                return result
            # base-class union produced a flat Index of tuples; rebuild a MI
            return MultiIndex.from_arrays(
                zip(*result), sortorder=None, names=result_names
            )

        else:
            right_missing = other.difference(self, sort=False)
            if len(right_missing):
                result = self.append(right_missing)
            else:
                result = self._get_reconciled_name_object(other)

            if sort is not False:
                try:
                    result = result.sort_values()
                except TypeError:
                    # unorderable values: sort=None degrades to a warning,
                    # sort=True propagates the error
                    if sort is True:
                        raise
                    warnings.warn(
                        "The values in the array are unorderable. "
                        "Pass `sort=False` to suppress this warning.",
                        RuntimeWarning,
                        stacklevel=find_stack_level(),
                    )
            return result

3565 

3566 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

3567 return is_object_dtype(dtype) 

3568 

3569 def _get_reconciled_name_object(self, other) -> MultiIndex: 

3570 """ 

3571 If the result of a set operation will be self, 

3572 return self, unless the names change, in which 

3573 case make a shallow copy of self. 

3574 """ 

3575 names = self._maybe_match_names(other) 

3576 if self.names != names: 

3577 # error: Cannot determine type of "rename" 

3578 return self.rename(names) # type: ignore[has-type] 

3579 return self 

3580 

3581 def _maybe_match_names(self, other): 

3582 """ 

3583 Try to find common names to attach to the result of an operation between 

3584 a and b. Return a consensus list of names if they match at least partly 

3585 or list of None if they have completely different names. 

3586 """ 

3587 if len(self.names) != len(other.names): 

3588 return [None] * len(self.names) 

3589 names = [] 

3590 for a_name, b_name in zip(self.names, other.names): 

3591 if a_name == b_name: 

3592 names.append(a_name) 

3593 else: 

3594 # TODO: what if they both have np.nan for their names? 

3595 names.append(None) 

3596 return names 

3597 

3598 def _wrap_intersection_result(self, other, result) -> MultiIndex: 

3599 _, result_names = self._convert_can_do_setop(other) 

3600 return result.set_names(result_names) 

3601 

3602 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex: 

3603 _, result_names = self._convert_can_do_setop(other) 

3604 

3605 if len(result) == 0: 

3606 return result.remove_unused_levels().set_names(result_names) 

3607 else: 

3608 return result.set_names(result_names) 

3609 

    def _convert_can_do_setop(self, other):
        """
        Coerce `other` to an Index usable in a set operation with self.

        Returns
        -------
        tuple
            (other as an Index, consensus result names).
        """
        result_names = self.names

        if not isinstance(other, Index):
            if len(other) == 0:
                # empty input: use an empty slice of self, keeping our names
                return self[:0], self.names
            else:
                msg = "other must be a MultiIndex or a list of tuples"
                try:
                    other = MultiIndex.from_tuples(other, names=self.names)
                except (ValueError, TypeError) as err:
                    # ValueError raised by tuples_to_object_array if we
                    # have non-object dtype
                    raise TypeError(msg) from err
        else:
            result_names = get_unanimous_names(self, other)

        return other, result_names

3628 

3629 # -------------------------------------------------------------------- 

3630 

    @doc(Index.astype)
    def astype(self, dtype, copy: bool = True):
        dtype = pandas_dtype(dtype)
        if is_categorical_dtype(dtype):
            # a MultiIndex holds tuples; a >1-dim Categorical cannot represent it
            msg = "> 1 ndim Categorical are not supported at this time"
            raise NotImplementedError(msg)
        if not is_object_dtype(dtype):
            raise TypeError(
                "Setting a MultiIndex dtype to anything other than object "
                "is not supported"
            )
        if copy is True:
            # shallow copy suffices; levels/codes are shared immutably
            return self._view()
        return self

3645 

3646 def _validate_fill_value(self, item): 

3647 if isinstance(item, MultiIndex): 

3648 # GH#43212 

3649 if item.nlevels != self.nlevels: 

3650 raise ValueError("Item must have length equal to number of levels.") 

3651 return item._values 

3652 elif not isinstance(item, tuple): 

3653 # Pad the key with empty strings if lower levels of the key 

3654 # aren't specified: 

3655 item = (item,) + ("",) * (self.nlevels - 1) 

3656 elif len(item) != self.nlevels: 

3657 raise ValueError("Item must have length equal to number of levels.") 

3658 return item 

3659 

    def putmask(self, mask, value: MultiIndex) -> MultiIndex:
        """
        Return a new MultiIndex of the values set with the mask.

        Parameters
        ----------
        mask : array like
        value : MultiIndex
            Must either be the same length as self or length one

        Returns
        -------
        MultiIndex
        """
        mask, noop = validate_putmask(self, mask)
        if noop:
            # nothing to replace
            return self.copy()

        if len(mask) == len(value):
            # align value row-for-row with the masked positions
            subset = value[mask].remove_unused_levels()
        else:
            subset = value.remove_unused_levels()

        new_levels = []
        new_codes = []

        for i, (value_level, level, level_codes) in enumerate(
            zip(subset.levels, self.levels, self.codes)
        ):
            # union keeps existing codes valid; only masked codes are remapped
            new_level = level.union(value_level, sort=False)
            value_codes = new_level.get_indexer_for(subset.get_level_values(i))
            new_code = ensure_int64(level_codes)
            new_code[mask] = value_codes
            new_levels.append(new_level)
            new_codes.append(new_code)

        return MultiIndex(
            levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
        )

3699 

3700 def insert(self, loc: int, item) -> MultiIndex: 

3701 """ 

3702 Make new MultiIndex inserting new item at location 

3703 

3704 Parameters 

3705 ---------- 

3706 loc : int 

3707 item : tuple 

3708 Must be same length as number of levels in the MultiIndex 

3709 

3710 Returns 

3711 ------- 

3712 new_index : Index 

3713 """ 

3714 item = self._validate_fill_value(item) 

3715 

3716 new_levels = [] 

3717 new_codes = [] 

3718 for k, level, level_codes in zip(item, self.levels, self.codes): 

3719 if k not in level: 

3720 # have to insert into level 

3721 # must insert at end otherwise you have to recompute all the 

3722 # other codes 

3723 lev_loc = len(level) 

3724 level = level.insert(lev_loc, k) 

3725 else: 

3726 lev_loc = level.get_loc(k) 

3727 

3728 new_levels.append(level) 

3729 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc)) 

3730 

3731 return MultiIndex( 

3732 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False 

3733 ) 

3734 

3735 def delete(self, loc) -> MultiIndex: 

3736 """ 

3737 Make new index with passed location deleted 

3738 

3739 Returns 

3740 ------- 

3741 new_index : MultiIndex 

3742 """ 

3743 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] 

3744 return MultiIndex( 

3745 levels=self.levels, 

3746 codes=new_codes, 

3747 names=self.names, 

3748 verify_integrity=False, 

3749 ) 

3750 

    @doc(Index.isin)
    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        if isinstance(values, Generator):
            # generators can only be consumed once; materialize up front
            values = list(values)

        if level is None:
            # match against full tuples
            if len(values) == 0:
                return np.zeros((len(self),), dtype=np.bool_)
            if not isinstance(values, MultiIndex):
                values = MultiIndex.from_tuples(values)
            return values.unique().get_indexer_for(self) != -1
        else:
            # match against the labels of a single level
            num = self._get_level_number(level)
            levs = self.get_level_values(num)

            if levs.size == 0:
                return np.zeros(len(levs), dtype=np.bool_)
            return levs.isin(values)

3769 

    # error: Incompatible types in assignment (expression has type overloaded function,
    # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
    rename = Index.set_names  # type: ignore[assignment]

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # A MultiIndex holds tuples, so element-wise arithmetic is not
    # meaningful; each operator is replaced by a stub that raises when
    # called (see `make_invalid_op`).

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")

3800 

3801 

3802def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: 

3803 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" 

3804 int64_codes = [ensure_int64(level_codes) for level_codes in codes] 

3805 for k in range(nlevels, 0, -1): 

3806 if libalgos.is_lexsorted(int64_codes[:k]): 

3807 return k 

3808 return 0 

3809 

3810 

def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out repeated leading entries of consecutive label rows.

    For each row after `start`, entries equal to the previous row's are
    replaced with `sentinel` up to the first mismatch; the final level is
    always kept verbatim.
    """
    pivoted = list(zip(*label_list))
    nlevels = len(label_list)

    result = pivoted[: start + 1]
    prev = pivoted[start]

    for cur in pivoted[start + 1 :]:
        sparse_cur = []

        for i, (p, t) in enumerate(zip(prev, cur)):
            if i == nlevels - 1:
                # The last level is never sparsified.
                sparse_cur.append(t)
                result.append(sparse_cur)
                break

            if p == t:
                sparse_cur.append(sentinel)
            else:
                # First mismatch: keep the remainder of the row verbatim.
                sparse_cur.extend(cur[i:])
                result.append(sparse_cur)
                break

        prev = cur

    return list(zip(*result))

3837 

3838 

3839def _get_na_rep(dtype) -> str: 

3840 if is_extension_array_dtype(dtype): 

3841 return f"{dtype.na_value}" 

3842 else: 

3843 dtype = dtype.type 

3844 

3845 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype, "NaN") 

3846 

3847 

def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    original_index = index

    if not isinstance(key, tuple):
        # Scalar key: drop a single level, best-effort.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Tuple key: drop one level per element.
    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out
            return original_index
    return index

3879 

3880 

3881def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: 

3882 """ 

3883 Coerce the array-like indexer to the smallest integer dtype that can encode all 

3884 of the given categories. 

3885 

3886 Parameters 

3887 ---------- 

3888 array_like : array-like 

3889 categories : array-like 

3890 copy : bool 

3891 

3892 Returns 

3893 ------- 

3894 np.ndarray 

3895 Non-writeable. 

3896 """ 

3897 array_like = coerce_indexer_dtype(array_like, categories) 

3898 if copy: 

3899 array_like = array_like.copy() 

3900 array_like.flags.writeable = False 

3901 return array_like 

3902 

3903 

3904def _require_listlike(level, arr, arrname: str): 

3905 """ 

3906 Ensure that level is either None or listlike, and arr is list-of-listlike. 

3907 """ 

3908 if level is not None and not is_list_like(level): 

3909 if not is_list_like(arr): 

3910 raise TypeError(f"{arrname} must be list-like") 

3911 if len(arr) > 0 and is_list_like(arr[0]): 

3912 raise TypeError(f"{arrname} must be list-like") 

3913 level = [level] 

3914 arr = [arr] 

3915 elif level is None or is_list_like(level): 

3916 if not is_list_like(arr) or not is_list_like(arr[0]): 

3917 raise TypeError(f"{arrname} must be list of lists-like") 

3918 return level, arr