Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/multi.py: 24%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1439 statements  

1from __future__ import annotations 

2 

3from collections.abc import ( 

4 Collection, 

5 Generator, 

6 Hashable, 

7 Iterable, 

8 Sequence, 

9) 

10from functools import wraps 

11from sys import getsizeof 

12from typing import ( 

13 TYPE_CHECKING, 

14 Any, 

15 Callable, 

16 Literal, 

17 cast, 

18) 

19import warnings 

20 

21import numpy as np 

22 

23from pandas._config import get_option 

24 

25from pandas._libs import ( 

26 algos as libalgos, 

27 index as libindex, 

28 lib, 

29) 

30from pandas._libs.hashtable import duplicated 

31from pandas._typing import ( 

32 AnyAll, 

33 AnyArrayLike, 

34 Axis, 

35 DropKeep, 

36 DtypeObj, 

37 F, 

38 IgnoreRaise, 

39 IndexLabel, 

40 Scalar, 

41 Self, 

42 Shape, 

43 npt, 

44) 

45from pandas.compat.numpy import function as nv 

46from pandas.errors import ( 

47 InvalidIndexError, 

48 PerformanceWarning, 

49 UnsortedIndexError, 

50) 

51from pandas.util._decorators import ( 

52 Appender, 

53 cache_readonly, 

54 doc, 

55) 

56from pandas.util._exceptions import find_stack_level 

57 

58from pandas.core.dtypes.cast import coerce_indexer_dtype 

59from pandas.core.dtypes.common import ( 

60 ensure_int64, 

61 ensure_platform_int, 

62 is_hashable, 

63 is_integer, 

64 is_iterator, 

65 is_list_like, 

66 is_object_dtype, 

67 is_scalar, 

68 pandas_dtype, 

69) 

70from pandas.core.dtypes.dtypes import ( 

71 CategoricalDtype, 

72 ExtensionDtype, 

73) 

74from pandas.core.dtypes.generic import ( 

75 ABCDataFrame, 

76 ABCSeries, 

77) 

78from pandas.core.dtypes.inference import is_array_like 

79from pandas.core.dtypes.missing import ( 

80 array_equivalent, 

81 isna, 

82) 

83 

84import pandas.core.algorithms as algos 

85from pandas.core.array_algos.putmask import validate_putmask 

86from pandas.core.arrays import ( 

87 Categorical, 

88 ExtensionArray, 

89) 

90from pandas.core.arrays.categorical import ( 

91 factorize_from_iterables, 

92 recode_for_categories, 

93) 

94import pandas.core.common as com 

95from pandas.core.construction import sanitize_array 

96import pandas.core.indexes.base as ibase 

97from pandas.core.indexes.base import ( 

98 Index, 

99 _index_shared_docs, 

100 ensure_index, 

101 get_unanimous_names, 

102) 

103from pandas.core.indexes.frozen import FrozenList 

104from pandas.core.ops.invalid import make_invalid_op 

105from pandas.core.sorting import ( 

106 get_group_index, 

107 lexsort_indexer, 

108) 

109 

110from pandas.io.formats.printing import ( 

111 get_adjustment, 

112 pprint_thing, 

113) 

114 

if TYPE_CHECKING:
    # Imported only for static typing to avoid circular imports at runtime.
    from pandas import (
        CategoricalIndex,
        DataFrame,
        Series,
    )

# Shared docstring substitutions, specialized for MultiIndex.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
)

126 

127 

class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits:
        codes <<= self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer:
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)

165 

166 

class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows the 64 bits integers, and uses an ObjectEngine
    containing Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # Shift the representation of each level by the pre-calculated number
        # of bits. Since this can overflow uint64, first make sure we are
        # working with Python integers:
        codes = codes.astype("object") << self.offsets

        # Now sum and OR are in fact interchangeable. This is a simple
        # composition of the (disjunct) significant bits of each level (i.e.
        # each column in "codes") in a single positive integer (per row):
        if codes.ndim == 1:
            # Single key
            return np.bitwise_or.reduce(codes)

        # Multiple keys
        return np.bitwise_or.reduce(codes, axis=1)

206 

207 

def names_compat(meth: F) -> F:
    """
    A decorator to allow either `name` or `names` keyword but not both.

    This makes it easier to share code with base class.

    Raises
    ------
    TypeError
        If both ``name`` and ``names`` are supplied to the wrapped method.
    """

    @wraps(meth)
    def new_meth(self_or_cls, *args, **kwargs):
        if "name" in kwargs and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if "name" in kwargs:
            # Normalize the singular spelling onto the plural one.
            kwargs["names"] = kwargs.pop("name")

        return meth(self_or_cls, *args, **kwargs)

    return cast(F, new_meth)

225 

226 

class MultiIndex(Index):
    """
    A multi-level, or hierarchical, index object for pandas objects.

    Parameters
    ----------
    levels : sequence of arrays
        The unique labels for each level.
    codes : sequence of arrays
        Integers for each level designating which label at each location.
    sortorder : optional int
        Level of sortedness (must be lexicographically sorted by that
        level).
    names : optional sequence of objects
        Names for each of the index levels. (name is accepted for compat).
    copy : bool, default False
        Copy the meta-data.
    verify_integrity : bool, default True
        Check that the levels/codes are consistent and valid.

    Attributes
    ----------
    names
    levels
    codes
    nlevels
    levshape
    dtypes

    Methods
    -------
    from_arrays
    from_tuples
    from_product
    from_frame
    set_levels
    set_codes
    to_frame
    to_flat_index
    sortlevel
    droplevel
    swaplevel
    reorder_levels
    remove_unused_levels
    get_level_values
    get_indexer
    get_loc
    get_locs
    get_loc_level
    drop

    See Also
    --------
    MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
    MultiIndex.from_product : Create a MultiIndex from the cartesian product
        of iterables.
    MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
    MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
    Index : The base pandas Index type.

    Notes
    -----
    See the `user guide
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
    for more.

    Examples
    --------
    A new ``MultiIndex`` is typically constructed using one of the helper
    methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
    and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):

    >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
    >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
    MultiIndex([(1,  'red'),
                (1, 'blue'),
                (2,  'red'),
                (2, 'blue')],
               names=['number', 'color'])

    See further examples for how to construct a MultiIndex in the doc strings
    of the mentioned helper methods.
    """

    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    _names: list[Hashable | None] = []
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ["names"]

    # Lexsort depth declared by the constructor; None when unknown.
    sortorder: int | None

321 

322 # -------------------------------------------------------------------- 

323 # Constructors 

324 

325 def __new__( 

326 cls, 

327 levels=None, 

328 codes=None, 

329 sortorder=None, 

330 names=None, 

331 dtype=None, 

332 copy: bool = False, 

333 name=None, 

334 verify_integrity: bool = True, 

335 ) -> Self: 

336 # compat with Index 

337 if name is not None: 

338 names = name 

339 if levels is None or codes is None: 

340 raise TypeError("Must pass both levels and codes") 

341 if len(levels) != len(codes): 

342 raise ValueError("Length of levels and codes must be the same.") 

343 if len(levels) == 0: 

344 raise ValueError("Must pass non-zero number of levels/codes") 

345 

346 result = object.__new__(cls) 

347 result._cache = {} 

348 

349 # we've already validated levels and codes, so shortcut here 

350 result._set_levels(levels, copy=copy, validate=False) 

351 result._set_codes(codes, copy=copy, validate=False) 

352 

353 result._names = [None] * len(levels) 

354 if names is not None: 

355 # handles name validation 

356 result._set_names(names) 

357 

358 if sortorder is not None: 

359 result.sortorder = int(sortorder) 

360 else: 

361 result.sortorder = sortorder 

362 

363 if verify_integrity: 

364 new_codes = result._verify_integrity() 

365 result._codes = new_codes 

366 

367 result._reset_identity() 

368 result._references = None 

369 

370 return result 

371 

372 def _validate_codes(self, level: list, code: list): 

373 """ 

374 Reassign code values as -1 if their corresponding levels are NaN. 

375 

376 Parameters 

377 ---------- 

378 code : list 

379 Code to reassign. 

380 level : list 

381 Level to check for missing values (NaN, NaT, None). 

382 

383 Returns 

384 ------- 

385 new code where code value = -1 if it corresponds 

386 to a level with missing values (NaN, NaT, None). 

387 """ 

388 null_mask = isna(level) 

389 if np.any(null_mask): 

390 # error: Incompatible types in assignment 

391 # (expression has type "ndarray[Any, dtype[Any]]", 

392 # variable has type "List[Any]") 

393 code = np.where(null_mask[code], -1, code) # type: ignore[assignment] 

394 return code 

395 

396 def _verify_integrity( 

397 self, 

398 codes: list | None = None, 

399 levels: list | None = None, 

400 levels_to_verify: list[int] | range | None = None, 

401 ): 

402 """ 

403 Parameters 

404 ---------- 

405 codes : optional list 

406 Codes to check for validity. Defaults to current codes. 

407 levels : optional list 

408 Levels to check for validity. Defaults to current levels. 

409 levels_to_validate: optional list 

410 Specifies the levels to verify. 

411 

412 Raises 

413 ------ 

414 ValueError 

415 If length of levels and codes don't match, if the codes for any 

416 level would exceed level bounds, or there are any duplicate levels. 

417 

418 Returns 

419 ------- 

420 new codes where code value = -1 if it corresponds to a 

421 NaN level. 

422 """ 

423 # NOTE: Currently does not check, among other things, that cached 

424 # nlevels matches nor that sortorder matches actually sortorder. 

425 codes = codes or self.codes 

426 levels = levels or self.levels 

427 if levels_to_verify is None: 

428 levels_to_verify = range(len(levels)) 

429 

430 if len(levels) != len(codes): 

431 raise ValueError( 

432 "Length of levels and codes must match. NOTE: " 

433 "this index is in an inconsistent state." 

434 ) 

435 codes_length = len(codes[0]) 

436 for i in levels_to_verify: 

437 level = levels[i] 

438 level_codes = codes[i] 

439 

440 if len(level_codes) != codes_length: 

441 raise ValueError( 

442 f"Unequal code lengths: {[len(code_) for code_ in codes]}" 

443 ) 

444 if len(level_codes) and level_codes.max() >= len(level): 

445 raise ValueError( 

446 f"On level {i}, code max ({level_codes.max()}) >= length of " 

447 f"level ({len(level)}). NOTE: this index is in an " 

448 "inconsistent state" 

449 ) 

450 if len(level_codes) and level_codes.min() < -1: 

451 raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1") 

452 if not level.is_unique: 

453 raise ValueError( 

454 f"Level values must be unique: {list(level)} on level {i}" 

455 ) 

456 if self.sortorder is not None: 

457 if self.sortorder > _lexsort_depth(self.codes, self.nlevels): 

458 raise ValueError( 

459 "Value for sortorder must be inferior or equal to actual " 

460 f"lexsort_depth: sortorder {self.sortorder} " 

461 f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}" 

462 ) 

463 

464 result_codes = [] 

465 for i in range(len(levels)): 

466 if i in levels_to_verify: 

467 result_codes.append(self._validate_codes(levels[i], codes[i])) 

468 else: 

469 result_codes.append(codes[i]) 

470 

471 new_codes = FrozenList(result_codes) 

472 return new_codes 

473 

474 @classmethod 

475 def from_arrays( 

476 cls, 

477 arrays, 

478 sortorder: int | None = None, 

479 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default, 

480 ) -> MultiIndex: 

481 """ 

482 Convert arrays to MultiIndex. 

483 

484 Parameters 

485 ---------- 

486 arrays : list / sequence of array-likes 

487 Each array-like gives one level's value for each data point. 

488 len(arrays) is the number of levels. 

489 sortorder : int or None 

490 Level of sortedness (must be lexicographically sorted by that 

491 level). 

492 names : list / sequence of str, optional 

493 Names for the levels in the index. 

494 

495 Returns 

496 ------- 

497 MultiIndex 

498 

499 See Also 

500 -------- 

501 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

502 MultiIndex.from_product : Make a MultiIndex from cartesian product 

503 of iterables. 

504 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

505 

506 Examples 

507 -------- 

508 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']] 

509 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color')) 

510 MultiIndex([(1, 'red'), 

511 (1, 'blue'), 

512 (2, 'red'), 

513 (2, 'blue')], 

514 names=['number', 'color']) 

515 """ 

516 error_msg = "Input must be a list / sequence of array-likes." 

517 if not is_list_like(arrays): 

518 raise TypeError(error_msg) 

519 if is_iterator(arrays): 

520 arrays = list(arrays) 

521 

522 # Check if elements of array are list-like 

523 for array in arrays: 

524 if not is_list_like(array): 

525 raise TypeError(error_msg) 

526 

527 # Check if lengths of all arrays are equal or not, 

528 # raise ValueError, if not 

529 for i in range(1, len(arrays)): 

530 if len(arrays[i]) != len(arrays[i - 1]): 

531 raise ValueError("all arrays must be same length") 

532 

533 codes, levels = factorize_from_iterables(arrays) 

534 if names is lib.no_default: 

535 names = [getattr(arr, "name", None) for arr in arrays] 

536 

537 return cls( 

538 levels=levels, 

539 codes=codes, 

540 sortorder=sortorder, 

541 names=names, 

542 verify_integrity=False, 

543 ) 

544 

545 @classmethod 

546 @names_compat 

547 def from_tuples( 

548 cls, 

549 tuples: Iterable[tuple[Hashable, ...]], 

550 sortorder: int | None = None, 

551 names: Sequence[Hashable] | Hashable | None = None, 

552 ) -> MultiIndex: 

553 """ 

554 Convert list of tuples to MultiIndex. 

555 

556 Parameters 

557 ---------- 

558 tuples : list / sequence of tuple-likes 

559 Each tuple is the index of one row/column. 

560 sortorder : int or None 

561 Level of sortedness (must be lexicographically sorted by that 

562 level). 

563 names : list / sequence of str, optional 

564 Names for the levels in the index. 

565 

566 Returns 

567 ------- 

568 MultiIndex 

569 

570 See Also 

571 -------- 

572 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

573 MultiIndex.from_product : Make a MultiIndex from cartesian product 

574 of iterables. 

575 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

576 

577 Examples 

578 -------- 

579 >>> tuples = [(1, 'red'), (1, 'blue'), 

580 ... (2, 'red'), (2, 'blue')] 

581 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color')) 

582 MultiIndex([(1, 'red'), 

583 (1, 'blue'), 

584 (2, 'red'), 

585 (2, 'blue')], 

586 names=['number', 'color']) 

587 """ 

588 if not is_list_like(tuples): 

589 raise TypeError("Input must be a list / sequence of tuple-likes.") 

590 if is_iterator(tuples): 

591 tuples = list(tuples) 

592 tuples = cast(Collection[tuple[Hashable, ...]], tuples) 

593 

594 # handling the empty tuple cases 

595 if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples): 

596 codes = [np.zeros(len(tuples))] 

597 levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))] 

598 return cls( 

599 levels=levels, 

600 codes=codes, 

601 sortorder=sortorder, 

602 names=names, 

603 verify_integrity=False, 

604 ) 

605 

606 arrays: list[Sequence[Hashable]] 

607 if len(tuples) == 0: 

608 if names is None: 

609 raise TypeError("Cannot infer number of levels from empty list") 

610 # error: Argument 1 to "len" has incompatible type "Hashable"; 

611 # expected "Sized" 

612 arrays = [[]] * len(names) # type: ignore[arg-type] 

613 elif isinstance(tuples, (np.ndarray, Index)): 

614 if isinstance(tuples, Index): 

615 tuples = np.asarray(tuples._values) 

616 

617 arrays = list(lib.tuples_to_object_array(tuples).T) 

618 elif isinstance(tuples, list): 

619 arrays = list(lib.to_object_array_tuples(tuples).T) 

620 else: 

621 arrs = zip(*tuples) 

622 arrays = cast(list[Sequence[Hashable]], arrs) 

623 

624 return cls.from_arrays(arrays, sortorder=sortorder, names=names) 

625 

626 @classmethod 

627 def from_product( 

628 cls, 

629 iterables: Sequence[Iterable[Hashable]], 

630 sortorder: int | None = None, 

631 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default, 

632 ) -> MultiIndex: 

633 """ 

634 Make a MultiIndex from the cartesian product of multiple iterables. 

635 

636 Parameters 

637 ---------- 

638 iterables : list / sequence of iterables 

639 Each iterable has unique labels for each level of the index. 

640 sortorder : int or None 

641 Level of sortedness (must be lexicographically sorted by that 

642 level). 

643 names : list / sequence of str, optional 

644 Names for the levels in the index. 

645 If not explicitly provided, names will be inferred from the 

646 elements of iterables if an element has a name attribute. 

647 

648 Returns 

649 ------- 

650 MultiIndex 

651 

652 See Also 

653 -------- 

654 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

655 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

656 MultiIndex.from_frame : Make a MultiIndex from a DataFrame. 

657 

658 Examples 

659 -------- 

660 >>> numbers = [0, 1, 2] 

661 >>> colors = ['green', 'purple'] 

662 >>> pd.MultiIndex.from_product([numbers, colors], 

663 ... names=['number', 'color']) 

664 MultiIndex([(0, 'green'), 

665 (0, 'purple'), 

666 (1, 'green'), 

667 (1, 'purple'), 

668 (2, 'green'), 

669 (2, 'purple')], 

670 names=['number', 'color']) 

671 """ 

672 from pandas.core.reshape.util import cartesian_product 

673 

674 if not is_list_like(iterables): 

675 raise TypeError("Input must be a list / sequence of iterables.") 

676 if is_iterator(iterables): 

677 iterables = list(iterables) 

678 

679 codes, levels = factorize_from_iterables(iterables) 

680 if names is lib.no_default: 

681 names = [getattr(it, "name", None) for it in iterables] 

682 

683 # codes are all ndarrays, so cartesian_product is lossless 

684 codes = cartesian_product(codes) 

685 return cls(levels, codes, sortorder=sortorder, names=names) 

686 

687 @classmethod 

688 def from_frame( 

689 cls, 

690 df: DataFrame, 

691 sortorder: int | None = None, 

692 names: Sequence[Hashable] | Hashable | None = None, 

693 ) -> MultiIndex: 

694 """ 

695 Make a MultiIndex from a DataFrame. 

696 

697 Parameters 

698 ---------- 

699 df : DataFrame 

700 DataFrame to be converted to MultiIndex. 

701 sortorder : int, optional 

702 Level of sortedness (must be lexicographically sorted by that 

703 level). 

704 names : list-like, optional 

705 If no names are provided, use the column names, or tuple of column 

706 names if the columns is a MultiIndex. If a sequence, overwrite 

707 names with the given sequence. 

708 

709 Returns 

710 ------- 

711 MultiIndex 

712 The MultiIndex representation of the given DataFrame. 

713 

714 See Also 

715 -------- 

716 MultiIndex.from_arrays : Convert list of arrays to MultiIndex. 

717 MultiIndex.from_tuples : Convert list of tuples to MultiIndex. 

718 MultiIndex.from_product : Make a MultiIndex from cartesian product 

719 of iterables. 

720 

721 Examples 

722 -------- 

723 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'], 

724 ... ['NJ', 'Temp'], ['NJ', 'Precip']], 

725 ... columns=['a', 'b']) 

726 >>> df 

727 a b 

728 0 HI Temp 

729 1 HI Precip 

730 2 NJ Temp 

731 3 NJ Precip 

732 

733 >>> pd.MultiIndex.from_frame(df) 

734 MultiIndex([('HI', 'Temp'), 

735 ('HI', 'Precip'), 

736 ('NJ', 'Temp'), 

737 ('NJ', 'Precip')], 

738 names=['a', 'b']) 

739 

740 Using explicit names, instead of the column names 

741 

742 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation']) 

743 MultiIndex([('HI', 'Temp'), 

744 ('HI', 'Precip'), 

745 ('NJ', 'Temp'), 

746 ('NJ', 'Precip')], 

747 names=['state', 'observation']) 

748 """ 

749 if not isinstance(df, ABCDataFrame): 

750 raise TypeError("Input must be a DataFrame") 

751 

752 column_names, columns = zip(*df.items()) 

753 names = column_names if names is None else names 

754 return cls.from_arrays(columns, sortorder=sortorder, names=names) 

755 

756 # -------------------------------------------------------------------- 

757 

758 @cache_readonly 

759 def _values(self) -> np.ndarray: 

760 # We override here, since our parent uses _data, which we don't use. 

761 values = [] 

762 

763 for i in range(self.nlevels): 

764 index = self.levels[i] 

765 codes = self.codes[i] 

766 

767 vals = index 

768 if isinstance(vals.dtype, CategoricalDtype): 

769 vals = cast("CategoricalIndex", vals) 

770 vals = vals._data._internal_get_values() 

771 

772 if isinstance(vals.dtype, ExtensionDtype) or lib.is_np_dtype( 

773 vals.dtype, "mM" 

774 ): 

775 vals = vals.astype(object) 

776 

777 vals = np.asarray(vals) 

778 vals = algos.take_nd(vals, codes, fill_value=index._na_value) 

779 values.append(vals) 

780 

781 arr = lib.fast_zip(values) 

782 return arr 

783 

784 @property 

785 def values(self) -> np.ndarray: 

786 return self._values 

787 

788 @property 

789 def array(self): 

790 """ 

791 Raises a ValueError for `MultiIndex` because there's no single 

792 array backing a MultiIndex. 

793 

794 Raises 

795 ------ 

796 ValueError 

797 """ 

798 raise ValueError( 

799 "MultiIndex has no single backing array. Use " 

800 "'MultiIndex.to_numpy()' to get a NumPy array of tuples." 

801 ) 

802 

803 @cache_readonly 

804 def dtypes(self) -> Series: 

805 """ 

806 Return the dtypes as a Series for the underlying MultiIndex. 

807 

808 Examples 

809 -------- 

810 >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')], 

811 ... names=['number', 'color']) 

812 >>> idx 

813 MultiIndex([(0, 'green'), 

814 (0, 'purple'), 

815 (1, 'green'), 

816 (1, 'purple'), 

817 (2, 'green'), 

818 (2, 'purple')], 

819 names=['number', 'color']) 

820 >>> idx.dtypes 

821 number int64 

822 color object 

823 dtype: object 

824 """ 

825 from pandas import Series 

826 

827 names = com.fill_missing_names([level.name for level in self.levels]) 

828 return Series([level.dtype for level in self.levels], index=Index(names)) 

829 

830 def __len__(self) -> int: 

831 return len(self.codes[0]) 

832 

833 @property 

834 def size(self) -> int: 

835 """ 

836 Return the number of elements in the underlying data. 

837 """ 

838 # override Index.size to avoid materializing _values 

839 return len(self) 

840 

841 # -------------------------------------------------------------------- 

842 # Levels Methods 

843 

844 @cache_readonly 

845 def levels(self) -> FrozenList: 

846 """ 

847 Levels of the MultiIndex. 

848 

849 Levels refer to the different hierarchical levels or layers in a MultiIndex. 

850 In a MultiIndex, each level represents a distinct dimension or category of 

851 the index. 

852 

853 To access the levels, you can use the levels attribute of the MultiIndex, 

854 which returns a tuple of Index objects. Each Index object represents a 

855 level in the MultiIndex and contains the unique values found in that 

856 specific level. 

857 

858 If a MultiIndex is created with levels A, B, C, and the DataFrame using 

859 it filters out all rows of the level C, MultiIndex.levels will still 

860 return A, B, C. 

861 

862 Examples 

863 -------- 

864 >>> index = pd.MultiIndex.from_product([['mammal'], 

865 ... ('goat', 'human', 'cat', 'dog')], 

866 ... names=['Category', 'Animals']) 

867 >>> leg_num = pd.DataFrame(data=(4, 2, 4, 4), index=index, columns=['Legs']) 

868 >>> leg_num 

869 Legs 

870 Category Animals 

871 mammal goat 4 

872 human 2 

873 cat 4 

874 dog 4 

875 

876 >>> leg_num.index.levels 

877 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) 

878 

879 MultiIndex levels will not change even if the DataFrame using the MultiIndex 

880 does not contain all them anymore. 

881 See how "human" is not in the DataFrame, but it is still in levels: 

882 

883 >>> large_leg_num = leg_num[leg_num.Legs > 2] 

884 >>> large_leg_num 

885 Legs 

886 Category Animals 

887 mammal goat 4 

888 cat 4 

889 dog 4 

890 

891 >>> large_leg_num.index.levels 

892 FrozenList([['mammal'], ['cat', 'dog', 'goat', 'human']]) 

893 """ 

894 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly 

895 # create new IndexEngine 

896 # https://github.com/pandas-dev/pandas/issues/31648 

897 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)] 

898 for level in result: 

899 # disallow midx.levels[0].name = "foo" 

900 level._no_setting_name = True 

901 return FrozenList(result) 

902 

903 def _set_levels( 

904 self, 

905 levels, 

906 *, 

907 level=None, 

908 copy: bool = False, 

909 validate: bool = True, 

910 verify_integrity: bool = False, 

911 ) -> None: 

912 # This is NOT part of the levels property because it should be 

913 # externally not allowed to set levels. User beware if you change 

914 # _levels directly 

915 if validate: 

916 if len(levels) == 0: 

917 raise ValueError("Must set non-zero number of levels.") 

918 if level is None and len(levels) != self.nlevels: 

919 raise ValueError("Length of levels must match number of levels.") 

920 if level is not None and len(levels) != len(level): 

921 raise ValueError("Length of levels must match length of level.") 

922 

923 if level is None: 

924 new_levels = FrozenList( 

925 ensure_index(lev, copy=copy)._view() for lev in levels 

926 ) 

927 level_numbers = list(range(len(new_levels))) 

928 else: 

929 level_numbers = [self._get_level_number(lev) for lev in level] 

930 new_levels_list = list(self._levels) 

931 for lev_num, lev in zip(level_numbers, levels): 

932 new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view() 

933 new_levels = FrozenList(new_levels_list) 

934 

935 if verify_integrity: 

936 new_codes = self._verify_integrity( 

937 levels=new_levels, levels_to_verify=level_numbers 

938 ) 

939 self._codes = new_codes 

940 

941 names = self.names 

942 self._levels = new_levels 

943 if any(names): 

944 self._set_names(names) 

945 

946 self._reset_cache() 

947 

948 def set_levels( 

949 self, levels, *, level=None, verify_integrity: bool = True 

950 ) -> MultiIndex: 

951 """ 

952 Set new levels on MultiIndex. Defaults to returning new index. 

953 

954 Parameters 

955 ---------- 

956 levels : sequence or list of sequence 

957 New level(s) to apply. 

958 level : int, level name, or sequence of int/level names (default None) 

959 Level(s) to set (None for all levels). 

960 verify_integrity : bool, default True 

961 If True, checks that levels and codes are compatible. 

962 

963 Returns 

964 ------- 

965 MultiIndex 

966 

967 Examples 

968 -------- 

969 >>> idx = pd.MultiIndex.from_tuples( 

970 ... [ 

971 ... (1, "one"), 

972 ... (1, "two"), 

973 ... (2, "one"), 

974 ... (2, "two"), 

975 ... (3, "one"), 

976 ... (3, "two") 

977 ... ], 

978 ... names=["foo", "bar"] 

979 ... ) 

980 >>> idx 

981 MultiIndex([(1, 'one'), 

982 (1, 'two'), 

983 (2, 'one'), 

984 (2, 'two'), 

985 (3, 'one'), 

986 (3, 'two')], 

987 names=['foo', 'bar']) 

988 

989 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]]) 

990 MultiIndex([('a', 1), 

991 ('a', 2), 

992 ('b', 1), 

993 ('b', 2), 

994 ('c', 1), 

995 ('c', 2)], 

996 names=['foo', 'bar']) 

997 >>> idx.set_levels(['a', 'b', 'c'], level=0) 

998 MultiIndex([('a', 'one'), 

999 ('a', 'two'), 

1000 ('b', 'one'), 

1001 ('b', 'two'), 

1002 ('c', 'one'), 

1003 ('c', 'two')], 

1004 names=['foo', 'bar']) 

1005 >>> idx.set_levels(['a', 'b'], level='bar') 

1006 MultiIndex([(1, 'a'), 

1007 (1, 'b'), 

1008 (2, 'a'), 

1009 (2, 'b'), 

1010 (3, 'a'), 

1011 (3, 'b')], 

1012 names=['foo', 'bar']) 

1013 

1014 If any of the levels passed to ``set_levels()`` exceeds the 

1015 existing length, all of the values from that argument will 

1016 be stored in the MultiIndex levels, though the values will 

1017 be truncated in the MultiIndex output. 

1018 

1019 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]) 

1020 MultiIndex([('a', 1), 

1021 ('a', 2), 

1022 ('b', 1), 

1023 ('b', 2), 

1024 ('c', 1), 

1025 ('c', 2)], 

1026 names=['foo', 'bar']) 

1027 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels 

1028 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]]) 

1029 """ 

1030 

1031 if isinstance(levels, Index): 

1032 pass 

1033 elif is_array_like(levels): 

1034 levels = Index(levels) 

1035 elif is_list_like(levels): 

1036 levels = list(levels) 

1037 

1038 level, levels = _require_listlike(level, levels, "Levels") 

1039 idx = self._view() 

1040 idx._reset_identity() 

1041 idx._set_levels( 

1042 levels, level=level, validate=True, verify_integrity=verify_integrity 

1043 ) 

1044 return idx 

1045 

1046 @property 

1047 def nlevels(self) -> int: 

1048 """ 

1049 Integer number of levels in this MultiIndex. 

1050 

1051 Examples 

1052 -------- 

1053 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

1054 >>> mi 

1055 MultiIndex([('a', 'b', 'c')], 

1056 ) 

1057 >>> mi.nlevels 

1058 3 

1059 """ 

1060 return len(self._levels) 

1061 

1062 @property 

1063 def levshape(self) -> Shape: 

1064 """ 

1065 A tuple with the length of each level. 

1066 

1067 Examples 

1068 -------- 

1069 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

1070 >>> mi 

1071 MultiIndex([('a', 'b', 'c')], 

1072 ) 

1073 >>> mi.levshape 

1074 (1, 1, 1) 

1075 """ 

1076 return tuple(len(x) for x in self.levels) 

1077 

1078 # -------------------------------------------------------------------- 

1079 # Codes Methods 

1080 

    @property
    def codes(self) -> FrozenList:
        """
        Per-level integer codes, one array per level.

        Each array maps every row to a position in the corresponding level;
        a code of -1 marks a missing (NaN) entry.
        """
        return self._codes

1084 

    def _set_codes(
        self,
        codes,
        *,
        level=None,
        copy: bool = False,
        validate: bool = True,
        verify_integrity: bool = False,
    ) -> None:
        """
        Replace the stored codes in place. WARNING: mutates ``self``.

        Parameters
        ----------
        codes : sequence of arrays
            New codes; one array per level being set.
        level : sequence of int/level name, optional
            Levels to replace; ``None`` replaces all of them.
        copy : bool, default False
            Copy the incoming arrays when coercing them.
        validate : bool, default True
            Check that ``codes`` matches the number of targeted levels.
        verify_integrity : bool, default False
            Re-run the codes/levels consistency check on the affected levels.
        """
        if validate:
            if level is None and len(codes) != self.nlevels:
                raise ValueError("Length of codes must match number of levels")
            if level is not None and len(codes) != len(level):
                raise ValueError("Length of codes must match length of levels.")

        level_numbers: list[int] | range
        if level is None:
            # Replace every level's codes; coerce each array to the smallest
            # suitable integer dtype and make it read-only (.view()).
            new_codes = FrozenList(
                _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
                for lev, level_codes in zip(self._levels, codes)
            )
            level_numbers = range(len(new_codes))
        else:
            # Replace only the requested levels, keeping the others intact.
            level_numbers = [self._get_level_number(lev) for lev in level]
            new_codes_list = list(self._codes)
            for lev_num, level_codes in zip(level_numbers, codes):
                lev = self.levels[lev_num]
                new_codes_list[lev_num] = _coerce_indexer_frozen(
                    level_codes, lev, copy=copy
                )
            new_codes = FrozenList(new_codes_list)

        if verify_integrity:
            # May also re-coerce/clean the codes for the verified levels.
            new_codes = self._verify_integrity(
                codes=new_codes, levels_to_verify=level_numbers
            )

        self._codes = new_codes

        # Cached derived values (engine, monotonicity, ...) are now stale.
        self._reset_cache()

1125 

1126 def set_codes( 

1127 self, codes, *, level=None, verify_integrity: bool = True 

1128 ) -> MultiIndex: 

1129 """ 

1130 Set new codes on MultiIndex. Defaults to returning new index. 

1131 

1132 Parameters 

1133 ---------- 

1134 codes : sequence or list of sequence 

1135 New codes to apply. 

1136 level : int, level name, or sequence of int/level names (default None) 

1137 Level(s) to set (None for all levels). 

1138 verify_integrity : bool, default True 

1139 If True, checks that levels and codes are compatible. 

1140 

1141 Returns 

1142 ------- 

1143 new index (of same type and class...etc) or None 

1144 The same type as the caller or None if ``inplace=True``. 

1145 

1146 Examples 

1147 -------- 

1148 >>> idx = pd.MultiIndex.from_tuples( 

1149 ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"] 

1150 ... ) 

1151 >>> idx 

1152 MultiIndex([(1, 'one'), 

1153 (1, 'two'), 

1154 (2, 'one'), 

1155 (2, 'two')], 

1156 names=['foo', 'bar']) 

1157 

1158 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]]) 

1159 MultiIndex([(2, 'one'), 

1160 (1, 'one'), 

1161 (2, 'two'), 

1162 (1, 'two')], 

1163 names=['foo', 'bar']) 

1164 >>> idx.set_codes([1, 0, 1, 0], level=0) 

1165 MultiIndex([(2, 'one'), 

1166 (1, 'two'), 

1167 (2, 'one'), 

1168 (1, 'two')], 

1169 names=['foo', 'bar']) 

1170 >>> idx.set_codes([0, 0, 1, 1], level='bar') 

1171 MultiIndex([(1, 'one'), 

1172 (1, 'one'), 

1173 (2, 'two'), 

1174 (2, 'two')], 

1175 names=['foo', 'bar']) 

1176 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1]) 

1177 MultiIndex([(2, 'one'), 

1178 (1, 'one'), 

1179 (2, 'two'), 

1180 (1, 'two')], 

1181 names=['foo', 'bar']) 

1182 """ 

1183 

1184 level, codes = _require_listlike(level, codes, "Codes") 

1185 idx = self._view() 

1186 idx._reset_identity() 

1187 idx._set_codes(codes, level=level, verify_integrity=verify_integrity) 

1188 return idx 

1189 

1190 # -------------------------------------------------------------------- 

1191 # Index Internals 

1192 

    @cache_readonly
    def _engine(self):
        """
        Build the lookup engine for this MultiIndex.

        Each row is packed into a single unsigned integer by bit-shifting the
        per-level codes; the engine hashes those packed keys. Falls back to a
        Python-int engine when the packed key would not fit in 64 bits.
        """
        # Calculate the number of bits needed to represent labels in each
        # level, as log2 of their sizes:
        # NaN values are shifted to 1 and missing values in other while
        # calculating the indexer are shifted to 0
        sizes = np.ceil(
            np.log2(
                [len(level) + libindex.multiindex_nulls_shift for level in self.levels]
            )
        )

        # Sum bit counts, starting from the _right_....
        lev_bits = np.cumsum(sizes[::-1])[::-1]

        # ... in order to obtain offsets such that sorting the combination of
        # shifted codes (one for each level, resulting in a unique integer) is
        # equivalent to sorting lexicographically the codes themselves. Notice
        # that each level needs to be shifted by the number of bits needed to
        # represent the _previous_ ones:
        offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")

        # Check the total number of bits needed for our representation:
        if lev_bits[0] > 64:
            # The levels would overflow a 64 bit uint - use Python integers:
            return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
        return MultiIndexUIntEngine(self.levels, self.codes, offsets)

1220 

1221 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return 

1222 # type "Type[MultiIndex]" in supertype "Index" 

1223 @property 

1224 def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override] 

1225 return type(self).from_tuples 

1226 

1227 @doc(Index._shallow_copy) 

1228 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex: 

1229 names = name if name is not lib.no_default else self.names 

1230 

1231 return type(self).from_tuples(values, sortorder=None, names=names) 

1232 

1233 def _view(self) -> MultiIndex: 

1234 result = type(self)( 

1235 levels=self.levels, 

1236 codes=self.codes, 

1237 sortorder=self.sortorder, 

1238 names=self.names, 

1239 verify_integrity=False, 

1240 ) 

1241 result._cache = self._cache.copy() 

1242 result._cache.pop("levels", None) # GH32669 

1243 return result 

1244 

1245 # -------------------------------------------------------------------- 

1246 

1247 # error: Signature of "copy" incompatible with supertype "Index" 

1248 def copy( # type: ignore[override] 

1249 self, 

1250 names=None, 

1251 deep: bool = False, 

1252 name=None, 

1253 ) -> Self: 

1254 """ 

1255 Make a copy of this object. 

1256 

1257 Names, dtype, levels and codes can be passed and will be set on new copy. 

1258 

1259 Parameters 

1260 ---------- 

1261 names : sequence, optional 

1262 deep : bool, default False 

1263 name : Label 

1264 Kept for compatibility with 1-dimensional Index. Should not be used. 

1265 

1266 Returns 

1267 ------- 

1268 MultiIndex 

1269 

1270 Notes 

1271 ----- 

1272 In most cases, there should be no functional difference from using 

1273 ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 

1274 This could be potentially expensive on large MultiIndex objects. 

1275 

1276 Examples 

1277 -------- 

1278 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']]) 

1279 >>> mi 

1280 MultiIndex([('a', 'b', 'c')], 

1281 ) 

1282 >>> mi.copy() 

1283 MultiIndex([('a', 'b', 'c')], 

1284 ) 

1285 """ 

1286 names = self._validate_names(name=name, names=names, deep=deep) 

1287 keep_id = not deep 

1288 levels, codes = None, None 

1289 

1290 if deep: 

1291 from copy import deepcopy 

1292 

1293 levels = deepcopy(self.levels) 

1294 codes = deepcopy(self.codes) 

1295 

1296 levels = levels if levels is not None else self.levels 

1297 codes = codes if codes is not None else self.codes 

1298 

1299 new_index = type(self)( 

1300 levels=levels, 

1301 codes=codes, 

1302 sortorder=self.sortorder, 

1303 names=names, 

1304 verify_integrity=False, 

1305 ) 

1306 new_index._cache = self._cache.copy() 

1307 new_index._cache.pop("levels", None) # GH32669 

1308 if keep_id: 

1309 new_index._id = self._id 

1310 return new_index 

1311 

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """the array interface, return my values"""
        # dtype/copy are accepted for the numpy protocol; the materialized
        # values are returned as-is.
        return self.values

1315 

1316 def view(self, cls=None) -> Self: 

1317 """this is defined as a copy with the same identity""" 

1318 result = self.copy() 

1319 result._id = self._id 

1320 return result 

1321 

1322 @doc(Index.__contains__) 

1323 def __contains__(self, key: Any) -> bool: 

1324 hash(key) 

1325 try: 

1326 self.get_loc(key) 

1327 return True 

1328 except (LookupError, TypeError, ValueError): 

1329 return False 

1330 

1331 @cache_readonly 

1332 def dtype(self) -> np.dtype: 

1333 return np.dtype("O") 

1334 

1335 def _is_memory_usage_qualified(self) -> bool: 

1336 """return a boolean if we need a qualified .info display""" 

1337 

1338 def f(level) -> bool: 

1339 return "mixed" in level or "string" in level or "unicode" in level 

1340 

1341 return any(f(level) for level in self._inferred_type_levels) 

1342 

1343 # Cannot determine type of "memory_usage" 

1344 @doc(Index.memory_usage) # type: ignore[has-type] 

1345 def memory_usage(self, deep: bool = False) -> int: 

1346 # we are overwriting our base class to avoid 

1347 # computing .values here which could materialize 

1348 # a tuple representation unnecessarily 

1349 return self._nbytes(deep) 

1350 

1351 @cache_readonly 

1352 def nbytes(self) -> int: 

1353 """return the number of bytes in the underlying data""" 

1354 return self._nbytes(False) 

1355 

1356 def _nbytes(self, deep: bool = False) -> int: 

1357 """ 

1358 return the number of bytes in the underlying data 

1359 deeply introspect the level data if deep=True 

1360 

1361 include the engine hashtable 

1362 

1363 *this is in internal routine* 

1364 

1365 """ 

1366 # for implementations with no useful getsizeof (PyPy) 

1367 objsize = 24 

1368 

1369 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels) 

1370 label_nbytes = sum(i.nbytes for i in self.codes) 

1371 names_nbytes = sum(getsizeof(i, objsize) for i in self.names) 

1372 result = level_nbytes + label_nbytes + names_nbytes 

1373 

1374 # include our engine hashtable 

1375 result += self._engine.sizeof(deep=deep) 

1376 return result 

1377 

1378 # -------------------------------------------------------------------- 

1379 # Rendering Methods 

1380 

1381 def _formatter_func(self, tup): 

1382 """ 

1383 Formats each item in tup according to its level's formatter function. 

1384 """ 

1385 formatter_funcs = [level._formatter_func for level in self.levels] 

1386 return tuple(func(val) for func, val in zip(formatter_funcs, tup)) 

1387 

    def _get_values_for_csv(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Return an object ndarray of formatted values for CSV output.

        Parameters
        ----------
        na_rep : str, default "nan"
            Replacement text for missing entries (code == -1).
        **kwargs
            Forwarded to each level's ``_get_values_for_csv``.
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._get_values_for_csv(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # Append na_rep as an extra "level value" and point the
                # missing codes at it.
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._get_values_for_csv()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values

1423 

    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the MultiIndex as a list of formatted label strings.

        .. deprecated::
            Use ``index.astype(str)`` or ``index.map(formatter)`` instead
            (GH#55413).
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )

        # legacy alias: ``name`` overrides ``names`` when supplied
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        # Stringify each level's taken values, substituting the NA text for
        # missing codes.
        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        # Optionally prepend the level name to each column of labels.
        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels

1504 

    def _format_multi(
        self,
        *,
        include_names: bool,
        sparsify: bool | None | lib.NoDefault,
        formatter: Callable | None = None,
    ) -> list:
        """
        Build per-level lists of stringified labels for rendering.

        Parameters
        ----------
        include_names : bool
            Prepend each level's name (or "") to its column of labels.
        sparsify : bool, None or lib.no_default
            ``None`` consults the "display.multi_sparse" option; truthy
            values collapse repeated labels via ``sparsify_labels``.
        formatter : callable, optional
            Forwarded to each level's ``_format_flat``.
        """
        if len(self) == 0:
            return []

        # Stringify each level's taken values, substituting the NA text for
        # missing codes.
        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = _get_na_rep(lev.dtype)

            if len(lev) > 0:
                taken = formatted = lev.take(level_codes)
                formatted = taken._format_flat(include_name=False, formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        # Optionally prepend the level name to each column of labels.
        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if include_names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify is lib.no_default:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(include_names), sentinel=sentinel
            )

        return result_levels

1567 

1568 # -------------------------------------------------------------------- 

1569 # Names Methods 

1570 

1571 def _get_names(self) -> FrozenList: 

1572 return FrozenList(self._names) 

1573 

    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        # NOTE(review): names=None slips past the check above and list(None)
        # raises TypeError here — confirm whether that is intended.
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            # translate level names/negative ints to positional numbers
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
                self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()

1631 

    # Read/write accessor for the level names; the doc string below is what
    # ``help(MultiIndex.names)`` displays.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )

1650 

1651 # -------------------------------------------------------------------- 

1652 

1653 @cache_readonly 

1654 def inferred_type(self) -> str: 

1655 return "mixed" 

1656 

    def _get_level_number(self, level) -> int:
        """
        Translate a level name or integer into a positional level number.

        Raises
        ------
        ValueError
            If ``level`` is a non-integer name that occurs more than once.
        KeyError
            If ``level`` is a name that does not occur at all.
        IndexError
            If ``level`` is an out-of-range integer.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            # names take precedence over positional integers
            level = self.names.index(level)
        except ValueError as err:
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                # negative integers count from the end
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level

1683 

    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # Any missing value (code -1) makes the index non-monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing

1714 

1715 @cache_readonly 

1716 def is_monotonic_decreasing(self) -> bool: 

1717 """ 

1718 Return a boolean if the values are equal or decreasing. 

1719 """ 

1720 # monotonic decreasing if and only if reverse is monotonic increasing 

1721 return self[::-1].is_monotonic_increasing 

1722 

1723 @cache_readonly 

1724 def _inferred_type_levels(self) -> list[str]: 

1725 """return a list of the inferred types, one for each level""" 

1726 return [i.inferred_type for i in self.levels] 

1727 

1728 @doc(Index.duplicated) 

1729 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: 

1730 shape = tuple(len(lev) for lev in self.levels) 

1731 ids = get_group_index(self.codes, shape, sort=False, xnull=False) 

1732 

1733 return duplicated(ids, keep) 

1734 

1735 # error: Cannot override final attribute "_duplicated" 

1736 # (previously declared in base class "IndexOpsMixin") 

1737 _duplicated = duplicated # type: ignore[misc] 

1738 

1739 def fillna(self, value=None, downcast=None): 

1740 """ 

1741 fillna is not implemented for MultiIndex 

1742 """ 

1743 raise NotImplementedError("isna is not defined for MultiIndex") 

1744 

1745 @doc(Index.dropna) 

1746 def dropna(self, how: AnyAll = "any") -> MultiIndex: 

1747 nans = [level_codes == -1 for level_codes in self.codes] 

1748 if how == "any": 

1749 indexer = np.any(nans, axis=0) 

1750 elif how == "all": 

1751 indexer = np.all(nans, axis=0) 

1752 else: 

1753 raise ValueError(f"invalid how option: {how}") 

1754 

1755 new_codes = [level_codes[~indexer] for level_codes in self.codes] 

1756 return self.set_codes(codes=new_codes) 

1757 

1758 def _get_level_values(self, level: int, unique: bool = False) -> Index: 

1759 """ 

1760 Return vector of label values for requested level, 

1761 equal to the length of the index 

1762 

1763 **this is an internal method** 

1764 

1765 Parameters 

1766 ---------- 

1767 level : int 

1768 unique : bool, default False 

1769 if True, drop duplicated values 

1770 

1771 Returns 

1772 ------- 

1773 Index 

1774 """ 

1775 lev = self.levels[level] 

1776 level_codes = self.codes[level] 

1777 name = self._names[level] 

1778 if unique: 

1779 level_codes = algos.unique(level_codes) 

1780 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value) 

1781 return lev._shallow_copy(filled, name=name) 

1782 

1783 # error: Signature of "get_level_values" incompatible with supertype "Index" 

1784 def get_level_values(self, level) -> Index: # type: ignore[override] 

1785 """ 

1786 Return vector of label values for requested level. 

1787 

1788 Length of returned vector is equal to the length of the index. 

1789 

1790 Parameters 

1791 ---------- 

1792 level : int or str 

1793 ``level`` is either the integer position of the level in the 

1794 MultiIndex, or the name of the level. 

1795 

1796 Returns 

1797 ------- 

1798 Index 

1799 Values is a level of this MultiIndex converted to 

1800 a single :class:`Index` (or subclass thereof). 

1801 

1802 Notes 

1803 ----- 

1804 If the level contains missing values, the result may be casted to 

1805 ``float`` with missing values specified as ``NaN``. This is because 

1806 the level is converted to a regular ``Index``. 

1807 

1808 Examples 

1809 -------- 

1810 Create a MultiIndex: 

1811 

1812 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def'))) 

1813 >>> mi.names = ['level_1', 'level_2'] 

1814 

1815 Get level values by supplying level as either integer or name: 

1816 

1817 >>> mi.get_level_values(0) 

1818 Index(['a', 'b', 'c'], dtype='object', name='level_1') 

1819 >>> mi.get_level_values('level_2') 

1820 Index(['d', 'e', 'f'], dtype='object', name='level_2') 

1821 

1822 If a level contains missing values, the return type of the level 

1823 may be cast to ``float``. 

1824 

1825 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes 

1826 level_0 int64 

1827 level_1 int64 

1828 dtype: object 

1829 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0) 

1830 Index([1.0, nan, 2.0], dtype='float64') 

1831 """ 

1832 level = self._get_level_number(level) 

1833 values = self._get_level_values(level) 

1834 return values 

1835 

1836 @doc(Index.unique) 

1837 def unique(self, level=None): 

1838 if level is None: 

1839 return self.drop_duplicates() 

1840 else: 

1841 level = self._get_level_number(level) 

1842 return self._get_level_values(level=level, unique=True) 

1843 

    def to_frame(
        self,
        index: bool = True,
        name=lib.no_default,
        allow_duplicates: bool = False,
    ) -> DataFrame:
        """
        Create a DataFrame with the levels of the MultiIndex as columns.

        Column ordering is determined by the DataFrame constructor with data as
        a dict.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original MultiIndex.

        name : list / sequence of str, optional
            The passed names should substitute index level names.

        allow_duplicates : bool, optional default False
            Allow duplicate column labels to be created.

            .. versionadded:: 1.5.0

        Returns
        -------
        DataFrame

        See Also
        --------
        DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
            tabular data.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
        >>> mi
        MultiIndex([('a', 'c'),
                    ('b', 'd')],
                   )

        >>> df = mi.to_frame()
        >>> df
             0  1
        a c  a  c
        b d  b  d

        >>> df = mi.to_frame(index=False)
        >>> df
           0  1
        0  a  c
        1  b  d

        >>> df = mi.to_frame(name=['x', 'y'])
        >>> df
             x  y
        a c  a  c
        b d  b  d
        """
        from pandas import DataFrame

        if name is not lib.no_default:
            # caller-provided column names must be list-like and cover every level
            if not is_list_like(name):
                raise TypeError("'name' must be a list / sequence of column names.")

            if len(name) != len(self.levels):
                raise ValueError(
                    "'name' should have same length as number of levels on index."
                )
            idx_names = name
        else:
            idx_names = self._get_level_names()

        if not allow_duplicates and len(set(idx_names)) != len(idx_names):
            raise ValueError(
                "Cannot create duplicate column labels if allow_duplicates is False"
            )

        # Guarantee resulting column order - PY36+ dict maintains insertion order
        result = DataFrame(
            {level: self._get_level_values(level) for level in range(len(self.levels))},
            copy=False,
        )
        # assign names afterwards so duplicate labels are permitted when allowed
        result.columns = idx_names

        if index:
            result.index = self
        return result

1933 

1934 # error: Return type "Index" of "to_flat_index" incompatible with return type 

1935 # "MultiIndex" in supertype "Index" 

1936 def to_flat_index(self) -> Index: # type: ignore[override] 

1937 """ 

1938 Convert a MultiIndex to an Index of Tuples containing the level values. 

1939 

1940 Returns 

1941 ------- 

1942 pd.Index 

1943 Index with the MultiIndex data represented in Tuples. 

1944 

1945 See Also 

1946 -------- 

1947 MultiIndex.from_tuples : Convert flat index back to MultiIndex. 

1948 

1949 Notes 

1950 ----- 

1951 This method will simply return the caller if called by anything other 

1952 than a MultiIndex. 

1953 

1954 Examples 

1955 -------- 

1956 >>> index = pd.MultiIndex.from_product( 

1957 ... [['foo', 'bar'], ['baz', 'qux']], 

1958 ... names=['a', 'b']) 

1959 >>> index.to_flat_index() 

1960 Index([('foo', 'baz'), ('foo', 'qux'), 

1961 ('bar', 'baz'), ('bar', 'qux')], 

1962 dtype='object') 

1963 """ 

1964 return Index(self._values, tupleize_cols=False) 

1965 

1966 def _is_lexsorted(self) -> bool: 

1967 """ 

1968 Return True if the codes are lexicographically sorted. 

1969 

1970 Returns 

1971 ------- 

1972 bool 

1973 

1974 Examples 

1975 -------- 

1976 In the below examples, the first level of the MultiIndex is sorted because 

1977 a<b<c, so there is no need to look at the next level. 

1978 

1979 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], 

1980 ... ['d', 'e', 'f']])._is_lexsorted() 

1981 True 

1982 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'], 

1983 ... ['d', 'f', 'e']])._is_lexsorted() 

1984 True 

1985 

1986 In case there is a tie, the lexicographical sorting looks 

1987 at the next level of the MultiIndex. 

1988 

1989 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted() 

1990 True 

1991 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted() 

1992 False 

1993 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], 

1994 ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted() 

1995 True 

1996 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'], 

1997 ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted() 

1998 False 

1999 """ 

2000 return self._lexsort_depth == self.nlevels 

2001 

2002 @cache_readonly 

2003 def _lexsort_depth(self) -> int: 

2004 """ 

2005 Compute and return the lexsort_depth, the number of levels of the 

2006 MultiIndex that are sorted lexically 

2007 

2008 Returns 

2009 ------- 

2010 int 

2011 """ 

2012 if self.sortorder is not None: 

2013 return self.sortorder 

2014 return _lexsort_depth(self.codes, self.nlevels) 

2015 

    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Parameters
        ----------
        raise_if_incomparable : bool, default False
            Re-raise the TypeError from sorting a level with incomparable
            entries instead of leaving that level as-is.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        # Already in the desired form — nothing to rebuild.
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    # incomparable level entries: keep the level unsorted
                    # unless the caller asked us to propagate the error
                    if raise_if_incomparable:
                        raise
                else:
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    level_codes = algos.take_nd(ri, level_codes, fill_value=-1)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )

2082 

    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            # -1 (missing) sorts first after the +1 shift, so it can only be
            # at position 0 of ``uniques``
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this ``break`` abandons the scan of the
                    # remaining levels entirely, not just this one —
                    # presumably intentional for NaN-containing levels;
                    # confirm against upstream tests.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            # swap the trimmed levels/codes into a shallow view of self
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result

2167 

2168 # -------------------------------------------------------------------- 

2169 # Pickling Methods 

2170 

2171 def __reduce__(self): 

2172 """Necessary for making this object picklable""" 

2173 d = { 

2174 "levels": list(self.levels), 

2175 "codes": list(self.codes), 

2176 "sortorder": self.sortorder, 

2177 "names": list(self.names), 

2178 } 

2179 return ibase._new_Index, (type(self), d), None 

2180 

2181 # -------------------------------------------------------------------- 

2182 

2183 def __getitem__(self, key): 

2184 if is_scalar(key): 

2185 key = com.cast_scalar_indexer(key) 

2186 

2187 retval = [] 

2188 for lev, level_codes in zip(self.levels, self.codes): 

2189 if level_codes[key] == -1: 

2190 retval.append(np.nan) 

2191 else: 

2192 retval.append(lev[level_codes[key]]) 

2193 

2194 return tuple(retval) 

2195 else: 

2196 # in general cannot be sure whether the result will be sorted 

2197 sortorder = None 

2198 if com.is_bool_indexer(key): 

2199 key = np.asarray(key, dtype=bool) 

2200 sortorder = self.sortorder 

2201 elif isinstance(key, slice): 

2202 if key.step is None or key.step > 0: 

2203 sortorder = self.sortorder 

2204 elif isinstance(key, Index): 

2205 key = np.asarray(key) 

2206 

2207 new_codes = [level_codes[key] for level_codes in self.codes] 

2208 

2209 return MultiIndex( 

2210 levels=self.levels, 

2211 codes=new_codes, 

2212 names=self.names, 

2213 sortorder=sortorder, 

2214 verify_integrity=False, 

2215 ) 

2216 

2217 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex: 

2218 """ 

2219 Fastpath for __getitem__ when we know we have a slice. 

2220 """ 

2221 sortorder = None 

2222 if slobj.step is None or slobj.step > 0: 

2223 sortorder = self.sortorder 

2224 

2225 new_codes = [level_codes[slobj] for level_codes in self.codes] 

2226 

2227 return type(self)( 

2228 levels=self.levels, 

2229 codes=new_codes, 

2230 names=self._names, 

2231 sortorder=sortorder, 

2232 verify_integrity=False, 

2233 ) 

2234 

2235 @Appender(_index_shared_docs["take"] % _index_doc_kwargs) 

2236 def take( 

2237 self: MultiIndex, 

2238 indices, 

2239 axis: Axis = 0, 

2240 allow_fill: bool = True, 

2241 fill_value=None, 

2242 **kwargs, 

2243 ) -> MultiIndex: 

2244 nv.validate_take((), kwargs) 

2245 indices = ensure_platform_int(indices) 

2246 

2247 # only fill if we are passing a non-None fill_value 

2248 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) 

2249 

2250 na_value = -1 

2251 

2252 taken = [lab.take(indices) for lab in self.codes] 

2253 if allow_fill: 

2254 mask = indices == -1 

2255 if mask.any(): 

2256 masked = [] 

2257 for new_label in taken: 

2258 label_values = new_label 

2259 label_values[mask] = na_value 

2260 masked.append(np.asarray(label_values)) 

2261 taken = masked 

2262 

2263 return MultiIndex( 

2264 levels=self.levels, codes=taken, names=self.names, verify_integrity=False 

2265 ) 

2266 

    def append(self, other):
        """
        Append a collection of Index options together.

        Parameters
        ----------
        other : Index or list/tuple of indices

        Returns
        -------
        Index
            The combined index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
        >>> mi
        MultiIndex([('a', 'b')],
                   )
        >>> mi.append(mi)
        MultiIndex([('a', 'b'), ('a', 'b')],
                   )
        """
        if not isinstance(other, (list, tuple)):
            other = [other]

        if all(
            (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
        ):
            # All operands are MultiIndexes at least as deep as self:
            # append level-by-level, unioning the level values and recoding
            # every operand's codes onto the unioned level.
            codes = []
            levels = []
            names = []
            for i in range(self.nlevels):
                level_values = self.levels[i]
                for mi in other:
                    level_values = level_values.union(mi.levels[i])
                level_codes = [
                    recode_for_categories(
                        mi.codes[i], mi.levels[i], level_values, copy=False
                    )
                    for mi in ([self, *other])
                ]
                level_name = self.names[i]
                # conflicting names across operands -> drop the name
                if any(mi.names[i] != level_name for mi in other):
                    level_name = None
                codes.append(np.concatenate(level_codes))
                levels.append(level_values)
                names.append(level_name)
            return MultiIndex(
                codes=codes, levels=levels, names=names, verify_integrity=False
            )

        # fallback: concatenate the materialized tuple values
        to_concat = (self._values,) + tuple(k._values for k in other)
        new_tuples = np.concatenate(to_concat)

        # if all(isinstance(x, MultiIndex) for x in other):
        try:
            # We only get here if other contains at least one index with tuples,
            # setting names to None automatically
            return MultiIndex.from_tuples(new_tuples)
        except (TypeError, IndexError):
            # not all entries are tuples -> flat object Index
            return Index(new_tuples)

2329 

2330 def argsort( 

2331 self, *args, na_position: str = "last", **kwargs 

2332 ) -> npt.NDArray[np.intp]: 

2333 target = self._sort_levels_monotonic(raise_if_incomparable=True) 

2334 keys = [lev.codes for lev in target._get_codes_for_sorting()] 

2335 return lexsort_indexer(keys, na_position=na_position, codes_given=True) 

2336 

2337 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) 

2338 def repeat(self, repeats: int, axis=None) -> MultiIndex: 

2339 nv.validate_repeat((), {"axis": axis}) 

2340 # error: Incompatible types in assignment (expression has type "ndarray", 

2341 # variable has type "int") 

2342 repeats = ensure_platform_int(repeats) # type: ignore[assignment] 

2343 return MultiIndex( 

2344 levels=self.levels, 

2345 codes=[ 

2346 level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats) 

2347 for level_codes in self.codes 

2348 ], 

2349 names=self.names, 

2350 sortorder=self.sortorder, 

2351 verify_integrity=False, 

2352 ) 

2353 

    # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make a new :class:`pandas.MultiIndex` with the passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when ``level`` is not specified.
        level : int or level name, default None
        errors : str, default 'raise'

        Returns
        -------
        MultiIndex

        Raises
        ------
        KeyError
            If a label is not found and ``errors`` is not ``'ignore'``.

        Examples
        --------
        >>> idx = pd.MultiIndex.from_product([(0, 1, 2), ('green', 'purple')],
        ...                                  names=["number", "color"])
        >>> idx
        MultiIndex([(0, 'green'),
                    (0, 'purple'),
                    (1, 'green'),
                    (1, 'purple'),
                    (2, 'green'),
                    (2, 'purple')],
                   names=['number', 'color'])
        >>> idx.drop([(1, 'green'), (2, 'purple')])
        MultiIndex([(0, 'green'),
                    (0, 'purple'),
                    (1, 'purple'),
                    (2, 'green')],
                   names=['number', 'color'])

        We can also drop from a specific level.

        >>> idx.drop('green', level='color')
        MultiIndex([(0, 'purple'),
                    (1, 'purple'),
                    (2, 'purple')],
                   names=['number', 'color'])

        >>> idx.drop([1, 2], level=0)
        MultiIndex([(0, 'green'),
                    (0, 'purple')],
                   names=['number', 'color'])
        """
        if level is not None:
            # single-level drop is handled by a dedicated helper
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                # not coercible; fall through with ``codes`` as given
                pass

        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # convert the boolean mask to positional indices
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)

2445 

    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop rows whose value at ``level`` is one of ``codes``.

        Helper for :meth:`MultiIndex.drop` when ``level`` is specified.
        Uses -2 as an internal sentinel for "label not found in level".
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            # NOTE(review): when the level is exactly as long as the index,
            # NaN labels are also flagged as missing — confirm intent.
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep the rows whose code at this level is NOT among ``values``
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]

2466 

2467 def swaplevel(self, i=-2, j=-1) -> MultiIndex: 

2468 """ 

2469 Swap level i with level j. 

2470 

2471 Calling this method does not change the ordering of the values. 

2472 

2473 Parameters 

2474 ---------- 

2475 i : int, str, default -2 

2476 First level of index to be swapped. Can pass level name as string. 

2477 Type of parameters can be mixed. 

2478 j : int, str, default -1 

2479 Second level of index to be swapped. Can pass level name as string. 

2480 Type of parameters can be mixed. 

2481 

2482 Returns 

2483 ------- 

2484 MultiIndex 

2485 A new MultiIndex. 

2486 

2487 See Also 

2488 -------- 

2489 Series.swaplevel : Swap levels i and j in a MultiIndex. 

2490 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a 

2491 particular axis. 

2492 

2493 Examples 

2494 -------- 

2495 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']], 

2496 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]]) 

2497 >>> mi 

2498 MultiIndex([('a', 'bb'), 

2499 ('a', 'aa'), 

2500 ('b', 'bb'), 

2501 ('b', 'aa')], 

2502 ) 

2503 >>> mi.swaplevel(0, 1) 

2504 MultiIndex([('bb', 'a'), 

2505 ('aa', 'a'), 

2506 ('bb', 'b'), 

2507 ('aa', 'b')], 

2508 ) 

2509 """ 

2510 new_levels = list(self.levels) 

2511 new_codes = list(self.codes) 

2512 new_names = list(self.names) 

2513 

2514 i = self._get_level_number(i) 

2515 j = self._get_level_number(j) 

2516 

2517 new_levels[i], new_levels[j] = new_levels[j], new_levels[i] 

2518 new_codes[i], new_codes[j] = new_codes[j], new_codes[i] 

2519 new_names[i], new_names[j] = new_names[j], new_names[i] 

2520 

2521 return MultiIndex( 

2522 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2523 ) 

2524 

2525 def reorder_levels(self, order) -> MultiIndex: 

2526 """ 

2527 Rearrange levels using input order. May not drop or duplicate levels. 

2528 

2529 Parameters 

2530 ---------- 

2531 order : list of int or list of str 

2532 List representing new level order. Reference level by number 

2533 (position) or by key (label). 

2534 

2535 Returns 

2536 ------- 

2537 MultiIndex 

2538 

2539 Examples 

2540 -------- 

2541 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y']) 

2542 >>> mi 

2543 MultiIndex([(1, 3), 

2544 (2, 4)], 

2545 names=['x', 'y']) 

2546 

2547 >>> mi.reorder_levels(order=[1, 0]) 

2548 MultiIndex([(3, 1), 

2549 (4, 2)], 

2550 names=['y', 'x']) 

2551 

2552 >>> mi.reorder_levels(order=['y', 'x']) 

2553 MultiIndex([(3, 1), 

2554 (4, 2)], 

2555 names=['y', 'x']) 

2556 """ 

2557 order = [self._get_level_number(i) for i in order] 

2558 result = self._reorder_ilevels(order) 

2559 return result 

2560 

2561 def _reorder_ilevels(self, order) -> MultiIndex: 

2562 if len(order) != self.nlevels: 

2563 raise AssertionError( 

2564 f"Length of order must be same as number of levels ({self.nlevels}), " 

2565 f"got {len(order)}" 

2566 ) 

2567 new_levels = [self.levels[i] for i in order] 

2568 new_codes = [self.codes[i] for i in order] 

2569 new_names = [self.names[i] for i in order] 

2570 

2571 return MultiIndex( 

2572 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False 

2573 ) 

2574 

2575 def _recode_for_new_levels( 

2576 self, new_levels, copy: bool = True 

2577 ) -> Generator[np.ndarray, None, None]: 

2578 if len(new_levels) > self.nlevels: 

2579 raise AssertionError( 

2580 f"Length of new_levels ({len(new_levels)}) " 

2581 f"must be <= self.nlevels ({self.nlevels})" 

2582 ) 

2583 for i in range(len(new_levels)): 

2584 yield recode_for_categories( 

2585 self.codes[i], self.levels[i], new_levels[i], copy=copy 

2586 ) 

2587 

2588 def _get_codes_for_sorting(self) -> list[Categorical]: 

2589 """ 

2590 we are categorizing our codes by using the 

2591 available categories (all, not just observed) 

2592 excluding any missing ones (-1); this is in preparation 

2593 for sorting, where we need to disambiguate that -1 is not 

2594 a valid valid 

2595 """ 

2596 

2597 def cats(level_codes): 

2598 return np.arange( 

2599 np.array(level_codes).max() + 1 if len(level_codes) else 0, 

2600 dtype=level_codes.dtype, 

2601 ) 

2602 

2603 return [ 

2604 Categorical.from_codes(level_codes, cats(level_codes), True, validate=False) 

2605 for level_codes in self.codes 

2606 ] 

2607 

    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
        na_position: str = "first",
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : sort by the remaining levels after level
        na_position : {'first' or 'last'}, default 'first'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.

            .. versionadded:: 2.1.0

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # codes of the requested levels, in the requested order
        codes = [self.codes[lev] for lev in level]
        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")
        elif sort_remaining:
            # also sort by the levels that were not explicitly requested
            codes.extend(
                [self.codes[lev] for lev in range(len(self.levels)) if lev not in level]
            )
        else:
            # only the first requested level determines the recorded order
            sortorder = level[0]

        indexer = lexsort_indexer(
            codes, orders=ascending, na_position=na_position, codes_given=True
        )

        indexer = ensure_platform_int(indexer)
        # apply the permutation to every level's codes
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer

2708 

2709 def _wrap_reindex_result(self, target, indexer, preserve_names: bool): 

2710 if not isinstance(target, MultiIndex): 

2711 if indexer is None: 

2712 target = self 

2713 elif (indexer >= 0).all(): 

2714 target = self.take(indexer) 

2715 else: 

2716 try: 

2717 target = MultiIndex.from_tuples(target) 

2718 except TypeError: 

2719 # not all tuples, see test_constructor_dict_multiindex_reindex_flat 

2720 return target 

2721 

2722 target = self._maybe_preserve_names(target, preserve_names) 

2723 return target 

2724 

2725 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index: 

2726 if ( 

2727 preserve_names 

2728 and target.nlevels == self.nlevels 

2729 and target.names != self.names 

2730 ): 

2731 target = target.copy(deep=False) 

2732 target.names = self.names 

2733 return target 

2734 

2735 # -------------------------------------------------------------------- 

2736 # Indexing Methods 

2737 

2738 def _check_indexing_error(self, key) -> None: 

2739 if not is_hashable(key) or is_iterator(key): 

2740 # We allow tuples if they are hashable, whereas other Index 

2741 # subclasses require scalar. 

2742 # We have to explicitly exclude generators, as these are hashable. 

2743 raise InvalidIndexError(key) 

2744 

2745 @cache_readonly 

2746 def _should_fallback_to_positional(self) -> bool: 

2747 """ 

2748 Should integer key(s) be treated as positional? 

2749 """ 

2750 # GH#33355 

2751 return self.levels[0]._should_fallback_to_positional 

2752 

2753 def _get_indexer_strict( 

2754 self, key, axis_name: str 

2755 ) -> tuple[Index, npt.NDArray[np.intp]]: 

2756 keyarr = key 

2757 if not isinstance(keyarr, Index): 

2758 keyarr = com.asarray_tuplesafe(keyarr) 

2759 

2760 if len(keyarr) and not isinstance(keyarr[0], tuple): 

2761 indexer = self._get_indexer_level_0(keyarr) 

2762 

2763 self._raise_if_missing(key, indexer, axis_name) 

2764 return self[indexer], indexer 

2765 

2766 return super()._get_indexer_strict(key, axis_name) 

2767 

2768 def _raise_if_missing(self, key, indexer, axis_name: str) -> None: 

2769 keyarr = key 

2770 if not isinstance(key, Index): 

2771 keyarr = com.asarray_tuplesafe(key) 

2772 

2773 if len(keyarr) and not isinstance(keyarr[0], tuple): 

2774 # i.e. same condition for special case in MultiIndex._get_indexer_strict 

2775 

2776 mask = indexer == -1 

2777 if mask.any(): 

2778 check = self.levels[0].get_indexer(keyarr) 

2779 cmask = check == -1 

2780 if cmask.any(): 

2781 raise KeyError(f"{keyarr[cmask]} not in index") 

2782 # We get here when levels still contain values which are not 

2783 # actually in Index anymore 

2784 raise KeyError(f"{keyarr} not in index") 

2785 else: 

2786 return super()._raise_if_missing(key, indexer, axis_name) 

2787 

2788 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]: 

2789 """ 

2790 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`. 

2791 """ 

2792 lev = self.levels[0] 

2793 codes = self._codes[0] 

2794 cat = Categorical.from_codes(codes=codes, categories=lev, validate=False) 

2795 ci = Index(cat) 

2796 return ci.get_indexer_for(target) 

2797 

2798 def get_slice_bound( 

2799 self, 

2800 label: Hashable | Sequence[Hashable], 

2801 side: Literal["left", "right"], 

2802 ) -> int: 

2803 """ 

2804 For an ordered MultiIndex, compute slice bound 

2805 that corresponds to given label. 

2806 

2807 Returns leftmost (one-past-the-rightmost if `side=='right') position 

2808 of given label. 

2809 

2810 Parameters 

2811 ---------- 

2812 label : object or tuple of objects 

2813 side : {'left', 'right'} 

2814 

2815 Returns 

2816 ------- 

2817 int 

2818 Index of label. 

2819 

2820 Notes 

2821 ----- 

2822 This method only works if level 0 index of the MultiIndex is lexsorted. 

2823 

2824 Examples 

2825 -------- 

2826 >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')]) 

2827 

2828 Get the locations from the leftmost 'b' in the first level 

2829 until the end of the multiindex: 

2830 

2831 >>> mi.get_slice_bound('b', side="left") 

2832 1 

2833 

2834 Like above, but if you get the locations from the rightmost 

2835 'b' in the first level and 'f' in the second level: 

2836 

2837 >>> mi.get_slice_bound(('b','f'), side="right") 

2838 3 

2839 

2840 See Also 

2841 -------- 

2842 MultiIndex.get_loc : Get location for a label or a tuple of labels. 

2843 MultiIndex.get_locs : Get location for a label/slice/list/mask or a 

2844 sequence of such. 

2845 """ 

2846 if not isinstance(label, tuple): 

2847 label = (label,) 

2848 return self._partial_tup_index(label, side=side) 

2849 

    # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)

2905 

    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple ``tup``,
        narrowing the window ``[start, end)`` one level at a time.

        Raises
        ------
        UnsortedIndexError
            If ``tup`` is longer than the index's lexsort depth.
        TypeError
            If a label is not comparable with the corresponding level.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # this level's codes restricted to the current window
            section = level_codes[start:end]

            loc: npt.NDArray[np.intp] | np.intp | int
            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # narrow the window to the run matching ``idx`` at this level
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                # last requested level: the slice start is the code to bound on
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)

2956 

2957 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int: 

2958 """ 

2959 If key is NA value, location of index unify as -1. 

2960 

2961 Parameters 

2962 ---------- 

2963 level_index: Index 

2964 key : label 

2965 

2966 Returns 

2967 ------- 

2968 loc : int 

2969 If key is NA value, loc is -1 

2970 Else, location of key in index. 

2971 

2972 See Also 

2973 -------- 

2974 Index.get_loc : The get_loc method for (single-level) index. 

2975 """ 

2976 if is_scalar(key) and isna(key): 

2977 # TODO: need is_valid_na_for_dtype(key, level_index.dtype) 

2978 return -1 

2979 else: 

2980 return level_index.get_loc(key) 

2981 

    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            # not representable as a slice: full-length boolean mask instead
            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # a non-tuple key selects on level 0 only
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # full-depth key on a unique index: use the engine directly
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except KeyError as err:
                raise KeyError(key) from err
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # narrow the candidate positions level-by-level for the unsorted tail
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)

3103 

3104 def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True): 

3105 """ 

3106 Get location and sliced index for requested label(s)/level(s). 

3107 

3108 Parameters 

3109 ---------- 

3110 key : label or sequence of labels 

3111 level : int/level name or list thereof, optional 

3112 drop_level : bool, default True 

3113 If ``False``, the resulting index will not drop any level. 

3114 

3115 Returns 

3116 ------- 

3117 tuple 

3118 A 2-tuple where the elements : 

3119 

3120 Element 0: int, slice object or boolean array. 

3121 

3122 Element 1: The resulting sliced multiindex/index. If the key 

3123 contains all levels, this will be ``None``. 

3124 

3125 See Also 

3126 -------- 

3127 MultiIndex.get_loc : Get location for a label or a tuple of labels. 

3128 MultiIndex.get_locs : Get location for a label/slice/list/mask or a 

3129 sequence of such. 

3130 

3131 Examples 

3132 -------- 

3133 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')], 

3134 ... names=['A', 'B']) 

3135 

3136 >>> mi.get_loc_level('b') 

3137 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B')) 

3138 

3139 >>> mi.get_loc_level('e', level='B') 

3140 (array([False, True, False]), Index(['b'], dtype='object', name='A')) 

3141 

3142 >>> mi.get_loc_level(['b', 'e']) 

3143 (1, None) 

3144 """ 

3145 if not isinstance(level, (list, tuple)): 

3146 level = self._get_level_number(level) 

3147 else: 

3148 level = [self._get_level_number(lev) for lev in level] 

3149 

3150 loc, mi = self._get_loc_level(key, level=level) 

3151 if not drop_level: 

3152 if lib.is_integer(loc): 

3153 # Slice index must be an integer or None 

3154 mi = self[loc : loc + 1] 

3155 else: 

3156 mi = self[loc] 

3157 return loc, mi 

3158 

    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.

        Returns a 2-tuple ``(indexer, result_index)`` where ``indexer`` is an
        int, slice or boolean mask locating ``key``, and ``result_index`` is
        ``self`` sliced accordingly with the keyed level(s) dropped
        (``None`` when a complete scalar key consumed every level).
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            # drop from the highest level number down so earlier drops don't
            # shift the positions of the remaining levels
            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            # multiple levels requested: recurse per level and AND the masks
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # normalize a slice to a boolean mask so it can be AND-ed
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                    # TODO: in some cases we still need to drop some levels,
                    # e.g. test_multiindex_perf_warn
                    # test_partial_string_timestamp_multiindex
                    ilevels = [
                        i
                        for i in range(len(key))
                        if (
                            not isinstance(key[i], str)
                            or not self.levels[i]._supports_partial_string_indexing
                        )
                        and key[i] != slice(None, None)
                    ]
                    if len(ilevels) == self.nlevels:
                        # TODO: why?
                        ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # key contains at least one slice: build a combined boolean
                # mask level by level
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    # every entry was a take-everything slice
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            # scalar key for a single (possibly non-zero) level
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                # dropping the only remaining level is not allowed
                result_index = self[indexer]

            return indexer, result_index

3307 

    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this
        #
        # A slice is only returned when the level is sufficiently sorted
        # (lexsort depth covers it); otherwise a full-length boolean mask.

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            # scatter the compressed result back onto the True positions of
            # the incoming indexer
            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            # scalar label lookup within the level
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.asarray(level_codes == idx, dtype=bool)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)

3420 

    def get_locs(self, seq) -> npt.NDArray[np.intp]:
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # normalize a slice into a full-length boolean mask so it can be
            # combined with other level indexers via `&`
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)
                if indexer is None:
                    # copied so the in-place `&=` below never mutates the
                    # caller-supplied mask
                    lvl_indexer = lvl_indexer.copy()

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    # this level matched something, but the intersection with
                    # earlier levels is empty -> the combined key is absent
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)

3553 

3554 # -------------------------------------------------------------------- 

3555 

    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                # these key kinds impose no ordering of their own
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                if not isinstance(k, (np.ndarray, ExtensionArray, Index, ABCSeries)):
                    k = sanitize_array(k, None)
                k = algos.unique(k)
                # unmatched level entries sort last: initialize every slot to
                # len(level), beyond any assigned position
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend: np.lexsort sorts by the LAST key first, so the first
            # level in seq must end up last in `keys`
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]

3643 

3644 def truncate(self, before=None, after=None) -> MultiIndex: 

3645 """ 

3646 Slice index between two labels / tuples, return new MultiIndex. 

3647 

3648 Parameters 

3649 ---------- 

3650 before : label or tuple, can be partial. Default None 

3651 None defaults to start. 

3652 after : label or tuple, can be partial. Default None 

3653 None defaults to end. 

3654 

3655 Returns 

3656 ------- 

3657 MultiIndex 

3658 The truncated MultiIndex. 

3659 

3660 Examples 

3661 -------- 

3662 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']]) 

3663 >>> mi 

3664 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')], 

3665 ) 

3666 >>> mi.truncate(before='a', after='b') 

3667 MultiIndex([('a', 'x'), ('b', 'y')], 

3668 ) 

3669 """ 

3670 if after and before and after < before: 

3671 raise ValueError("after < before") 

3672 

3673 i, j = self.levels[0].slice_locs(before, after) 

3674 left, right = self.slice_locs(before, after) 

3675 

3676 new_levels = list(self.levels) 

3677 new_levels[0] = new_levels[0][i:j] 

3678 

3679 new_codes = [level_codes[left:right] for level_codes in self.codes] 

3680 new_codes[0] = new_codes[0] - i 

3681 

3682 return MultiIndex( 

3683 levels=new_levels, 

3684 codes=new_codes, 

3685 names=self._names, 

3686 verify_integrity=False, 

3687 ) 

3688 

3689 def equals(self, other: object) -> bool: 

3690 """ 

3691 Determines if two MultiIndex objects have the same labeling information 

3692 (the levels themselves do not necessarily have to be the same) 

3693 

3694 See Also 

3695 -------- 

3696 equal_levels 

3697 """ 

3698 if self.is_(other): 

3699 return True 

3700 

3701 if not isinstance(other, Index): 

3702 return False 

3703 

3704 if len(self) != len(other): 

3705 return False 

3706 

3707 if not isinstance(other, MultiIndex): 

3708 # d-level MultiIndex can equal d-tuple Index 

3709 if not self._should_compare(other): 

3710 # object Index or Categorical[object] may contain tuples 

3711 return False 

3712 return array_equivalent(self._values, other._values) 

3713 

3714 if self.nlevels != other.nlevels: 

3715 return False 

3716 

3717 for i in range(self.nlevels): 

3718 self_codes = self.codes[i] 

3719 other_codes = other.codes[i] 

3720 self_mask = self_codes == -1 

3721 other_mask = other_codes == -1 

3722 if not np.array_equal(self_mask, other_mask): 

3723 return False 

3724 self_codes = self_codes[~self_mask] 

3725 self_values = self.levels[i]._values.take(self_codes) 

3726 

3727 other_codes = other_codes[~other_mask] 

3728 other_values = other.levels[i]._values.take(other_codes) 

3729 

3730 # since we use NaT both datetime64 and timedelta64 we can have a 

3731 # situation where a level is typed say timedelta64 in self (IOW it 

3732 # has other values than NaT) but types datetime64 in other (where 

3733 # its all NaT) but these are equivalent 

3734 if len(self_values) == 0 and len(other_values) == 0: 

3735 continue 

3736 

3737 if not isinstance(self_values, np.ndarray): 

3738 # i.e. ExtensionArray 

3739 if not self_values.equals(other_values): 

3740 return False 

3741 elif not isinstance(other_values, np.ndarray): 

3742 # i.e. other is ExtensionArray 

3743 if not other_values.equals(self_values): 

3744 return False 

3745 else: 

3746 if not array_equivalent(self_values, other_values): 

3747 return False 

3748 

3749 return True 

3750 

3751 def equal_levels(self, other: MultiIndex) -> bool: 

3752 """ 

3753 Return True if the levels of both MultiIndex objects are the same 

3754 

3755 """ 

3756 if self.nlevels != other.nlevels: 

3757 return False 

3758 

3759 for i in range(self.nlevels): 

3760 if not self.levels[i].equals(other.levels[i]): 

3761 return False 

3762 return True 

3763 

3764 # -------------------------------------------------------------------- 

3765 # Set Methods 

3766 

    def _union(self, other, sort) -> MultiIndex:
        """
        Form the union of self and other.

        Parameters
        ----------
        other : Index or list-like of tuples
            Coerced via ``_convert_can_do_setop``.
        sort : bool or None
            ``False`` leaves the result unsorted; ``True`` raises if the
            values cannot be ordered; ``None`` sorts when possible and
            warns otherwise.

        Returns
        -------
        MultiIndex
        """
        other, result_names = self._convert_can_do_setop(other)
        if other.has_duplicates:
            # This is only necessary if other has dupes,
            # otherwise difference is faster
            result = super()._union(other, sort)

            if isinstance(result, MultiIndex):
                return result
            # the base-class union may come back as a plain array of tuples;
            # rebuild a MultiIndex from it
            return MultiIndex.from_arrays(
                zip(*result), sortorder=None, names=result_names
            )

        else:
            # faster path: append only the rows of `other` not already in self
            right_missing = other.difference(self, sort=False)
            if len(right_missing):
                result = self.append(right_missing)
            else:
                result = self._get_reconciled_name_object(other)

            if sort is not False:
                try:
                    result = result.sort_values()
                except TypeError:
                    if sort is True:
                        raise
                    # sort=None: best effort, warn instead of raising
                    warnings.warn(
                        "The values in the array are unorderable. "
                        "Pass `sort=False` to suppress this warning.",
                        RuntimeWarning,
                        stacklevel=find_stack_level(),
                    )
        return result

3800 

3801 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

3802 return is_object_dtype(dtype) 

3803 

3804 def _get_reconciled_name_object(self, other) -> MultiIndex: 

3805 """ 

3806 If the result of a set operation will be self, 

3807 return self, unless the names change, in which 

3808 case make a shallow copy of self. 

3809 """ 

3810 names = self._maybe_match_names(other) 

3811 if self.names != names: 

3812 # error: Cannot determine type of "rename" 

3813 return self.rename(names) # type: ignore[has-type] 

3814 return self 

3815 

3816 def _maybe_match_names(self, other): 

3817 """ 

3818 Try to find common names to attach to the result of an operation between 

3819 a and b. Return a consensus list of names if they match at least partly 

3820 or list of None if they have completely different names. 

3821 """ 

3822 if len(self.names) != len(other.names): 

3823 return [None] * len(self.names) 

3824 names = [] 

3825 for a_name, b_name in zip(self.names, other.names): 

3826 if a_name == b_name: 

3827 names.append(a_name) 

3828 else: 

3829 # TODO: what if they both have np.nan for their names? 

3830 names.append(None) 

3831 return names 

3832 

3833 def _wrap_intersection_result(self, other, result) -> MultiIndex: 

3834 _, result_names = self._convert_can_do_setop(other) 

3835 return result.set_names(result_names) 

3836 

3837 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex: 

3838 _, result_names = self._convert_can_do_setop(other) 

3839 

3840 if len(result) == 0: 

3841 return result.remove_unused_levels().set_names(result_names) 

3842 else: 

3843 return result.set_names(result_names) 

3844 

3845 def _convert_can_do_setop(self, other): 

3846 result_names = self.names 

3847 

3848 if not isinstance(other, Index): 

3849 if len(other) == 0: 

3850 return self[:0], self.names 

3851 else: 

3852 msg = "other must be a MultiIndex or a list of tuples" 

3853 try: 

3854 other = MultiIndex.from_tuples(other, names=self.names) 

3855 except (ValueError, TypeError) as err: 

3856 # ValueError raised by tuples_to_object_array if we 

3857 # have non-object dtype 

3858 raise TypeError(msg) from err 

3859 else: 

3860 result_names = get_unanimous_names(self, other) 

3861 

3862 return other, result_names 

3863 

3864 # -------------------------------------------------------------------- 

3865 

3866 @doc(Index.astype) 

3867 def astype(self, dtype, copy: bool = True): 

3868 dtype = pandas_dtype(dtype) 

3869 if isinstance(dtype, CategoricalDtype): 

3870 msg = "> 1 ndim Categorical are not supported at this time" 

3871 raise NotImplementedError(msg) 

3872 if not is_object_dtype(dtype): 

3873 raise TypeError( 

3874 "Setting a MultiIndex dtype to anything other than object " 

3875 "is not supported" 

3876 ) 

3877 if copy is True: 

3878 return self._view() 

3879 return self 

3880 

3881 def _validate_fill_value(self, item): 

3882 if isinstance(item, MultiIndex): 

3883 # GH#43212 

3884 if item.nlevels != self.nlevels: 

3885 raise ValueError("Item must have length equal to number of levels.") 

3886 return item._values 

3887 elif not isinstance(item, tuple): 

3888 # Pad the key with empty strings if lower levels of the key 

3889 # aren't specified: 

3890 item = (item,) + ("",) * (self.nlevels - 1) 

3891 elif len(item) != self.nlevels: 

3892 raise ValueError("Item must have length equal to number of levels.") 

3893 return item 

3894 

3895 def putmask(self, mask, value: MultiIndex) -> MultiIndex: 

3896 """ 

3897 Return a new MultiIndex of the values set with the mask. 

3898 

3899 Parameters 

3900 ---------- 

3901 mask : array like 

3902 value : MultiIndex 

3903 Must either be the same length as self or length one 

3904 

3905 Returns 

3906 ------- 

3907 MultiIndex 

3908 """ 

3909 mask, noop = validate_putmask(self, mask) 

3910 if noop: 

3911 return self.copy() 

3912 

3913 if len(mask) == len(value): 

3914 subset = value[mask].remove_unused_levels() 

3915 else: 

3916 subset = value.remove_unused_levels() 

3917 

3918 new_levels = [] 

3919 new_codes = [] 

3920 

3921 for i, (value_level, level, level_codes) in enumerate( 

3922 zip(subset.levels, self.levels, self.codes) 

3923 ): 

3924 new_level = level.union(value_level, sort=False) 

3925 value_codes = new_level.get_indexer_for(subset.get_level_values(i)) 

3926 new_code = ensure_int64(level_codes) 

3927 new_code[mask] = value_codes 

3928 new_levels.append(new_level) 

3929 new_codes.append(new_code) 

3930 

3931 return MultiIndex( 

3932 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False 

3933 ) 

3934 

3935 def insert(self, loc: int, item) -> MultiIndex: 

3936 """ 

3937 Make new MultiIndex inserting new item at location 

3938 

3939 Parameters 

3940 ---------- 

3941 loc : int 

3942 item : tuple 

3943 Must be same length as number of levels in the MultiIndex 

3944 

3945 Returns 

3946 ------- 

3947 new_index : Index 

3948 """ 

3949 item = self._validate_fill_value(item) 

3950 

3951 new_levels = [] 

3952 new_codes = [] 

3953 for k, level, level_codes in zip(item, self.levels, self.codes): 

3954 if k not in level: 

3955 # have to insert into level 

3956 # must insert at end otherwise you have to recompute all the 

3957 # other codes 

3958 lev_loc = len(level) 

3959 level = level.insert(lev_loc, k) 

3960 else: 

3961 lev_loc = level.get_loc(k) 

3962 

3963 new_levels.append(level) 

3964 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc)) 

3965 

3966 return MultiIndex( 

3967 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False 

3968 ) 

3969 

3970 def delete(self, loc) -> MultiIndex: 

3971 """ 

3972 Make new index with passed location deleted 

3973 

3974 Returns 

3975 ------- 

3976 new_index : MultiIndex 

3977 """ 

3978 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes] 

3979 return MultiIndex( 

3980 levels=self.levels, 

3981 codes=new_codes, 

3982 names=self.names, 

3983 verify_integrity=False, 

3984 ) 

3985 

@doc(Index.isin)
def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
    # Generators can only be consumed once; materialize up front.
    if isinstance(values, Generator):
        values = list(values)

    if level is not None:
        # Membership is checked against a single level's values only.
        level_values = self.get_level_values(self._get_level_number(level))
        if level_values.size == 0:
            return np.zeros(len(level_values), dtype=np.bool_)
        return level_values.isin(values)

    if len(values) == 0:
        return np.zeros((len(self),), dtype=np.bool_)
    if not isinstance(values, MultiIndex):
        values = MultiIndex.from_tuples(values)
    # A hit is any position the candidate index can locate.
    return values.unique().get_indexer_for(self) != -1

4004 

# MultiIndex.rename is simply an alias for Index.set_names.
# error: Incompatible types in assignment (expression has type overloaded function,
# base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
rename = Index.set_names  # type: ignore[assignment]

# ---------------------------------------------------------------
# Arithmetic/Numeric Methods - Disabled
# A MultiIndex holds tuples, so elementwise arithmetic has no meaning;
# each numeric dunder is replaced with a stub that raises a TypeError
# naming the attempted operation.

__add__ = make_invalid_op("__add__")
__radd__ = make_invalid_op("__radd__")
__iadd__ = make_invalid_op("__iadd__")
__sub__ = make_invalid_op("__sub__")
__rsub__ = make_invalid_op("__rsub__")
__isub__ = make_invalid_op("__isub__")
__pow__ = make_invalid_op("__pow__")
__rpow__ = make_invalid_op("__rpow__")
__mul__ = make_invalid_op("__mul__")
__rmul__ = make_invalid_op("__rmul__")
__floordiv__ = make_invalid_op("__floordiv__")
__rfloordiv__ = make_invalid_op("__rfloordiv__")
__truediv__ = make_invalid_op("__truediv__")
__rtruediv__ = make_invalid_op("__rtruediv__")
__mod__ = make_invalid_op("__mod__")
__rmod__ = make_invalid_op("__rmod__")
__divmod__ = make_invalid_op("__divmod__")
__rdivmod__ = make_invalid_op("__rdivmod__")
# Unary methods disabled
__neg__ = make_invalid_op("__neg__")
__pos__ = make_invalid_op("__pos__")
__abs__ = make_invalid_op("__abs__")
__invert__ = make_invalid_op("__invert__")

4035 

4036 

4037def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int: 

4038 """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted.""" 

4039 int64_codes = [ensure_int64(level_codes) for level_codes in codes] 

4040 for k in range(nlevels, 0, -1): 

4041 if libalgos.is_lexsorted(int64_codes[:k]): 

4042 return k 

4043 return 0 

4044 

4045 

def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """Blank out (with `sentinel`) leading label entries that repeat the row above."""
    rows = list(zip(*label_list))
    width = len(label_list)

    out = rows[: start + 1]
    prior = rows[start]

    for row in rows[start + 1 :]:
        sparse_row: list = []
        for pos, (old, new) in enumerate(zip(prior, row)):
            # Once a value differs from the previous row — or we reach the
            # final position, which is never sparsified — copy the rest of
            # the row verbatim and stop comparing.
            if pos == width - 1 or old != new:
                sparse_row.extend(row[pos:])
                break
            sparse_row.append(sentinel)
        # error: Argument 1 to "append" of "list" has incompatible
        # type "list[Any]"; expected "tuple[Any, ...]"
        out.append(sparse_row)  # type: ignore[arg-type]
        # Compare against the original (unsparsified) row next time.
        prior = row

    return list(zip(*out))

4076 

4077 

4078def _get_na_rep(dtype: DtypeObj) -> str: 

4079 if isinstance(dtype, ExtensionDtype): 

4080 return f"{dtype.na_value}" 

4081 else: 

4082 dtype_type = dtype.type 

4083 

4084 return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype_type, "NaN") 

4085 

4086 

def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    fallback = index

    if not isinstance(key, tuple):
        # Scalar key: drop a single level, best-effort.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.  Drop one level per key element.
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out entirely
            return fallback

    return index

4118 

4119 

4120def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray: 

4121 """ 

4122 Coerce the array-like indexer to the smallest integer dtype that can encode all 

4123 of the given categories. 

4124 

4125 Parameters 

4126 ---------- 

4127 array_like : array-like 

4128 categories : array-like 

4129 copy : bool 

4130 

4131 Returns 

4132 ------- 

4133 np.ndarray 

4134 Non-writeable. 

4135 """ 

4136 array_like = coerce_indexer_dtype(array_like, categories) 

4137 if copy: 

4138 array_like = array_like.copy() 

4139 array_like.flags.writeable = False 

4140 return array_like 

4141 

4142 

4143def _require_listlike(level, arr, arrname: str): 

4144 """ 

4145 Ensure that level is either None or listlike, and arr is list-of-listlike. 

4146 """ 

4147 if level is not None and not is_list_like(level): 

4148 if not is_list_like(arr): 

4149 raise TypeError(f"{arrname} must be list-like") 

4150 if len(arr) > 0 and is_list_like(arr[0]): 

4151 raise TypeError(f"{arrname} must be list-like") 

4152 level = [level] 

4153 arr = [arr] 

4154 elif level is None or is_list_like(level): 

4155 if not is_list_like(arr) or not is_list_like(arr[0]): 

4156 raise TypeError(f"{arrname} must be list of lists-like") 

4157 return level, arr