1from __future__ import annotations
2
3from functools import wraps
4from sys import getsizeof
5from typing import (
6 TYPE_CHECKING,
7 Any,
8 Callable,
9 Collection,
10 Generator,
11 Hashable,
12 Iterable,
13 List,
14 Literal,
15 Sequence,
16 Tuple,
17 cast,
18)
19import warnings
20
21import numpy as np
22
23from pandas._config import get_option
24
25from pandas._libs import (
26 algos as libalgos,
27 index as libindex,
28 lib,
29)
30from pandas._libs.hashtable import duplicated
31from pandas._typing import (
32 AnyAll,
33 AnyArrayLike,
34 Axis,
35 DropKeep,
36 DtypeObj,
37 F,
38 IgnoreRaise,
39 IndexLabel,
40 Scalar,
41 Shape,
42 npt,
43)
44from pandas.compat.numpy import function as nv
45from pandas.errors import (
46 InvalidIndexError,
47 PerformanceWarning,
48 UnsortedIndexError,
49)
50from pandas.util._decorators import (
51 Appender,
52 cache_readonly,
53 doc,
54)
55from pandas.util._exceptions import find_stack_level
56
57from pandas.core.dtypes.cast import coerce_indexer_dtype
58from pandas.core.dtypes.common import (
59 ensure_int64,
60 ensure_platform_int,
61 is_categorical_dtype,
62 is_extension_array_dtype,
63 is_hashable,
64 is_integer,
65 is_iterator,
66 is_list_like,
67 is_object_dtype,
68 is_scalar,
69 pandas_dtype,
70)
71from pandas.core.dtypes.dtypes import ExtensionDtype
72from pandas.core.dtypes.generic import (
73 ABCDataFrame,
74 ABCDatetimeIndex,
75 ABCTimedeltaIndex,
76)
77from pandas.core.dtypes.missing import (
78 array_equivalent,
79 isna,
80)
81
82import pandas.core.algorithms as algos
83from pandas.core.array_algos.putmask import validate_putmask
84from pandas.core.arrays import Categorical
85from pandas.core.arrays.categorical import factorize_from_iterables
86import pandas.core.common as com
87import pandas.core.indexes.base as ibase
88from pandas.core.indexes.base import (
89 Index,
90 _index_shared_docs,
91 ensure_index,
92 get_unanimous_names,
93)
94from pandas.core.indexes.frozen import FrozenList
95from pandas.core.ops.invalid import make_invalid_op
96from pandas.core.sorting import (
97 get_group_index,
98 indexer_from_factorized,
99 lexsort_indexer,
100)
101
102from pandas.io.formats.printing import pprint_thing
103
104if TYPE_CHECKING:
105 from pandas import (
106 CategoricalIndex,
107 DataFrame,
108 Series,
109 )
110
# Shared docstring substitutions: start from the base Index defaults and
# override the class names so inherited docstrings render for MultiIndex.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
_index_doc_kwargs.update(
    {"klass": "MultiIndex", "target_klass": "MultiIndex or list of tuples"}
)
115
116
class MultiIndexUIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.UInt64Engine):
    """
    This class manages a MultiIndex by mapping label combinations to positive
    integers.
    """

    _base = libindex.UInt64Engine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one uint64 (each), in a strictly
        monotonic way (i.e. respecting the lexicographic order of integer
        combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        scalar or 1-dimensional array, of dtype uint64
            Integer(s) representing one combination (each).
        """
        # Each level occupies a disjoint range of bits; shifting in place by
        # the pre-calculated per-level offsets keeps lexicographic order.
        codes <<= self.offsets

        # Because the shifted bit ranges are disjoint, OR-ing the columns is
        # the same as summing them: the result is one positive integer that
        # packs every level's code.
        if codes.ndim == 1:
            # A single key.
            return np.bitwise_or.reduce(codes)
        # One packed integer per row.
        return np.bitwise_or.reduce(codes, axis=1)
154
155
class MultiIndexPyIntEngine(libindex.BaseMultiIndexCodesEngine, libindex.ObjectEngine):
    """
    This class manages those (extreme) cases in which the number of possible
    label combinations overflows the 64 bits integers, and uses an ObjectEngine
    containing Python integers.
    """

    _base = libindex.ObjectEngine

    def _codes_to_ints(self, codes):
        """
        Transform combination(s) of uint64 in one Python integer (each), in a
        strictly monotonic way (i.e. respecting the lexicographic order of
        integer combinations): see BaseMultiIndexCodesEngine documentation.

        Parameters
        ----------
        codes : 1- or 2-dimensional array of dtype uint64
            Combinations of integers (one per row)

        Returns
        -------
        int, or 1-dimensional array of dtype object
            Integer(s) representing one combination (each).
        """
        # The shifted values can exceed 64 bits, so switch to object dtype
        # (arbitrary-precision Python ints) before applying the per-level
        # bit offsets.
        codes = codes.astype("object") << self.offsets

        # With disjoint bit ranges per level, OR-ing the columns packs every
        # level's code into a single positive integer (per row).
        if codes.ndim == 1:
            # A single key.
            return np.bitwise_or.reduce(codes)
        # One packed integer per row.
        return np.bitwise_or.reduce(codes, axis=1)
195
196
def names_compat(meth: F) -> F:
    """
    A decorator to allow either `name` or `names` keyword but not both.

    ``name`` is normalized to ``names`` before the wrapped method runs,
    which makes it easier to share code with the base class.
    """

    @wraps(meth)
    def wrapper(self_or_cls, *args, **kwargs):
        if "name" in kwargs and "names" in kwargs:
            raise TypeError("Can only provide one of `names` and `name`")
        if "name" in kwargs:
            kwargs["names"] = kwargs.pop("name")
        return meth(self_or_cls, *args, **kwargs)

    return cast(F, wrapper)
214
215
216class MultiIndex(Index):
217 """
218 A multi-level, or hierarchical, index object for pandas objects.
219
220 Parameters
221 ----------
222 levels : sequence of arrays
223 The unique labels for each level.
224 codes : sequence of arrays
225 Integers for each level designating which label at each location.
226 sortorder : optional int
227 Level of sortedness (must be lexicographically sorted by that
228 level).
229 names : optional sequence of objects
230 Names for each of the index levels. (name is accepted for compat).
231 copy : bool, default False
232 Copy the meta-data.
233 verify_integrity : bool, default True
234 Check that the levels/codes are consistent and valid.
235
236 Attributes
237 ----------
238 names
239 levels
240 codes
241 nlevels
242 levshape
243 dtypes
244
245 Methods
246 -------
247 from_arrays
248 from_tuples
249 from_product
250 from_frame
251 set_levels
252 set_codes
253 to_frame
254 to_flat_index
255 sortlevel
256 droplevel
257 swaplevel
258 reorder_levels
259 remove_unused_levels
260 get_level_values
261 get_indexer
262 get_loc
263 get_locs
264 get_loc_level
265 drop
266
267 See Also
268 --------
269 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
270 MultiIndex.from_product : Create a MultiIndex from the cartesian product
271 of iterables.
272 MultiIndex.from_tuples : Convert list of tuples to a MultiIndex.
273 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
274 Index : The base pandas Index type.
275
276 Notes
277 -----
278 See the `user guide
279 <https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html>`__
280 for more.
281
282 Examples
283 --------
284 A new ``MultiIndex`` is typically constructed using one of the helper
285 methods :meth:`MultiIndex.from_arrays`, :meth:`MultiIndex.from_product`
286 and :meth:`MultiIndex.from_tuples`. For example (using ``.from_arrays``):
287
288 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
289 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
290 MultiIndex([(1, 'red'),
291 (1, 'blue'),
292 (2, 'red'),
293 (2, 'blue')],
294 names=['number', 'color'])
295
296 See further examples for how to construct a MultiIndex in the doc strings
297 of the mentioned helper methods.
298 """
299
    # Attributes hidden from tab-completion; MultiIndex adds none beyond Index.
    _hidden_attrs = Index._hidden_attrs | frozenset()

    # initialize to zero-length tuples to make everything work
    _typ = "multiindex"
    # Per-level names/levels/codes placeholders; real values are installed by
    # __new__ via _set_names/_set_levels/_set_codes.
    _names: list[Hashable | None] = []
    _levels = FrozenList()
    _codes = FrozenList()
    _comparables = ["names"]

    # Sortedness level claimed at construction; checked against the actual
    # lexsort depth in _verify_integrity (None when unknown).
    sortorder: int | None
310
311 # --------------------------------------------------------------------
312 # Constructors
313
    def __new__(
        cls,
        levels=None,
        codes=None,
        sortorder=None,
        names=None,
        dtype=None,
        copy: bool = False,
        name=None,
        verify_integrity: bool = True,
    ) -> MultiIndex:
        """
        Construct a MultiIndex from per-level ``levels`` and ``codes``.

        See the class docstring for parameter descriptions.  ``dtype`` is
        accepted for signature compatibility with ``Index`` but is not used
        in this body.
        """
        # compat with Index
        if name is not None:
            names = name
        if levels is None or codes is None:
            raise TypeError("Must pass both levels and codes")
        if len(levels) != len(codes):
            raise ValueError("Length of levels and codes must be the same.")
        if len(levels) == 0:
            raise ValueError("Must pass non-zero number of levels/codes")

        # Bypass Index.__new__: a MultiIndex is backed by levels/codes,
        # not a single ndarray of values.
        result = object.__new__(cls)
        result._cache = {}

        # we've already validated levels and codes, so shortcut here
        result._set_levels(levels, copy=copy, validate=False)
        result._set_codes(codes, copy=copy, validate=False)

        # Default to unnamed levels; _set_names (below) also validates input.
        result._names = [None] * len(levels)
        if names is not None:
            # handles name validation
            result._set_names(names)

        if sortorder is not None:
            result.sortorder = int(sortorder)
        else:
            result.sortorder = sortorder

        if verify_integrity:
            # May remap codes (codes pointing at NaN levels become -1).
            new_codes = result._verify_integrity()
            result._codes = new_codes

        result._reset_identity()
        result._references = None

        return result
360
361 def _validate_codes(self, level: list, code: list):
362 """
363 Reassign code values as -1 if their corresponding levels are NaN.
364
365 Parameters
366 ----------
367 code : list
368 Code to reassign.
369 level : list
370 Level to check for missing values (NaN, NaT, None).
371
372 Returns
373 -------
374 new code where code value = -1 if it corresponds
375 to a level with missing values (NaN, NaT, None).
376 """
377 null_mask = isna(level)
378 if np.any(null_mask):
379 # error: Incompatible types in assignment
380 # (expression has type "ndarray[Any, dtype[Any]]",
381 # variable has type "List[Any]")
382 code = np.where(null_mask[code], -1, code) # type: ignore[assignment]
383 return code
384
385 def _verify_integrity(self, codes: list | None = None, levels: list | None = None):
386 """
387 Parameters
388 ----------
389 codes : optional list
390 Codes to check for validity. Defaults to current codes.
391 levels : optional list
392 Levels to check for validity. Defaults to current levels.
393
394 Raises
395 ------
396 ValueError
397 If length of levels and codes don't match, if the codes for any
398 level would exceed level bounds, or there are any duplicate levels.
399
400 Returns
401 -------
402 new codes where code value = -1 if it corresponds to a
403 NaN level.
404 """
405 # NOTE: Currently does not check, among other things, that cached
406 # nlevels matches nor that sortorder matches actually sortorder.
407 codes = codes or self.codes
408 levels = levels or self.levels
409
410 if len(levels) != len(codes):
411 raise ValueError(
412 "Length of levels and codes must match. NOTE: "
413 "this index is in an inconsistent state."
414 )
415 codes_length = len(codes[0])
416 for i, (level, level_codes) in enumerate(zip(levels, codes)):
417 if len(level_codes) != codes_length:
418 raise ValueError(
419 f"Unequal code lengths: {[len(code_) for code_ in codes]}"
420 )
421 if len(level_codes) and level_codes.max() >= len(level):
422 raise ValueError(
423 f"On level {i}, code max ({level_codes.max()}) >= length of "
424 f"level ({len(level)}). NOTE: this index is in an "
425 "inconsistent state"
426 )
427 if len(level_codes) and level_codes.min() < -1:
428 raise ValueError(f"On level {i}, code value ({level_codes.min()}) < -1")
429 if not level.is_unique:
430 raise ValueError(
431 f"Level values must be unique: {list(level)} on level {i}"
432 )
433 if self.sortorder is not None:
434 if self.sortorder > _lexsort_depth(self.codes, self.nlevels):
435 raise ValueError(
436 "Value for sortorder must be inferior or equal to actual "
437 f"lexsort_depth: sortorder {self.sortorder} "
438 f"with lexsort_depth {_lexsort_depth(self.codes, self.nlevels)}"
439 )
440
441 codes = [
442 self._validate_codes(level, code) for level, code in zip(levels, codes)
443 ]
444 new_codes = FrozenList(codes)
445 return new_codes
446
447 @classmethod
448 def from_arrays(
449 cls,
450 arrays,
451 sortorder=None,
452 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
453 ) -> MultiIndex:
454 """
455 Convert arrays to MultiIndex.
456
457 Parameters
458 ----------
459 arrays : list / sequence of array-likes
460 Each array-like gives one level's value for each data point.
461 len(arrays) is the number of levels.
462 sortorder : int or None
463 Level of sortedness (must be lexicographically sorted by that
464 level).
465 names : list / sequence of str, optional
466 Names for the levels in the index.
467
468 Returns
469 -------
470 MultiIndex
471
472 See Also
473 --------
474 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
475 MultiIndex.from_product : Make a MultiIndex from cartesian product
476 of iterables.
477 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
478
479 Examples
480 --------
481 >>> arrays = [[1, 1, 2, 2], ['red', 'blue', 'red', 'blue']]
482 >>> pd.MultiIndex.from_arrays(arrays, names=('number', 'color'))
483 MultiIndex([(1, 'red'),
484 (1, 'blue'),
485 (2, 'red'),
486 (2, 'blue')],
487 names=['number', 'color'])
488 """
489 error_msg = "Input must be a list / sequence of array-likes."
490 if not is_list_like(arrays):
491 raise TypeError(error_msg)
492 if is_iterator(arrays):
493 arrays = list(arrays)
494
495 # Check if elements of array are list-like
496 for array in arrays:
497 if not is_list_like(array):
498 raise TypeError(error_msg)
499
500 # Check if lengths of all arrays are equal or not,
501 # raise ValueError, if not
502 for i in range(1, len(arrays)):
503 if len(arrays[i]) != len(arrays[i - 1]):
504 raise ValueError("all arrays must be same length")
505
506 codes, levels = factorize_from_iterables(arrays)
507 if names is lib.no_default:
508 names = [getattr(arr, "name", None) for arr in arrays]
509
510 return cls(
511 levels=levels,
512 codes=codes,
513 sortorder=sortorder,
514 names=names,
515 verify_integrity=False,
516 )
517
518 @classmethod
519 @names_compat
520 def from_tuples(
521 cls,
522 tuples: Iterable[tuple[Hashable, ...]],
523 sortorder: int | None = None,
524 names: Sequence[Hashable] | Hashable = None,
525 ) -> MultiIndex:
526 """
527 Convert list of tuples to MultiIndex.
528
529 Parameters
530 ----------
531 tuples : list / sequence of tuple-likes
532 Each tuple is the index of one row/column.
533 sortorder : int or None
534 Level of sortedness (must be lexicographically sorted by that
535 level).
536 names : list / sequence of str, optional
537 Names for the levels in the index.
538
539 Returns
540 -------
541 MultiIndex
542
543 See Also
544 --------
545 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
546 MultiIndex.from_product : Make a MultiIndex from cartesian product
547 of iterables.
548 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
549
550 Examples
551 --------
552 >>> tuples = [(1, 'red'), (1, 'blue'),
553 ... (2, 'red'), (2, 'blue')]
554 >>> pd.MultiIndex.from_tuples(tuples, names=('number', 'color'))
555 MultiIndex([(1, 'red'),
556 (1, 'blue'),
557 (2, 'red'),
558 (2, 'blue')],
559 names=['number', 'color'])
560 """
561 if not is_list_like(tuples):
562 raise TypeError("Input must be a list / sequence of tuple-likes.")
563 if is_iterator(tuples):
564 tuples = list(tuples)
565 tuples = cast(Collection[Tuple[Hashable, ...]], tuples)
566
567 # handling the empty tuple cases
568 if len(tuples) and all(isinstance(e, tuple) and not e for e in tuples):
569 codes = [np.zeros(len(tuples))]
570 levels = [Index(com.asarray_tuplesafe(tuples, dtype=np.dtype("object")))]
571 return cls(
572 levels=levels,
573 codes=codes,
574 sortorder=sortorder,
575 names=names,
576 verify_integrity=False,
577 )
578
579 arrays: list[Sequence[Hashable]]
580 if len(tuples) == 0:
581 if names is None:
582 raise TypeError("Cannot infer number of levels from empty list")
583 # error: Argument 1 to "len" has incompatible type "Hashable";
584 # expected "Sized"
585 arrays = [[]] * len(names) # type: ignore[arg-type]
586 elif isinstance(tuples, (np.ndarray, Index)):
587 if isinstance(tuples, Index):
588 tuples = np.asarray(tuples._values)
589
590 arrays = list(lib.tuples_to_object_array(tuples).T)
591 elif isinstance(tuples, list):
592 arrays = list(lib.to_object_array_tuples(tuples).T)
593 else:
594 arrs = zip(*tuples)
595 arrays = cast(List[Sequence[Hashable]], arrs)
596
597 return cls.from_arrays(arrays, sortorder=sortorder, names=names)
598
599 @classmethod
600 def from_product(
601 cls,
602 iterables: Sequence[Iterable[Hashable]],
603 sortorder: int | None = None,
604 names: Sequence[Hashable] | Hashable | lib.NoDefault = lib.no_default,
605 ) -> MultiIndex:
606 """
607 Make a MultiIndex from the cartesian product of multiple iterables.
608
609 Parameters
610 ----------
611 iterables : list / sequence of iterables
612 Each iterable has unique labels for each level of the index.
613 sortorder : int or None
614 Level of sortedness (must be lexicographically sorted by that
615 level).
616 names : list / sequence of str, optional
617 Names for the levels in the index.
618 If not explicitly provided, names will be inferred from the
619 elements of iterables if an element has a name attribute.
620
621 Returns
622 -------
623 MultiIndex
624
625 See Also
626 --------
627 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
628 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
629 MultiIndex.from_frame : Make a MultiIndex from a DataFrame.
630
631 Examples
632 --------
633 >>> numbers = [0, 1, 2]
634 >>> colors = ['green', 'purple']
635 >>> pd.MultiIndex.from_product([numbers, colors],
636 ... names=['number', 'color'])
637 MultiIndex([(0, 'green'),
638 (0, 'purple'),
639 (1, 'green'),
640 (1, 'purple'),
641 (2, 'green'),
642 (2, 'purple')],
643 names=['number', 'color'])
644 """
645 from pandas.core.reshape.util import cartesian_product
646
647 if not is_list_like(iterables):
648 raise TypeError("Input must be a list / sequence of iterables.")
649 if is_iterator(iterables):
650 iterables = list(iterables)
651
652 codes, levels = factorize_from_iterables(iterables)
653 if names is lib.no_default:
654 names = [getattr(it, "name", None) for it in iterables]
655
656 # codes are all ndarrays, so cartesian_product is lossless
657 codes = cartesian_product(codes)
658 return cls(levels, codes, sortorder=sortorder, names=names)
659
660 @classmethod
661 def from_frame(cls, df: DataFrame, sortorder=None, names=None) -> MultiIndex:
662 """
663 Make a MultiIndex from a DataFrame.
664
665 Parameters
666 ----------
667 df : DataFrame
668 DataFrame to be converted to MultiIndex.
669 sortorder : int, optional
670 Level of sortedness (must be lexicographically sorted by that
671 level).
672 names : list-like, optional
673 If no names are provided, use the column names, or tuple of column
674 names if the columns is a MultiIndex. If a sequence, overwrite
675 names with the given sequence.
676
677 Returns
678 -------
679 MultiIndex
680 The MultiIndex representation of the given DataFrame.
681
682 See Also
683 --------
684 MultiIndex.from_arrays : Convert list of arrays to MultiIndex.
685 MultiIndex.from_tuples : Convert list of tuples to MultiIndex.
686 MultiIndex.from_product : Make a MultiIndex from cartesian product
687 of iterables.
688
689 Examples
690 --------
691 >>> df = pd.DataFrame([['HI', 'Temp'], ['HI', 'Precip'],
692 ... ['NJ', 'Temp'], ['NJ', 'Precip']],
693 ... columns=['a', 'b'])
694 >>> df
695 a b
696 0 HI Temp
697 1 HI Precip
698 2 NJ Temp
699 3 NJ Precip
700
701 >>> pd.MultiIndex.from_frame(df)
702 MultiIndex([('HI', 'Temp'),
703 ('HI', 'Precip'),
704 ('NJ', 'Temp'),
705 ('NJ', 'Precip')],
706 names=['a', 'b'])
707
708 Using explicit names, instead of the column names
709
710 >>> pd.MultiIndex.from_frame(df, names=['state', 'observation'])
711 MultiIndex([('HI', 'Temp'),
712 ('HI', 'Precip'),
713 ('NJ', 'Temp'),
714 ('NJ', 'Precip')],
715 names=['state', 'observation'])
716 """
717 if not isinstance(df, ABCDataFrame):
718 raise TypeError("Input must be a DataFrame")
719
720 column_names, columns = zip(*df.items())
721 names = column_names if names is None else names
722 return cls.from_arrays(columns, sortorder=sortorder, names=names)
723
724 # --------------------------------------------------------------------
725
726 @cache_readonly
727 def _values(self) -> np.ndarray:
728 # We override here, since our parent uses _data, which we don't use.
729 values = []
730
731 for i in range(self.nlevels):
732 index = self.levels[i]
733 codes = self.codes[i]
734
735 vals = index
736 if is_categorical_dtype(vals.dtype):
737 vals = cast("CategoricalIndex", vals)
738 vals = vals._data._internal_get_values()
739
740 if isinstance(vals.dtype, ExtensionDtype) or isinstance(
741 vals, (ABCDatetimeIndex, ABCTimedeltaIndex)
742 ):
743 vals = vals.astype(object)
744
745 vals = np.array(vals, copy=False)
746 vals = algos.take_nd(vals, codes, fill_value=index._na_value)
747 values.append(vals)
748
749 arr = lib.fast_zip(values)
750 return arr
751
752 @property
753 def values(self) -> np.ndarray:
754 return self._values
755
756 @property
757 def array(self):
758 """
759 Raises a ValueError for `MultiIndex` because there's no single
760 array backing a MultiIndex.
761
762 Raises
763 ------
764 ValueError
765 """
766 raise ValueError(
767 "MultiIndex has no single backing array. Use "
768 "'MultiIndex.to_numpy()' to get a NumPy array of tuples."
769 )
770
771 @cache_readonly
772 def dtypes(self) -> Series:
773 """
774 Return the dtypes as a Series for the underlying MultiIndex.
775 """
776 from pandas import Series
777
778 names = com.fill_missing_names([level.name for level in self.levels])
779 return Series([level.dtype for level in self.levels], index=Index(names))
780
781 def __len__(self) -> int:
782 return len(self.codes[0])
783
784 @property
785 def size(self) -> int:
786 """
787 Return the number of elements in the underlying data.
788 """
789 # override Index.size to avoid materializing _values
790 return len(self)
791
792 # --------------------------------------------------------------------
793 # Levels Methods
794
795 @cache_readonly
796 def levels(self) -> FrozenList:
797 # Use cache_readonly to ensure that self.get_locs doesn't repeatedly
798 # create new IndexEngine
799 # https://github.com/pandas-dev/pandas/issues/31648
800 result = [x._rename(name=name) for x, name in zip(self._levels, self._names)]
801 for level in result:
802 # disallow midx.levels[0].name = "foo"
803 level._no_setting_name = True
804 return FrozenList(result)
805
806 def _set_levels(
807 self,
808 levels,
809 *,
810 level=None,
811 copy: bool = False,
812 validate: bool = True,
813 verify_integrity: bool = False,
814 ) -> None:
815 # This is NOT part of the levels property because it should be
816 # externally not allowed to set levels. User beware if you change
817 # _levels directly
818 if validate:
819 if len(levels) == 0:
820 raise ValueError("Must set non-zero number of levels.")
821 if level is None and len(levels) != self.nlevels:
822 raise ValueError("Length of levels must match number of levels.")
823 if level is not None and len(levels) != len(level):
824 raise ValueError("Length of levels must match length of level.")
825
826 if level is None:
827 new_levels = FrozenList(
828 ensure_index(lev, copy=copy)._view() for lev in levels
829 )
830 else:
831 level_numbers = [self._get_level_number(lev) for lev in level]
832 new_levels_list = list(self._levels)
833 for lev_num, lev in zip(level_numbers, levels):
834 new_levels_list[lev_num] = ensure_index(lev, copy=copy)._view()
835 new_levels = FrozenList(new_levels_list)
836
837 if verify_integrity:
838 new_codes = self._verify_integrity(levels=new_levels)
839 self._codes = new_codes
840
841 names = self.names
842 self._levels = new_levels
843 if any(names):
844 self._set_names(names)
845
846 self._reset_cache()
847
848 def set_levels(
849 self, levels, *, level=None, verify_integrity: bool = True
850 ) -> MultiIndex:
851 """
852 Set new levels on MultiIndex. Defaults to returning new index.
853
854 Parameters
855 ----------
856 levels : sequence or list of sequence
857 New level(s) to apply.
858 level : int, level name, or sequence of int/level names (default None)
859 Level(s) to set (None for all levels).
860 verify_integrity : bool, default True
861 If True, checks that levels and codes are compatible.
862
863 Returns
864 -------
865 MultiIndex
866
867 Examples
868 --------
869 >>> idx = pd.MultiIndex.from_tuples(
870 ... [
871 ... (1, "one"),
872 ... (1, "two"),
873 ... (2, "one"),
874 ... (2, "two"),
875 ... (3, "one"),
876 ... (3, "two")
877 ... ],
878 ... names=["foo", "bar"]
879 ... )
880 >>> idx
881 MultiIndex([(1, 'one'),
882 (1, 'two'),
883 (2, 'one'),
884 (2, 'two'),
885 (3, 'one'),
886 (3, 'two')],
887 names=['foo', 'bar'])
888
889 >>> idx.set_levels([['a', 'b', 'c'], [1, 2]])
890 MultiIndex([('a', 1),
891 ('a', 2),
892 ('b', 1),
893 ('b', 2),
894 ('c', 1),
895 ('c', 2)],
896 names=['foo', 'bar'])
897 >>> idx.set_levels(['a', 'b', 'c'], level=0)
898 MultiIndex([('a', 'one'),
899 ('a', 'two'),
900 ('b', 'one'),
901 ('b', 'two'),
902 ('c', 'one'),
903 ('c', 'two')],
904 names=['foo', 'bar'])
905 >>> idx.set_levels(['a', 'b'], level='bar')
906 MultiIndex([(1, 'a'),
907 (1, 'b'),
908 (2, 'a'),
909 (2, 'b'),
910 (3, 'a'),
911 (3, 'b')],
912 names=['foo', 'bar'])
913
914 If any of the levels passed to ``set_levels()`` exceeds the
915 existing length, all of the values from that argument will
916 be stored in the MultiIndex levels, though the values will
917 be truncated in the MultiIndex output.
918
919 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1])
920 MultiIndex([('a', 1),
921 ('a', 2),
922 ('b', 1),
923 ('b', 2),
924 ('c', 1),
925 ('c', 2)],
926 names=['foo', 'bar'])
927 >>> idx.set_levels([['a', 'b', 'c'], [1, 2, 3, 4]], level=[0, 1]).levels
928 FrozenList([['a', 'b', 'c'], [1, 2, 3, 4]])
929 """
930
931 if is_list_like(levels) and not isinstance(levels, Index):
932 levels = list(levels)
933
934 level, levels = _require_listlike(level, levels, "Levels")
935 idx = self._view()
936 idx._reset_identity()
937 idx._set_levels(
938 levels, level=level, validate=True, verify_integrity=verify_integrity
939 )
940 return idx
941
942 @property
943 def nlevels(self) -> int:
944 """
945 Integer number of levels in this MultiIndex.
946
947 Examples
948 --------
949 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
950 >>> mi
951 MultiIndex([('a', 'b', 'c')],
952 )
953 >>> mi.nlevels
954 3
955 """
956 return len(self._levels)
957
958 @property
959 def levshape(self) -> Shape:
960 """
961 A tuple with the length of each level.
962
963 Examples
964 --------
965 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
966 >>> mi
967 MultiIndex([('a', 'b', 'c')],
968 )
969 >>> mi.levshape
970 (1, 1, 1)
971 """
972 return tuple(len(x) for x in self.levels)
973
974 # --------------------------------------------------------------------
975 # Codes Methods
976
977 @property
978 def codes(self):
979 return self._codes
980
981 def _set_codes(
982 self,
983 codes,
984 *,
985 level=None,
986 copy: bool = False,
987 validate: bool = True,
988 verify_integrity: bool = False,
989 ) -> None:
990 if validate:
991 if level is None and len(codes) != self.nlevels:
992 raise ValueError("Length of codes must match number of levels")
993 if level is not None and len(codes) != len(level):
994 raise ValueError("Length of codes must match length of levels.")
995
996 if level is None:
997 new_codes = FrozenList(
998 _coerce_indexer_frozen(level_codes, lev, copy=copy).view()
999 for lev, level_codes in zip(self._levels, codes)
1000 )
1001 else:
1002 level_numbers = [self._get_level_number(lev) for lev in level]
1003 new_codes_list = list(self._codes)
1004 for lev_num, level_codes in zip(level_numbers, codes):
1005 lev = self.levels[lev_num]
1006 new_codes_list[lev_num] = _coerce_indexer_frozen(
1007 level_codes, lev, copy=copy
1008 )
1009 new_codes = FrozenList(new_codes_list)
1010
1011 if verify_integrity:
1012 new_codes = self._verify_integrity(codes=new_codes)
1013
1014 self._codes = new_codes
1015
1016 self._reset_cache()
1017
1018 def set_codes(self, codes, *, level=None, verify_integrity: bool = True):
1019 """
1020 Set new codes on MultiIndex. Defaults to returning new index.
1021
1022 Parameters
1023 ----------
1024 codes : sequence or list of sequence
1025 New codes to apply.
1026 level : int, level name, or sequence of int/level names (default None)
1027 Level(s) to set (None for all levels).
1028 verify_integrity : bool, default True
1029 If True, checks that levels and codes are compatible.
1030
1031 Returns
1032 -------
1033 new index (of same type and class...etc) or None
1034 The same type as the caller or None if ``inplace=True``.
1035
1036 Examples
1037 --------
1038 >>> idx = pd.MultiIndex.from_tuples(
1039 ... [(1, "one"), (1, "two"), (2, "one"), (2, "two")], names=["foo", "bar"]
1040 ... )
1041 >>> idx
1042 MultiIndex([(1, 'one'),
1043 (1, 'two'),
1044 (2, 'one'),
1045 (2, 'two')],
1046 names=['foo', 'bar'])
1047
1048 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]])
1049 MultiIndex([(2, 'one'),
1050 (1, 'one'),
1051 (2, 'two'),
1052 (1, 'two')],
1053 names=['foo', 'bar'])
1054 >>> idx.set_codes([1, 0, 1, 0], level=0)
1055 MultiIndex([(2, 'one'),
1056 (1, 'two'),
1057 (2, 'one'),
1058 (1, 'two')],
1059 names=['foo', 'bar'])
1060 >>> idx.set_codes([0, 0, 1, 1], level='bar')
1061 MultiIndex([(1, 'one'),
1062 (1, 'one'),
1063 (2, 'two'),
1064 (2, 'two')],
1065 names=['foo', 'bar'])
1066 >>> idx.set_codes([[1, 0, 1, 0], [0, 0, 1, 1]], level=[0, 1])
1067 MultiIndex([(2, 'one'),
1068 (1, 'one'),
1069 (2, 'two'),
1070 (1, 'two')],
1071 names=['foo', 'bar'])
1072 """
1073
1074 level, codes = _require_listlike(level, codes, "Codes")
1075 idx = self._view()
1076 idx._reset_identity()
1077 idx._set_codes(codes, level=level, verify_integrity=verify_integrity)
1078 return idx
1079
1080 # --------------------------------------------------------------------
1081 # Index Internals
1082
1083 @cache_readonly
1084 def _engine(self):
1085 # Calculate the number of bits needed to represent labels in each
1086 # level, as log2 of their sizes:
1087 # NaN values are shifted to 1 and missing values in other while
1088 # calculating the indexer are shifted to 0
1089 sizes = np.ceil(
1090 np.log2(
1091 [
1092 len(level)
1093 + libindex.multiindex_nulls_shift # type: ignore[attr-defined]
1094 for level in self.levels
1095 ]
1096 )
1097 )
1098
1099 # Sum bit counts, starting from the _right_....
1100 lev_bits = np.cumsum(sizes[::-1])[::-1]
1101
1102 # ... in order to obtain offsets such that sorting the combination of
1103 # shifted codes (one for each level, resulting in a unique integer) is
1104 # equivalent to sorting lexicographically the codes themselves. Notice
1105 # that each level needs to be shifted by the number of bits needed to
1106 # represent the _previous_ ones:
1107 offsets = np.concatenate([lev_bits[1:], [0]]).astype("uint64")
1108
1109 # Check the total number of bits needed for our representation:
1110 if lev_bits[0] > 64:
1111 # The levels would overflow a 64 bit uint - use Python integers:
1112 return MultiIndexPyIntEngine(self.levels, self.codes, offsets)
1113 return MultiIndexUIntEngine(self.levels, self.codes, offsets)
1114
1115 # Return type "Callable[..., MultiIndex]" of "_constructor" incompatible with return
1116 # type "Type[MultiIndex]" in supertype "Index"
1117 @property
1118 def _constructor(self) -> Callable[..., MultiIndex]: # type: ignore[override]
1119 return type(self).from_tuples
1120
1121 @doc(Index._shallow_copy)
1122 def _shallow_copy(self, values: np.ndarray, name=lib.no_default) -> MultiIndex:
1123 names = name if name is not lib.no_default else self.names
1124
1125 return type(self).from_tuples(values, sortorder=None, names=names)
1126
1127 def _view(self) -> MultiIndex:
1128 result = type(self)(
1129 levels=self.levels,
1130 codes=self.codes,
1131 sortorder=self.sortorder,
1132 names=self.names,
1133 verify_integrity=False,
1134 )
1135 result._cache = self._cache.copy()
1136 result._cache.pop("levels", None) # GH32669
1137 return result
1138
1139 # --------------------------------------------------------------------
1140
1141 # error: Signature of "copy" incompatible with supertype "Index"
1142 def copy( # type: ignore[override]
1143 self,
1144 names=None,
1145 deep: bool = False,
1146 name=None,
1147 ):
1148 """
1149 Make a copy of this object.
1150
1151 Names, dtype, levels and codes can be passed and will be set on new copy.
1152
1153 Parameters
1154 ----------
1155 names : sequence, optional
1156 deep : bool, default False
1157 name : Label
1158 Kept for compatibility with 1-dimensional Index. Should not be used.
1159
1160 Returns
1161 -------
1162 MultiIndex
1163
1164 Notes
1165 -----
1166 In most cases, there should be no functional difference from using
1167 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1168 This could be potentially expensive on large MultiIndex objects.
1169
1170 Examples
1171 --------
1172 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b'], ['c']])
1173 >>> mi
1174 MultiIndex([('a', 'b', 'c')],
1175 )
1176 >>> mi.copy()
1177 MultiIndex([('a', 'b', 'c')],
1178 )
1179 """
1180 names = self._validate_names(name=name, names=names, deep=deep)
1181 keep_id = not deep
1182 levels, codes = None, None
1183
1184 if deep:
1185 from copy import deepcopy
1186
1187 levels = deepcopy(self.levels)
1188 codes = deepcopy(self.codes)
1189
1190 levels = levels if levels is not None else self.levels
1191 codes = codes if codes is not None else self.codes
1192
1193 new_index = type(self)(
1194 levels=levels,
1195 codes=codes,
1196 sortorder=self.sortorder,
1197 names=names,
1198 verify_integrity=False,
1199 )
1200 new_index._cache = self._cache.copy()
1201 new_index._cache.pop("levels", None) # GH32669
1202 if keep_id:
1203 new_index._id = self._id
1204 return new_index
1205
1206 def __array__(self, dtype=None) -> np.ndarray:
1207 """the array interface, return my values"""
1208 return self.values
1209
1210 def view(self, cls=None):
1211 """this is defined as a copy with the same identity"""
1212 result = self.copy()
1213 result._id = self._id
1214 return result
1215
1216 @doc(Index.__contains__)
1217 def __contains__(self, key: Any) -> bool:
1218 hash(key)
1219 try:
1220 self.get_loc(key)
1221 return True
1222 except (LookupError, TypeError, ValueError):
1223 return False
1224
1225 @cache_readonly
1226 def dtype(self) -> np.dtype:
1227 return np.dtype("O")
1228
1229 def _is_memory_usage_qualified(self) -> bool:
1230 """return a boolean if we need a qualified .info display"""
1231
1232 def f(level) -> bool:
1233 return "mixed" in level or "string" in level or "unicode" in level
1234
1235 return any(f(level) for level in self._inferred_type_levels)
1236
1237 # Cannot determine type of "memory_usage"
1238 @doc(Index.memory_usage) # type: ignore[has-type]
1239 def memory_usage(self, deep: bool = False) -> int:
1240 # we are overwriting our base class to avoid
1241 # computing .values here which could materialize
1242 # a tuple representation unnecessarily
1243 return self._nbytes(deep)
1244
1245 @cache_readonly
1246 def nbytes(self) -> int:
1247 """return the number of bytes in the underlying data"""
1248 return self._nbytes(False)
1249
1250 def _nbytes(self, deep: bool = False) -> int:
1251 """
1252 return the number of bytes in the underlying data
1253 deeply introspect the level data if deep=True
1254
1255 include the engine hashtable
1256
1257 *this is in internal routine*
1258
1259 """
1260 # for implementations with no useful getsizeof (PyPy)
1261 objsize = 24
1262
1263 level_nbytes = sum(i.memory_usage(deep=deep) for i in self.levels)
1264 label_nbytes = sum(i.nbytes for i in self.codes)
1265 names_nbytes = sum(getsizeof(i, objsize) for i in self.names)
1266 result = level_nbytes + label_nbytes + names_nbytes
1267
1268 # include our engine hashtable
1269 result += self._engine.sizeof(deep=deep)
1270 return result
1271
1272 # --------------------------------------------------------------------
1273 # Rendering Methods
1274
1275 def _formatter_func(self, tup):
1276 """
1277 Formats each item in tup according to its level's formatter function.
1278 """
1279 formatter_funcs = [level._formatter_func for level in self.levels]
1280 return tuple(func(val) for func, val in zip(formatter_funcs, tup))
1281
    def _format_native_types(
        self, *, na_rep: str = "nan", **kwargs
    ) -> npt.NDArray[np.object_]:
        """
        Format the levels to native (string) types, re-inserting ``na_rep``
        where codes are missing, and return an object-dtype values array.
        """
        new_levels = []
        new_codes = []

        # go through the levels and format them
        for level, level_codes in zip(self.levels, self.codes):
            level_strs = level._format_native_types(na_rep=na_rep, **kwargs)
            # add nan values, if there are any
            mask = level_codes == -1
            if mask.any():
                # Append na_rep as an extra level entry and point the missing
                # codes (-1) at it.
                nan_index = len(level_strs)
                # numpy 1.21 deprecated implicit string casting
                level_strs = level_strs.astype(str)
                level_strs = np.append(level_strs, na_rep)
                assert not level_codes.flags.writeable  # i.e. copy is needed
                level_codes = level_codes.copy()  # make writeable
                level_codes[mask] = nan_index
            new_levels.append(level_strs)
            new_codes.append(level_codes)

        if len(new_levels) == 1:
            # a single-level multi-index
            return Index(new_levels[0].take(new_codes[0]))._format_native_types()
        else:
            # reconstruct the multi-index
            mi = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=self.names,
                sortorder=self.sortorder,
                verify_integrity=False,
            )
            return mi._values
1317
    def format(
        self,
        name: bool | None = None,
        formatter: Callable | None = None,
        na_rep: str | None = None,
        names: bool = False,
        space: int = 2,
        sparsify=None,
        adjoin: bool = True,
    ) -> list:
        """
        Render the MultiIndex as a list of display strings.

        Parameters
        ----------
        name : bool, optional
            Alias for ``names``; overrides it when not None.
        formatter : callable, optional
            Formatter passed through to each level's ``format``.
        na_rep : str, optional
            String for NA entries; if None, chosen per level dtype.
        names : bool, default False
            If True, prepend each level's name to its column.
        space : int, default 2
            Column spacing used when ``adjoin`` is True.
        sparsify : bool, optional
            If None, taken from the "display.multi_sparse" option. A falsey
            non-None value is used as the sparsify fill sentinel itself.
        adjoin : bool, default True
            If True, join level columns into aligned lines; otherwise return
            one list per level.
        """
        if name is not None:
            names = name

        if len(self) == 0:
            return []

        stringified_levels = []
        for lev, level_codes in zip(self.levels, self.codes):
            na = na_rep if na_rep is not None else _get_na_rep(lev.dtype)

            if len(lev) > 0:
                formatted = lev.take(level_codes).format(formatter=formatter)

                # we have some NA
                mask = level_codes == -1
                if mask.any():
                    formatted = np.array(formatted, dtype=object)
                    formatted[mask] = na
                    formatted = formatted.tolist()

            else:
                # weird all NA case
                formatted = [
                    pprint_thing(na if isna(x) else x, escape_chars=("\t", "\r", "\n"))
                    for x in algos.take_nd(lev._values, level_codes)
                ]
            stringified_levels.append(formatted)

        result_levels = []
        for lev, lev_name in zip(stringified_levels, self.names):
            level = []

            if names:
                level.append(
                    pprint_thing(lev_name, escape_chars=("\t", "\r", "\n"))
                    if lev_name is not None
                    else ""
                )

            level.extend(np.array(lev, dtype=object))
            result_levels.append(level)

        if sparsify is None:
            sparsify = get_option("display.multi_sparse")

        if sparsify:
            sentinel: Literal[""] | bool | lib.NoDefault = ""
            # GH3547 use value of sparsify as sentinel if it's "Falsey"
            assert isinstance(sparsify, bool) or sparsify is lib.no_default
            if sparsify in [False, lib.no_default]:
                sentinel = sparsify
            # little bit of a kludge job for #1217
            result_levels = sparsify_labels(
                result_levels, start=int(names), sentinel=sentinel
            )

        if adjoin:
            from pandas.io.formats.format import get_adjustment

            adj = get_adjustment()
            return adj.adjoin(space, *result_levels).split("\n")
        else:
            return result_levels
1391
1392 # --------------------------------------------------------------------
1393 # Names Methods
1394
1395 def _get_names(self) -> FrozenList:
1396 return FrozenList(self._names)
1397
    def _set_names(self, names, *, level=None, validate: bool = True):
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        names : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None
        validate : bool, default True
            validate that the names match level lengths

        Raises
        ------
        TypeError if each name is not hashable.

        Notes
        -----
        sets names on levels. WARNING: mutates!

        Note that you generally want to set this *after* changing levels, so
        that it only acts on copies
        """
        # GH 15110
        # Don't allow a single string for names in a MultiIndex
        if names is not None and not is_list_like(names):
            raise ValueError("Names should be list-like for a MultiIndex")
        names = list(names)

        if validate:
            if level is not None and len(names) != len(level):
                raise ValueError("Length of names must match length of level.")
            if level is None and len(names) != self.nlevels:
                raise ValueError(
                    "Length of names must match number of levels in MultiIndex."
                )

        if level is None:
            level = range(self.nlevels)
        else:
            # Translate level labels / negative ints to positional numbers.
            level = [self._get_level_number(lev) for lev in level]

        # set the name
        for lev, name in zip(level, names):
            if name is not None:
                # GH 20527
                # All items in 'names' need to be hashable:
                if not is_hashable(name):
                    raise TypeError(
                        f"{type(self).__name__}.name must be a hashable type"
                    )
            self._names[lev] = name

        # If .levels has been accessed, the names in our cache will be stale.
        self._reset_cache()
1455
    # Expose _get_names/_set_names as the public ``names`` data property.
    names = property(
        fset=_set_names,
        fget=_get_names,
        doc="""
        Names of levels in MultiIndex.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays(
        ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
        >>> mi
        MultiIndex([(1, 3, 5),
                    (2, 4, 6)],
                   names=['x', 'y', 'z'])
        >>> mi.names
        FrozenList(['x', 'y', 'z'])
        """,
    )
1474
1475 # --------------------------------------------------------------------
1476
1477 @cache_readonly
1478 def inferred_type(self) -> str:
1479 return "mixed"
1480
    def _get_level_number(self, level) -> int:
        """
        Translate ``level`` (a level name or an integer, possibly negative)
        into a positional level number.

        Raises
        ------
        ValueError
            If the name occurs on multiple levels.
        KeyError
            If a non-integer level name is not found.
        IndexError
            If an integer level is out of bounds.
        """
        count = self.names.count(level)
        if (count > 1) and not is_integer(level):
            raise ValueError(
                f"The name {level} occurs multiple times, use a level number"
            )
        try:
            level = self.names.index(level)
        except ValueError as err:
            # Not a known name: interpret as an integer position.
            if not is_integer(level):
                raise KeyError(f"Level {level} not found") from err
            if level < 0:
                level += self.nlevels
                if level < 0:
                    orig_level = level - self.nlevels
                    raise IndexError(
                        f"Too many levels: Index has only {self.nlevels} levels, "
                        f"{orig_level} is not a valid level number"
                    ) from err
            # Note: levels are zero-based
            elif level >= self.nlevels:
                raise IndexError(
                    f"Too many levels: Index has only {self.nlevels} levels, "
                    f"not {level + 1}"
                ) from err
        return level
1507
    @cache_readonly
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.
        """
        # A -1 code marks a missing value, so the index is not monotonic.
        if any(-1 in code for code in self.codes):
            return False

        if all(level.is_monotonic_increasing for level in self.levels):
            # If each level is sorted, we can operate on the codes directly. GH27495
            return libalgos.is_lexsorted(
                [x.astype("int64", copy=False) for x in self.codes]
            )

        # reversed() because lexsort() wants the most significant key last.
        values = [
            self._get_level_values(i)._values for i in reversed(range(len(self.levels)))
        ]
        try:
            # error: Argument 1 to "lexsort" has incompatible type
            # "List[Union[ExtensionArray, ndarray[Any, Any]]]";
            # expected "Union[_SupportsArray[dtype[Any]],
            # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
            # int, float, complex, str, bytes, _NestedSequence[Union
            # [bool, int, float, complex, str, bytes]]]"
            sort_order = np.lexsort(values)  # type: ignore[arg-type]
            return Index(sort_order).is_monotonic_increasing
        except TypeError:
            # we have mixed types and np.lexsort is not happy
            return Index(self._values).is_monotonic_increasing
1538
1539 @cache_readonly
1540 def is_monotonic_decreasing(self) -> bool:
1541 """
1542 Return a boolean if the values are equal or decreasing.
1543 """
1544 # monotonic decreasing if and only if reverse is monotonic increasing
1545 return self[::-1].is_monotonic_increasing
1546
1547 @cache_readonly
1548 def _inferred_type_levels(self) -> list[str]:
1549 """return a list of the inferred types, one for each level"""
1550 return [i.inferred_type for i in self.levels]
1551
    @doc(Index.duplicated)
    def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
        # Collapse each row's codes into a single group id, then run the 1-D
        # duplicated kernel (module-level import, not this method) on the ids.
        shape = tuple(len(lev) for lev in self.levels)
        ids = get_group_index(self.codes, shape, sort=False, xnull=False)

        return duplicated(ids, keep)

    # error: Cannot override final attribute "_duplicated"
    # (previously declared in base class "IndexOpsMixin")
    _duplicated = duplicated  # type: ignore[misc]
1562
1563 def fillna(self, value=None, downcast=None):
1564 """
1565 fillna is not implemented for MultiIndex
1566 """
1567 raise NotImplementedError("isna is not defined for MultiIndex")
1568
1569 @doc(Index.dropna)
1570 def dropna(self, how: AnyAll = "any") -> MultiIndex:
1571 nans = [level_codes == -1 for level_codes in self.codes]
1572 if how == "any":
1573 indexer = np.any(nans, axis=0)
1574 elif how == "all":
1575 indexer = np.all(nans, axis=0)
1576 else:
1577 raise ValueError(f"invalid how option: {how}")
1578
1579 new_codes = [level_codes[~indexer] for level_codes in self.codes]
1580 return self.set_codes(codes=new_codes)
1581
1582 def _get_level_values(self, level: int, unique: bool = False) -> Index:
1583 """
1584 Return vector of label values for requested level,
1585 equal to the length of the index
1586
1587 **this is an internal method**
1588
1589 Parameters
1590 ----------
1591 level : int
1592 unique : bool, default False
1593 if True, drop duplicated values
1594
1595 Returns
1596 -------
1597 Index
1598 """
1599 lev = self.levels[level]
1600 level_codes = self.codes[level]
1601 name = self._names[level]
1602 if unique:
1603 level_codes = algos.unique(level_codes)
1604 filled = algos.take_nd(lev._values, level_codes, fill_value=lev._na_value)
1605 return lev._shallow_copy(filled, name=name)
1606
1607 def get_level_values(self, level):
1608 """
1609 Return vector of label values for requested level.
1610
1611 Length of returned vector is equal to the length of the index.
1612
1613 Parameters
1614 ----------
1615 level : int or str
1616 ``level`` is either the integer position of the level in the
1617 MultiIndex, or the name of the level.
1618
1619 Returns
1620 -------
1621 Index
1622 Values is a level of this MultiIndex converted to
1623 a single :class:`Index` (or subclass thereof).
1624
1625 Notes
1626 -----
1627 If the level contains missing values, the result may be casted to
1628 ``float`` with missing values specified as ``NaN``. This is because
1629 the level is converted to a regular ``Index``.
1630
1631 Examples
1632 --------
1633 Create a MultiIndex:
1634
1635 >>> mi = pd.MultiIndex.from_arrays((list('abc'), list('def')))
1636 >>> mi.names = ['level_1', 'level_2']
1637
1638 Get level values by supplying level as either integer or name:
1639
1640 >>> mi.get_level_values(0)
1641 Index(['a', 'b', 'c'], dtype='object', name='level_1')
1642 >>> mi.get_level_values('level_2')
1643 Index(['d', 'e', 'f'], dtype='object', name='level_2')
1644
1645 If a level contains missing values, the return type of the level
1646 may be cast to ``float``.
1647
1648 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).dtypes
1649 level_0 int64
1650 level_1 int64
1651 dtype: object
1652 >>> pd.MultiIndex.from_arrays([[1, None, 2], [3, 4, 5]]).get_level_values(0)
1653 Index([1.0, nan, 2.0], dtype='float64')
1654 """
1655 level = self._get_level_number(level)
1656 values = self._get_level_values(level)
1657 return values
1658
1659 @doc(Index.unique)
1660 def unique(self, level=None):
1661 if level is None:
1662 return self.drop_duplicates()
1663 else:
1664 level = self._get_level_number(level)
1665 return self._get_level_values(level=level, unique=True)
1666
1667 def to_frame(
1668 self,
1669 index: bool = True,
1670 name=lib.no_default,
1671 allow_duplicates: bool = False,
1672 ) -> DataFrame:
1673 """
1674 Create a DataFrame with the levels of the MultiIndex as columns.
1675
1676 Column ordering is determined by the DataFrame constructor with data as
1677 a dict.
1678
1679 Parameters
1680 ----------
1681 index : bool, default True
1682 Set the index of the returned DataFrame as the original MultiIndex.
1683
1684 name : list / sequence of str, optional
1685 The passed names should substitute index level names.
1686
1687 allow_duplicates : bool, optional default False
1688 Allow duplicate column labels to be created.
1689
1690 .. versionadded:: 1.5.0
1691
1692 Returns
1693 -------
1694 DataFrame
1695
1696 See Also
1697 --------
1698 DataFrame : Two-dimensional, size-mutable, potentially heterogeneous
1699 tabular data.
1700
1701 Examples
1702 --------
1703 >>> mi = pd.MultiIndex.from_arrays([['a', 'b'], ['c', 'd']])
1704 >>> mi
1705 MultiIndex([('a', 'c'),
1706 ('b', 'd')],
1707 )
1708
1709 >>> df = mi.to_frame()
1710 >>> df
1711 0 1
1712 a c a c
1713 b d b d
1714
1715 >>> df = mi.to_frame(index=False)
1716 >>> df
1717 0 1
1718 0 a c
1719 1 b d
1720
1721 >>> df = mi.to_frame(name=['x', 'y'])
1722 >>> df
1723 x y
1724 a c a c
1725 b d b d
1726 """
1727 from pandas import DataFrame
1728
1729 if name is not lib.no_default:
1730 if not is_list_like(name):
1731 raise TypeError("'name' must be a list / sequence of column names.")
1732
1733 if len(name) != len(self.levels):
1734 raise ValueError(
1735 "'name' should have same length as number of levels on index."
1736 )
1737 idx_names = name
1738 else:
1739 idx_names = self._get_level_names()
1740
1741 if not allow_duplicates and len(set(idx_names)) != len(idx_names):
1742 raise ValueError(
1743 "Cannot create duplicate column labels if allow_duplicates is False"
1744 )
1745
1746 # Guarantee resulting column order - PY36+ dict maintains insertion order
1747 result = DataFrame(
1748 {level: self._get_level_values(level) for level in range(len(self.levels))},
1749 copy=False,
1750 )
1751 result.columns = idx_names
1752
1753 if index:
1754 result.index = self
1755 return result
1756
1757 # error: Return type "Index" of "to_flat_index" incompatible with return type
1758 # "MultiIndex" in supertype "Index"
1759 def to_flat_index(self) -> Index: # type: ignore[override]
1760 """
1761 Convert a MultiIndex to an Index of Tuples containing the level values.
1762
1763 Returns
1764 -------
1765 pd.Index
1766 Index with the MultiIndex data represented in Tuples.
1767
1768 See Also
1769 --------
1770 MultiIndex.from_tuples : Convert flat index back to MultiIndex.
1771
1772 Notes
1773 -----
1774 This method will simply return the caller if called by anything other
1775 than a MultiIndex.
1776
1777 Examples
1778 --------
1779 >>> index = pd.MultiIndex.from_product(
1780 ... [['foo', 'bar'], ['baz', 'qux']],
1781 ... names=['a', 'b'])
1782 >>> index.to_flat_index()
1783 Index([('foo', 'baz'), ('foo', 'qux'),
1784 ('bar', 'baz'), ('bar', 'qux')],
1785 dtype='object')
1786 """
1787 return Index(self._values, tupleize_cols=False)
1788
1789 def _is_lexsorted(self) -> bool:
1790 """
1791 Return True if the codes are lexicographically sorted.
1792
1793 Returns
1794 -------
1795 bool
1796
1797 Examples
1798 --------
1799 In the below examples, the first level of the MultiIndex is sorted because
1800 a<b<c, so there is no need to look at the next level.
1801
1802 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
1803 ... ['d', 'e', 'f']])._is_lexsorted()
1804 True
1805 >>> pd.MultiIndex.from_arrays([['a', 'b', 'c'],
1806 ... ['d', 'f', 'e']])._is_lexsorted()
1807 True
1808
1809 In case there is a tie, the lexicographical sorting looks
1810 at the next level of the MultiIndex.
1811
1812 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'b', 'c']])._is_lexsorted()
1813 True
1814 >>> pd.MultiIndex.from_arrays([[0, 1, 1], ['a', 'c', 'b']])._is_lexsorted()
1815 False
1816 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1817 ... ['aa', 'bb', 'aa', 'bb']])._is_lexsorted()
1818 True
1819 >>> pd.MultiIndex.from_arrays([['a', 'a', 'b', 'b'],
1820 ... ['bb', 'aa', 'aa', 'bb']])._is_lexsorted()
1821 False
1822 """
1823 return self._lexsort_depth == self.nlevels
1824
1825 @cache_readonly
1826 def _lexsort_depth(self) -> int:
1827 """
1828 Compute and return the lexsort_depth, the number of levels of the
1829 MultiIndex that are sorted lexically
1830
1831 Returns
1832 -------
1833 int
1834 """
1835 if self.sortorder is not None:
1836 return self.sortorder
1837 return _lexsort_depth(self.codes, self.nlevels)
1838
    def _sort_levels_monotonic(self, raise_if_incomparable: bool = False) -> MultiIndex:
        """
        This is an *internal* function.

        Create a new MultiIndex from the current to monotonically sorted
        items IN the levels. This does not actually make the entire MultiIndex
        monotonic, JUST the levels.

        The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will also
        be .equals() to the original.

        Parameters
        ----------
        raise_if_incomparable : bool, default False
            If True, re-raise the TypeError from sorting a level with
            incomparable values; otherwise such levels are left unsorted.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
        ...                    codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
        >>> mi
        MultiIndex([('a', 'bb'),
                    ('a', 'aa'),
                    ('b', 'bb'),
                    ('b', 'aa')],
                   )

        >>> mi.sort_values()
        MultiIndex([('a', 'aa'),
                    ('a', 'bb'),
                    ('b', 'aa'),
                    ('b', 'bb')],
                   )
        """
        if self._is_lexsorted() and self.is_monotonic_increasing:
            return self

        new_levels = []
        new_codes = []

        for lev, level_codes in zip(self.levels, self.codes):
            if not lev.is_monotonic_increasing:
                try:
                    # indexer to reorder the levels
                    indexer = lev.argsort()
                except TypeError:
                    if raise_if_incomparable:
                        raise
                else:
                    # try/except/ELSE: only runs when argsort succeeded —
                    # reorder the level, then remap its codes to match.
                    lev = lev.take(indexer)

                    # indexer to reorder the level codes
                    indexer = ensure_platform_int(indexer)
                    ri = lib.get_reverse_indexer(indexer, len(indexer))
                    level_codes = algos.take_nd(ri, level_codes)

            new_levels.append(lev)
            new_codes.append(level_codes)

        return MultiIndex(
            new_levels,
            new_codes,
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
1905
    def remove_unused_levels(self) -> MultiIndex:
        """
        Create new MultiIndex from current that removes unused levels.

        Unused level(s) means levels that are not expressed in the
        labels. The resulting MultiIndex will have the same outward
        appearance, meaning the same .values and ordering. It will
        also be .equals() to the original.

        Returns
        -------
        MultiIndex

        Examples
        --------
        >>> mi = pd.MultiIndex.from_product([range(2), list('ab')])
        >>> mi
        MultiIndex([(0, 'a'),
                    (0, 'b'),
                    (1, 'a'),
                    (1, 'b')],
                   )

        >>> mi[2:]
        MultiIndex([(1, 'a'),
                    (1, 'b')],
                   )

        The 0 from the first level is not represented
        and can be removed

        >>> mi2 = mi[2:].remove_unused_levels()
        >>> mi2.levels
        FrozenList([[1], ['a', 'b']])
        """
        new_levels = []
        new_codes = []

        changed = False
        for lev, level_codes in zip(self.levels, self.codes):
            # Since few levels are typically unused, bincount() is more
            # efficient than unique() - however it only accepts positive values
            # (and drops order):
            uniques = np.where(np.bincount(level_codes + 1) > 0)[0] - 1
            has_na = int(len(uniques) and (uniques[0] == -1))

            if len(uniques) != len(lev) + has_na:
                if lev.isna().any() and len(uniques) == len(lev):
                    # NOTE(review): this 'break' abandons the remaining levels;
                    # presumably 'changed' is still False whenever this
                    # triggers — confirm, otherwise new_levels would be shorter
                    # than self.levels in the _set_levels call below.
                    break
                # We have unused levels
                changed = True

                # Recalculate uniques, now preserving order.
                # Can easily be cythonized by exploiting the already existing
                # "uniques" and stop parsing "level_codes" when all items
                # are found:
                uniques = algos.unique(level_codes)
                if has_na:
                    na_idx = np.where(uniques == -1)[0]
                    # Just ensure that -1 is in first position:
                    uniques[[0, na_idx[0]]] = uniques[[na_idx[0], 0]]

                # codes get mapped from uniques to 0:len(uniques)
                # -1 (if present) is mapped to last position
                code_mapping = np.zeros(len(lev) + has_na)
                # ... and reassigned value -1:
                code_mapping[uniques] = np.arange(len(uniques)) - has_na

                level_codes = code_mapping[level_codes]

                # new levels are simple
                lev = lev.take(uniques[has_na:])

            new_levels.append(lev)
            new_codes.append(level_codes)

        result = self.view()

        if changed:
            result._reset_identity()
            result._set_levels(new_levels, validate=False)
            result._set_codes(new_codes, validate=False)

        return result
1990
1991 # --------------------------------------------------------------------
1992 # Pickling Methods
1993
1994 def __reduce__(self):
1995 """Necessary for making this object picklable"""
1996 d = {
1997 "levels": list(self.levels),
1998 "codes": list(self.codes),
1999 "sortorder": self.sortorder,
2000 "names": list(self.names),
2001 }
2002 return ibase._new_Index, (type(self), d), None
2003
2004 # --------------------------------------------------------------------
2005
    def __getitem__(self, key):
        # Scalar key -> the corresponding tuple of level values.
        if is_scalar(key):
            key = com.cast_scalar_indexer(key)

            retval = []
            for lev, level_codes in zip(self.levels, self.codes):
                if level_codes[key] == -1:
                    # -1 code marks a missing value at this position.
                    retval.append(np.nan)
                else:
                    retval.append(lev[level_codes[key]])

            return tuple(retval)
        else:
            # in general cannot be sure whether the result will be sorted
            sortorder = None
            if com.is_bool_indexer(key):
                key = np.asarray(key, dtype=bool)
                sortorder = self.sortorder
            elif isinstance(key, slice):
                if key.step is None or key.step > 0:
                    # A forward slice preserves any known sort order.
                    sortorder = self.sortorder
            elif isinstance(key, Index):
                key = np.asarray(key)

            # Apply the indexer to the codes only; levels are shared.
            new_codes = [level_codes[key] for level_codes in self.codes]

            return MultiIndex(
                levels=self.levels,
                codes=new_codes,
                names=self.names,
                sortorder=sortorder,
                verify_integrity=False,
            )
2039
2040 def _getitem_slice(self: MultiIndex, slobj: slice) -> MultiIndex:
2041 """
2042 Fastpath for __getitem__ when we know we have a slice.
2043 """
2044 sortorder = None
2045 if slobj.step is None or slobj.step > 0:
2046 sortorder = self.sortorder
2047
2048 new_codes = [level_codes[slobj] for level_codes in self.codes]
2049
2050 return type(self)(
2051 levels=self.levels,
2052 codes=new_codes,
2053 names=self._names,
2054 sortorder=sortorder,
2055 verify_integrity=False,
2056 )
2057
    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self: MultiIndex,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> MultiIndex:
        nv.validate_take((), kwargs)
        indices = ensure_platform_int(indices)

        # only fill if we are passing a non-None fill_value
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # -1 is the code that marks a missing value within a level.
        na_value = -1

        taken = [lab.take(indices) for lab in self.codes]
        if allow_fill:
            mask = indices == -1
            if mask.any():
                # Set the NA code at every -1 position. The in-place write is
                # safe: it mutates only the freshly-taken arrays above.
                masked = []
                for new_label in taken:
                    label_values = new_label
                    label_values[mask] = na_value
                    masked.append(np.asarray(label_values))
                taken = masked

        return MultiIndex(
            levels=self.levels, codes=taken, names=self.names, verify_integrity=False
        )
2089
2090 def append(self, other):
2091 """
2092 Append a collection of Index options together.
2093
2094 Parameters
2095 ----------
2096 other : Index or list/tuple of indices
2097
2098 Returns
2099 -------
2100 Index
2101 The combined index.
2102
2103 Examples
2104 --------
2105 >>> mi = pd.MultiIndex.from_arrays([['a'], ['b']])
2106 >>> mi
2107 MultiIndex([('a', 'b')],
2108 )
2109 >>> mi.append(mi)
2110 MultiIndex([('a', 'b'), ('a', 'b')],
2111 )
2112 """
2113 if not isinstance(other, (list, tuple)):
2114 other = [other]
2115
2116 if all(
2117 (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other
2118 ):
2119 arrays, names = [], []
2120 for i in range(self.nlevels):
2121 label = self._get_level_values(i)
2122 appended = [o._get_level_values(i) for o in other]
2123 arrays.append(label.append(appended))
2124 single_label_name = all(label.name == x.name for x in appended)
2125 names.append(label.name if single_label_name else None)
2126 return MultiIndex.from_arrays(arrays, names=names)
2127
2128 to_concat = (self._values,) + tuple(k._values for k in other)
2129 new_tuples = np.concatenate(to_concat)
2130
2131 # if all(isinstance(x, MultiIndex) for x in other):
2132 try:
2133 # We only get here if other contains at least one index with tuples,
2134 # setting names to None automatically
2135 return MultiIndex.from_tuples(new_tuples)
2136 except (TypeError, IndexError):
2137 return Index(new_tuples)
2138
2139 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
2140 if len(args) == 0 and len(kwargs) == 0:
2141 # lexsort is significantly faster than self._values.argsort()
2142 target = self._sort_levels_monotonic(raise_if_incomparable=True)
2143 return lexsort_indexer(target._get_codes_for_sorting())
2144 return self._values.argsort(*args, **kwargs)
2145
    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats: int, axis=None) -> MultiIndex:
        nv.validate_repeat((), {"axis": axis})
        # error: Incompatible types in assignment (expression has type "ndarray",
        # variable has type "int")
        repeats = ensure_platform_int(repeats)  # type: ignore[assignment]
        # Only the codes are repeated; the levels are shared unchanged.
        return MultiIndex(
            levels=self.levels,
            codes=[
                level_codes.view(np.ndarray).astype(np.intp, copy=False).repeat(repeats)
                for level_codes in self.codes
            ],
            names=self.names,
            sortorder=self.sortorder,
            verify_integrity=False,
        )
2162
    # error: Signature of "drop" incompatible with supertype "Index"
    def drop(  # type: ignore[override]
        self,
        codes,
        level: Index | np.ndarray | Iterable[Hashable] | None = None,
        errors: IgnoreRaise = "raise",
    ) -> MultiIndex:
        """
        Make new MultiIndex with passed list of codes deleted.

        Parameters
        ----------
        codes : array-like
            Must be a list of tuples when level is not specified.
        level : int or level name, default None
        errors : str, default 'raise'
            If 'ignore', labels not found in the index are skipped silently.

        Returns
        -------
        MultiIndex
        """
        if level is not None:
            return self._drop_from_level(codes, level, errors)

        if not isinstance(codes, (np.ndarray, Index)):
            try:
                codes = com.index_labels_to_array(codes, dtype=np.dtype("object"))
            except ValueError:
                pass

        # Collect the positional indices of every row matching ``codes``.
        inds = []
        for level_codes in codes:
            try:
                loc = self.get_loc(level_codes)
                # get_loc returns either an integer, a slice, or a boolean
                # mask
                if isinstance(loc, int):
                    inds.append(loc)
                elif isinstance(loc, slice):
                    step = loc.step if loc.step is not None else 1
                    inds.extend(range(loc.start, loc.stop, step))
                elif com.is_bool_indexer(loc):
                    if self._lexsort_depth == 0:
                        warnings.warn(
                            "dropping on a non-lexsorted multi-index "
                            "without a level parameter may impact performance.",
                            PerformanceWarning,
                            stacklevel=find_stack_level(),
                        )
                    # Convert the boolean mask to positional indices.
                    loc = loc.nonzero()[0]
                    inds.extend(loc)
                else:
                    msg = f"unsupported indexer of type {type(loc)}"
                    raise AssertionError(msg)
            except KeyError:
                if errors != "ignore":
                    raise

        return self.delete(inds)
2222
    def _drop_from_level(
        self, codes, level, errors: IgnoreRaise = "raise"
    ) -> MultiIndex:
        """
        Drop the rows whose value in the given level matches any of ``codes``.

        Parameters
        ----------
        codes : array-like
            Labels to drop from the requested level.
        level : int or level name
        errors : str, default 'raise'
            If not 'ignore', raise KeyError when a label is missing from
            the level.

        Returns
        -------
        MultiIndex
        """
        codes = com.index_labels_to_array(codes)
        i = self._get_level_number(level)
        index = self.levels[i]
        values = index.get_indexer(codes)
        # If nan should be dropped it will equal -1 here. We have to check which values
        # are not nan and equal -1, this means they are missing in the index
        nan_codes = isna(codes)
        # -2 marks labels genuinely absent from the level, distinguishing them
        # from NaN entries which get_indexer also reports as -1
        values[(np.equal(nan_codes, False)) & (values == -1)] = -2
        if index.shape[0] == self.shape[0]:
            values[np.equal(nan_codes, True)] = -2

        not_found = codes[values == -2]
        if len(not_found) != 0 and errors != "ignore":
            raise KeyError(f"labels {not_found} not found in level")
        # keep rows whose code at this level is NOT one of the targets
        mask = ~algos.isin(self.codes[i], values)

        return self[mask]
2243
2244 def swaplevel(self, i=-2, j=-1) -> MultiIndex:
2245 """
2246 Swap level i with level j.
2247
2248 Calling this method does not change the ordering of the values.
2249
2250 Parameters
2251 ----------
2252 i : int, str, default -2
2253 First level of index to be swapped. Can pass level name as string.
2254 Type of parameters can be mixed.
2255 j : int, str, default -1
2256 Second level of index to be swapped. Can pass level name as string.
2257 Type of parameters can be mixed.
2258
2259 Returns
2260 -------
2261 MultiIndex
2262 A new MultiIndex.
2263
2264 See Also
2265 --------
2266 Series.swaplevel : Swap levels i and j in a MultiIndex.
2267 DataFrame.swaplevel : Swap levels i and j in a MultiIndex on a
2268 particular axis.
2269
2270 Examples
2271 --------
2272 >>> mi = pd.MultiIndex(levels=[['a', 'b'], ['bb', 'aa']],
2273 ... codes=[[0, 0, 1, 1], [0, 1, 0, 1]])
2274 >>> mi
2275 MultiIndex([('a', 'bb'),
2276 ('a', 'aa'),
2277 ('b', 'bb'),
2278 ('b', 'aa')],
2279 )
2280 >>> mi.swaplevel(0, 1)
2281 MultiIndex([('bb', 'a'),
2282 ('aa', 'a'),
2283 ('bb', 'b'),
2284 ('aa', 'b')],
2285 )
2286 """
2287 new_levels = list(self.levels)
2288 new_codes = list(self.codes)
2289 new_names = list(self.names)
2290
2291 i = self._get_level_number(i)
2292 j = self._get_level_number(j)
2293
2294 new_levels[i], new_levels[j] = new_levels[j], new_levels[i]
2295 new_codes[i], new_codes[j] = new_codes[j], new_codes[i]
2296 new_names[i], new_names[j] = new_names[j], new_names[i]
2297
2298 return MultiIndex(
2299 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2300 )
2301
2302 def reorder_levels(self, order) -> MultiIndex:
2303 """
2304 Rearrange levels using input order. May not drop or duplicate levels.
2305
2306 Parameters
2307 ----------
2308 order : list of int or list of str
2309 List representing new level order. Reference level by number
2310 (position) or by key (label).
2311
2312 Returns
2313 -------
2314 MultiIndex
2315
2316 Examples
2317 --------
2318 >>> mi = pd.MultiIndex.from_arrays([[1, 2], [3, 4]], names=['x', 'y'])
2319 >>> mi
2320 MultiIndex([(1, 3),
2321 (2, 4)],
2322 names=['x', 'y'])
2323
2324 >>> mi.reorder_levels(order=[1, 0])
2325 MultiIndex([(3, 1),
2326 (4, 2)],
2327 names=['y', 'x'])
2328
2329 >>> mi.reorder_levels(order=['y', 'x'])
2330 MultiIndex([(3, 1),
2331 (4, 2)],
2332 names=['y', 'x'])
2333 """
2334 order = [self._get_level_number(i) for i in order]
2335 if len(order) != self.nlevels:
2336 raise AssertionError(
2337 f"Length of order must be same as number of levels ({self.nlevels}), "
2338 f"got {len(order)}"
2339 )
2340 new_levels = [self.levels[i] for i in order]
2341 new_codes = [self.codes[i] for i in order]
2342 new_names = [self.names[i] for i in order]
2343
2344 return MultiIndex(
2345 levels=new_levels, codes=new_codes, names=new_names, verify_integrity=False
2346 )
2347
    def _get_codes_for_sorting(self) -> list[Categorical]:
        """
        we are categorizing our codes by using the
        available categories (all, not just observed)
        excluding any missing ones (-1); this is in preparation
        for sorting, where we need to disambiguate that -1 is not
        a valid value
        """

        def cats(level_codes):
            # every possible code value 0..max for this level, in the codes'
            # own dtype (empty range when the level has no codes)
            return np.arange(
                np.array(level_codes).max() + 1 if len(level_codes) else 0,
                dtype=level_codes.dtype,
            )

        return [
            Categorical.from_codes(level_codes, cats(level_codes), ordered=True)
            for level_codes in self.codes
        ]
2367
    def sortlevel(
        self,
        level: IndexLabel = 0,
        ascending: bool | list[bool] = True,
        sort_remaining: bool = True,
    ) -> tuple[MultiIndex, npt.NDArray[np.intp]]:
        """
        Sort MultiIndex at the requested level.

        The result will respect the original ordering of the associated
        factor at that level.

        Parameters
        ----------
        level : list-like, int or str, default 0
            If a string is given, must be a name of the level.
            If list-like must be names or ints of levels.
        ascending : bool, default True
            False to sort in descending order.
            Can also be a list to specify a directed ordering.
        sort_remaining : bool, default True
            Sort by the remaining levels after level.

        Returns
        -------
        sorted_index : pd.MultiIndex
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([[0, 0], [2, 1]])
        >>> mi
        MultiIndex([(0, 2),
                    (0, 1)],
                   )

        >>> mi.sortlevel()
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(sort_remaining=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))

        >>> mi.sortlevel(1)
        (MultiIndex([(0, 1),
                    (0, 2)],
                   ), array([1, 0]))

        >>> mi.sortlevel(1, ascending=False)
        (MultiIndex([(0, 2),
                    (0, 1)],
                   ), array([0, 1]))
        """
        if not is_list_like(level):
            level = [level]
        # error: Item "Hashable" of "Union[Hashable, Sequence[Hashable]]" has
        # no attribute "__iter__" (not iterable)
        level = [
            self._get_level_number(lev) for lev in level  # type: ignore[union-attr]
        ]
        sortorder = None

        # we have a directed ordering via ascending
        if isinstance(ascending, list):
            if not len(level) == len(ascending):
                raise ValueError("level must have same length as ascending")

            indexer = lexsort_indexer(
                [self.codes[lev] for lev in level], orders=ascending
            )

        # level ordering
        else:
            codes = list(self.codes)
            shape = list(self.levshape)

            # partition codes and shape
            primary = tuple(codes[lev] for lev in level)
            primshp = tuple(shape[lev] for lev in level)

            # Reverse sorted to retain the order of
            # smaller indices that needs to be removed
            for lev in sorted(level, reverse=True):
                codes.pop(lev)
                shape.pop(lev)

            if sort_remaining:
                # NOTE(review): ``primary``/``primshp`` end up repeated here;
                # repeating a lexsort key should not change the ordering, but
                # the duplication looks unintentional — confirm intent.
                primary += primary + tuple(codes)
                primshp += primshp + tuple(shape)
            else:
                sortorder = level[0]

            indexer = indexer_from_factorized(primary, primshp, compress=False)

            if not ascending:
                indexer = indexer[::-1]

        indexer = ensure_platform_int(indexer)
        new_codes = [level_codes.take(indexer) for level_codes in self.codes]

        new_index = MultiIndex(
            codes=new_codes,
            levels=self.levels,
            names=self.names,
            sortorder=sortorder,
            verify_integrity=False,
        )

        return new_index, indexer
2481
2482 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
2483 if not isinstance(target, MultiIndex):
2484 if indexer is None:
2485 target = self
2486 elif (indexer >= 0).all():
2487 target = self.take(indexer)
2488 else:
2489 try:
2490 target = MultiIndex.from_tuples(target)
2491 except TypeError:
2492 # not all tuples, see test_constructor_dict_multiindex_reindex_flat
2493 return target
2494
2495 target = self._maybe_preserve_names(target, preserve_names)
2496 return target
2497
2498 def _maybe_preserve_names(self, target: Index, preserve_names: bool) -> Index:
2499 if (
2500 preserve_names
2501 and target.nlevels == self.nlevels
2502 and target.names != self.names
2503 ):
2504 target = target.copy(deep=False)
2505 target.names = self.names
2506 return target
2507
2508 # --------------------------------------------------------------------
2509 # Indexing Methods
2510
2511 def _check_indexing_error(self, key) -> None:
2512 if not is_hashable(key) or is_iterator(key):
2513 # We allow tuples if they are hashable, whereas other Index
2514 # subclasses require scalar.
2515 # We have to explicitly exclude generators, as these are hashable.
2516 raise InvalidIndexError(key)
2517
    @cache_readonly
    def _should_fallback_to_positional(self) -> bool:
        """
        Should integer key(s) be treated as positional?

        Defers to level 0's behavior, since a scalar key is looked up
        against the first level.
        """
        # GH#33355
        return self.levels[0]._should_fallback_to_positional
2525
2526 def _get_indexer_strict(
2527 self, key, axis_name: str
2528 ) -> tuple[Index, npt.NDArray[np.intp]]:
2529 keyarr = key
2530 if not isinstance(keyarr, Index):
2531 keyarr = com.asarray_tuplesafe(keyarr)
2532
2533 if len(keyarr) and not isinstance(keyarr[0], tuple):
2534 indexer = self._get_indexer_level_0(keyarr)
2535
2536 self._raise_if_missing(key, indexer, axis_name)
2537 return self[indexer], indexer
2538
2539 return super()._get_indexer_strict(key, axis_name)
2540
2541 def _raise_if_missing(self, key, indexer, axis_name: str) -> None:
2542 keyarr = key
2543 if not isinstance(key, Index):
2544 keyarr = com.asarray_tuplesafe(key)
2545
2546 if len(keyarr) and not isinstance(keyarr[0], tuple):
2547 # i.e. same condition for special case in MultiIndex._get_indexer_strict
2548
2549 mask = indexer == -1
2550 if mask.any():
2551 check = self.levels[0].get_indexer(keyarr)
2552 cmask = check == -1
2553 if cmask.any():
2554 raise KeyError(f"{keyarr[cmask]} not in index")
2555 # We get here when levels still contain values which are not
2556 # actually in Index anymore
2557 raise KeyError(f"{keyarr} not in index")
2558 else:
2559 return super()._raise_if_missing(key, indexer, axis_name)
2560
2561 def _get_indexer_level_0(self, target) -> npt.NDArray[np.intp]:
2562 """
2563 Optimized equivalent to `self.get_level_values(0).get_indexer_for(target)`.
2564 """
2565 lev = self.levels[0]
2566 codes = self._codes[0]
2567 cat = Categorical.from_codes(codes=codes, categories=lev)
2568 ci = Index(cat)
2569 return ci.get_indexer_for(target)
2570
    def get_slice_bound(
        self,
        label: Hashable | Sequence[Hashable],
        side: Literal["left", "right"],
    ) -> int:
        """
        For an ordered MultiIndex, compute slice bound
        that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object or tuple of objects
            May be a partial key (fewer elements than there are levels).
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.

        Notes
        -----
        This method only works if level 0 index of the MultiIndex is lexsorted.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbc'), list('gefd')])

        Get the locations from the leftmost 'b' in the first level
        until the end of the multiindex:

        >>> mi.get_slice_bound('b', side="left")
        1

        Like above, but if you get the locations from the rightmost
        'b' in the first level and 'f' in the second level:

        >>> mi.get_slice_bound(('b','f'), side="right")
        3

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # Normalize a scalar label to a 1-tuple so _partial_tup_index can
        # treat every key as a (possibly partial) tuple.
        if not isinstance(label, tuple):
            label = (label,)
        return self._partial_tup_index(label, side=side)
2622
    # pylint: disable-next=useless-parent-delegation
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        For an ordered MultiIndex, compute the slice locations for input
        labels.

        The input labels can be tuples representing partial levels, e.g. for a
        MultiIndex with 3 levels, you can pass a single value (corresponding to
        the first level), or a 1-, 2-, or 3-tuple.

        Parameters
        ----------
        start : label or tuple, default None
            If None, defaults to the beginning
        end : label or tuple
            If None, defaults to the end
        step : int or None
            Slice step

        Returns
        -------
        (start, end) : (int, int)
            Positional (integer) bounds of the requested slice.

        Notes
        -----
        This method only works if the MultiIndex is properly lexsorted. So,
        if only the first 2 levels of a 3-level MultiIndex are lexsorted,
        you can only pass two levels to ``.slice_locs``.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abbd'), list('deff')],
        ...                                names=['A', 'B'])

        Get the slice locations from the beginning of 'b' in the first level
        until the end of the multiindex:

        >>> mi.slice_locs(start='b')
        (1, 4)

        Like above, but stop at the end of 'b' in the first level and 'f' in
        the second level:

        >>> mi.slice_locs(start='b', end=('b', 'f'))
        (1, 3)

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.
        """
        # This function adds nothing to its parent implementation (the magic
        # happens in get_slice_bound method), but it adds meaningful doc.
        return super().slice_locs(start, end, step)
2678
    def _partial_tup_index(self, tup: tuple, side: Literal["left", "right"] = "left"):
        """
        Compute the slice bound for a (possibly partial) key tuple ``tup``.

        Narrows the [start, end) window one level at a time via searchsorted
        on the level codes; requires the index to be lexsorted through at
        least ``len(tup)`` levels.
        """
        if len(tup) > self._lexsort_depth:
            raise UnsortedIndexError(
                f"Key length ({len(tup)}) was greater than MultiIndex lexsort depth "
                f"({self._lexsort_depth})"
            )

        n = len(tup)
        start, end = 0, len(self)
        zipped = zip(tup, self.levels, self.codes)
        for k, (lab, lev, level_codes) in enumerate(zipped):
            # restrict the search to the window established by earlier levels
            section = level_codes[start:end]

            if lab not in lev and not isna(lab):
                # short circuit
                try:
                    loc = algos.searchsorted(lev, lab, side=side)
                except TypeError as err:
                    # non-comparable e.g. test_slice_locs_with_type_mismatch
                    raise TypeError(f"Level type mismatch: {lab}") from err
                if not is_integer(loc):
                    # non-comparable level, e.g. test_groupby_example
                    raise TypeError(f"Level type mismatch: {lab}")
                if side == "right" and loc >= 0:
                    loc -= 1
                return start + algos.searchsorted(section, loc, side=side)

            idx = self._get_loc_single_level_index(lev, lab)
            if isinstance(idx, slice) and k < n - 1:
                # Get start and end value from slice, necessary when a non-integer
                # interval is given as input GH#37707
                start = idx.start
                end = idx.stop
            elif k < n - 1:
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                end = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="right"
                )
                # error: Incompatible types in assignment (expression has type
                # "Union[ndarray[Any, dtype[signedinteger[Any]]]
                start = start + algos.searchsorted(  # type: ignore[assignment]
                    section, idx, side="left"
                )
            elif isinstance(idx, slice):
                idx = idx.start
                return start + algos.searchsorted(section, idx, side=side)
            else:
                return start + algos.searchsorted(section, idx, side=side)
2728
2729 def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
2730 """
2731 If key is NA value, location of index unify as -1.
2732
2733 Parameters
2734 ----------
2735 level_index: Index
2736 key : label
2737
2738 Returns
2739 -------
2740 loc : int
2741 If key is NA value, loc is -1
2742 Else, location of key in index.
2743
2744 See Also
2745 --------
2746 Index.get_loc : The get_loc method for (single-level) index.
2747 """
2748 if is_scalar(key) and isna(key):
2749 # TODO: need is_valid_na_for_dtype(key, level_index.dtype)
2750 return -1
2751 else:
2752 return level_index.get_loc(key)
2753
    def get_loc(self, key):
        """
        Get location for a label or a tuple of labels.

        The location is returned as an integer/slice or boolean
        mask.

        Parameters
        ----------
        key : label or tuple of labels (one for each level)

        Returns
        -------
        int, slice object or boolean mask
            If the key is past the lexsort depth, the return may be a
            boolean mask array, otherwise it is always a slice or int.

        See Also
        --------
        Index.get_loc : The get_loc method for (single-level) index.
        MultiIndex.slice_locs : Get slice location given start label(s) and
            end label(s).
        MultiIndex.get_locs : Get location for a label/slice/list/mask or a
            sequence of such.

        Notes
        -----
        The key cannot be a slice, list of same-level labels, a boolean mask,
        or a sequence of such. If you want to use those, use
        :meth:`MultiIndex.get_locs` instead.

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_loc('b')
        slice(1, 3, None)

        >>> mi.get_loc(('b', 'e'))
        1
        """
        self._check_indexing_error(key)

        def _maybe_to_slice(loc):
            """convert integer indexer to boolean mask or slice if possible"""
            if not isinstance(loc, np.ndarray) or loc.dtype != np.intp:
                return loc

            loc = lib.maybe_indices_to_slice(loc, len(self))
            if isinstance(loc, slice):
                return loc

            mask = np.empty(len(self), dtype="bool")
            mask.fill(False)
            mask[loc] = True
            return mask

        if not isinstance(key, tuple):
            # scalar key: only level 0 can match
            loc = self._get_level_indexer(key, level=0)
            return _maybe_to_slice(loc)

        keylen = len(key)
        if self.nlevels < keylen:
            raise KeyError(
                f"Key length ({keylen}) exceeds index depth ({self.nlevels})"
            )

        if keylen == self.nlevels and self.is_unique:
            # TODO: what if we have an IntervalIndex level?
            # i.e. do we need _index_as_unique on that level?
            try:
                return self._engine.get_loc(key)
            except TypeError:
                # e.g. test_partial_slicing_with_multiindex partial string slicing
                loc, _ = self.get_loc_level(key, list(range(self.nlevels)))
                return loc

        # -- partial selection or non-unique index
        # break the key into 2 parts based on the lexsort_depth of the index;
        # the first part returns a continuous slice of the index; the 2nd part
        # needs linear search within the slice
        i = self._lexsort_depth
        lead_key, follow_key = key[:i], key[i:]

        if not lead_key:
            start = 0
            stop = len(self)
        else:
            try:
                start, stop = self.slice_locs(lead_key, lead_key)
            except TypeError as err:
                # e.g. test_groupby_example key = ((0, 0, 1, 2), "new_col")
                # when self has 5 integer levels
                raise KeyError(key) from err

        if start == stop:
            raise KeyError(key)

        if not follow_key:
            return slice(start, stop)

        warnings.warn(
            "indexing past lexsort depth may impact performance.",
            PerformanceWarning,
            stacklevel=find_stack_level(),
        )

        loc = np.arange(start, stop, dtype=np.intp)

        # narrow the candidate positions level-by-level for the unsorted tail
        for i, k in enumerate(follow_key, len(lead_key)):
            mask = self.codes[i][loc] == self._get_loc_single_level_index(
                self.levels[i], k
            )
            if not mask.all():
                loc = loc[mask]
            if not len(loc):
                raise KeyError(key)

        # if the whole [start, stop) range survived, return the cheaper slice
        return _maybe_to_slice(loc) if len(loc) != stop - start else slice(start, stop)
2873
2874 def get_loc_level(self, key, level: IndexLabel = 0, drop_level: bool = True):
2875 """
2876 Get location and sliced index for requested label(s)/level(s).
2877
2878 Parameters
2879 ----------
2880 key : label or sequence of labels
2881 level : int/level name or list thereof, optional
2882 drop_level : bool, default True
2883 If ``False``, the resulting index will not drop any level.
2884
2885 Returns
2886 -------
2887 tuple
2888 A 2-tuple where the elements :
2889
2890 Element 0: int, slice object or boolean array.
2891
2892 Element 1: The resulting sliced multiindex/index. If the key
2893 contains all levels, this will be ``None``.
2894
2895 See Also
2896 --------
2897 MultiIndex.get_loc : Get location for a label or a tuple of labels.
2898 MultiIndex.get_locs : Get location for a label/slice/list/mask or a
2899 sequence of such.
2900
2901 Examples
2902 --------
2903 >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')],
2904 ... names=['A', 'B'])
2905
2906 >>> mi.get_loc_level('b')
2907 (slice(1, 3, None), Index(['e', 'f'], dtype='object', name='B'))
2908
2909 >>> mi.get_loc_level('e', level='B')
2910 (array([False, True, False]), Index(['b'], dtype='object', name='A'))
2911
2912 >>> mi.get_loc_level(['b', 'e'])
2913 (1, None)
2914 """
2915 if not isinstance(level, (list, tuple)):
2916 level = self._get_level_number(level)
2917 else:
2918 level = [self._get_level_number(lev) for lev in level]
2919
2920 loc, mi = self._get_loc_level(key, level=level)
2921 if not drop_level:
2922 if lib.is_integer(loc):
2923 mi = self[loc : loc + 1]
2924 else:
2925 mi = self[loc]
2926 return loc, mi
2927
    def _get_loc_level(self, key, level: int | list[int] = 0):
        """
        get_loc_level but with `level` known to be positional, not name-based.
        """

        # different name to distinguish from maybe_droplevels
        def maybe_mi_droplevels(indexer, levels):
            """
            If level does not exist or all levels were dropped, the exception
            has to be handled outside.
            """
            new_index = self[indexer]

            for i in sorted(levels, reverse=True):
                new_index = new_index._drop_level_numbers([i])

            return new_index

        if isinstance(level, (tuple, list)):
            if len(key) != len(level):
                raise AssertionError(
                    "Key for location must have same length as number of levels"
                )
            result = None
            for lev, k in zip(level, key):
                loc, new_index = self._get_loc_level(k, level=lev)
                if isinstance(loc, slice):
                    # convert to a boolean mask so per-level results can be
                    # AND-ed together below
                    mask = np.zeros(len(self), dtype=bool)
                    mask[loc] = True
                    loc = mask
                result = loc if result is None else result & loc

            try:
                # FIXME: we should be only dropping levels on which we are
                # scalar-indexing
                mi = maybe_mi_droplevels(result, level)
            except ValueError:
                # droplevel failed because we tried to drop all levels,
                # i.e. len(level) == self.nlevels
                mi = self[result]

            return result, mi

        # kludge for #1796
        if isinstance(key, list):
            key = tuple(key)

        if isinstance(key, tuple) and level == 0:
            try:
                # Check if this tuple is a single key in our first level
                if key in self.levels[0]:
                    indexer = self._get_level_indexer(key, level=level)
                    new_index = maybe_mi_droplevels(indexer, [0])
                    return indexer, new_index
            except (TypeError, InvalidIndexError):
                pass

            if not any(isinstance(k, slice) for k in key):
                if len(key) == self.nlevels and self.is_unique:
                    # Complete key in unique index -> standard get_loc
                    try:
                        return (self._engine.get_loc(key), None)
                    except KeyError as err:
                        raise KeyError(key) from err
                    except TypeError:
                        # e.g. partial string indexing
                        # test_partial_string_timestamp_multiindex
                        pass

                # partial selection
                indexer = self.get_loc(key)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                if len(ilevels) == self.nlevels:
                    if is_integer(indexer):
                        # we are dropping all levels
                        return indexer, None

                # TODO: in some cases we still need to drop some levels,
                # e.g. test_multiindex_perf_warn
                # test_partial_string_timestamp_multiindex
                ilevels = [
                    i
                    for i in range(len(key))
                    if (
                        not isinstance(key[i], str)
                        or not self.levels[i]._supports_partial_string_indexing
                    )
                    and key[i] != slice(None, None)
                ]
                if len(ilevels) == self.nlevels:
                    # TODO: why?
                    ilevels = []
                return indexer, maybe_mi_droplevels(indexer, ilevels)

            else:
                # key contains slices: intersect the per-level indexers
                indexer = None
                for i, k in enumerate(key):
                    if not isinstance(k, slice):
                        loc_level = self._get_level_indexer(k, level=i)
                        if isinstance(loc_level, slice):
                            if com.is_null_slice(loc_level) or com.is_full_slice(
                                loc_level, len(self)
                            ):
                                # everything
                                continue

                            # e.g. test_xs_IndexSlice_argument_not_implemented
                            k_index = np.zeros(len(self), dtype=bool)
                            k_index[loc_level] = True

                        else:
                            k_index = loc_level

                    elif com.is_null_slice(k):
                        # taking everything, does not affect `indexer` below
                        continue

                    else:
                        # FIXME: this message can be inaccurate, e.g.
                        # test_series_varied_multiindex_alignment
                        raise TypeError(f"Expected label or tuple of labels, got {key}")

                    if indexer is None:
                        indexer = k_index
                    else:
                        indexer &= k_index
                if indexer is None:
                    indexer = slice(None, None)
                ilevels = [i for i in range(len(key)) if key[i] != slice(None, None)]
                return indexer, maybe_mi_droplevels(indexer, ilevels)
        else:
            indexer = self._get_level_indexer(key, level=level)
            if (
                isinstance(key, str)
                and self.levels[level]._supports_partial_string_indexing
            ):
                # check to see if we did an exact lookup vs sliced
                check = self.levels[level].get_loc(key)
                if not is_integer(check):
                    # e.g. test_partial_string_timestamp_multiindex
                    return indexer, self[indexer]

            try:
                result_index = maybe_mi_droplevels(indexer, [level])
            except ValueError:
                result_index = self[indexer]

            return indexer, result_index
3076
    def _get_level_indexer(
        self, key, level: int = 0, indexer: npt.NDArray[np.bool_] | None = None
    ):
        """
        Locate ``key`` within a single level.

        Returns a slice when the level is sorted and the match is contiguous,
        otherwise a boolean array over the full length of the index.

        Parameters
        ----------
        key : label or slice
        level : int, default 0
            Positional level number (never a name).
        indexer : np.ndarray[bool] or None
            If given, restrict the search to positions where it is True.
        """
        # `level` kwarg is _always_ positional, never name
        # return a boolean array or slice showing where the key is
        # in the totality of values
        # if the indexer is provided, then use this

        level_index = self.levels[level]
        level_codes = self.codes[level]

        def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
            # Compute a bool indexer to identify the positions to take.
            # If we have an existing indexer, we only need to examine the
            # subset of positions where the existing indexer is True.
            if indexer is not None:
                # we only need to look at the subset of codes where the
                # existing indexer equals True
                codes = codes[indexer]

            if step is None or step == 1:
                new_indexer = (codes >= start) & (codes < stop)
            else:
                r = np.arange(start, stop, step, dtype=codes.dtype)
                new_indexer = algos.isin(codes, r)

            if indexer is None:
                return new_indexer

            indexer = indexer.copy()
            indexer[indexer] = new_indexer
            return indexer

        if isinstance(key, slice):
            # handle a slice, returning a slice if we can
            # otherwise a boolean indexer
            step = key.step
            is_negative_step = step is not None and step < 0

            try:
                if key.start is not None:
                    start = level_index.get_loc(key.start)
                elif is_negative_step:
                    start = len(level_index) - 1
                else:
                    start = 0

                if key.stop is not None:
                    stop = level_index.get_loc(key.stop)
                elif is_negative_step:
                    stop = 0
                elif isinstance(start, slice):
                    stop = len(level_index)
                else:
                    stop = len(level_index) - 1
            except KeyError:
                # we have a partial slice (like looking up a partial date
                # string)
                start = stop = level_index.slice_indexer(key.start, key.stop, key.step)
                step = start.step

            if isinstance(start, slice) or isinstance(stop, slice):
                # we have a slice for start and/or stop
                # a partial date slicer on a DatetimeIndex generates a slice
                # note that the stop ALREADY includes the stopped point (if
                # it was a string sliced)
                start = getattr(start, "start", start)
                stop = getattr(stop, "stop", stop)
                return convert_indexer(start, stop, step)

            elif level > 0 or self._lexsort_depth == 0 or step is not None:
                # need to have like semantics here to right
                # searching as when we are using a slice
                # so adjust the stop by 1 (so we include stop)
                stop = (stop - 1) if is_negative_step else (stop + 1)
                return convert_indexer(start, stop, step)
            else:
                # sorted, so can return slice object -> view
                i = algos.searchsorted(level_codes, start, side="left")
                j = algos.searchsorted(level_codes, stop, side="right")
                return slice(i, j, step)

        else:
            idx = self._get_loc_single_level_index(level_index, key)

            if level > 0 or self._lexsort_depth == 0:
                # Desired level is not sorted
                if isinstance(idx, slice):
                    # test_get_loc_partial_timestamp_multiindex
                    locs = (level_codes >= idx.start) & (level_codes < idx.stop)
                    return locs

                locs = np.array(level_codes == idx, dtype=bool, copy=False)

                if not locs.any():
                    # The label is present in self.levels[level] but unused:
                    raise KeyError(key)
                return locs

            if isinstance(idx, slice):
                # e.g. test_partial_string_timestamp_multiindex
                start = algos.searchsorted(level_codes, idx.start, side="left")
                # NB: "left" here bc of slice semantics
                end = algos.searchsorted(level_codes, idx.stop, side="left")
            else:
                start = algos.searchsorted(level_codes, idx, side="left")
                end = algos.searchsorted(level_codes, idx, side="right")

            if start == end:
                # The label is present in self.levels[level] but unused:
                raise KeyError(key)
            return slice(start, end)
3189
    def get_locs(self, seq):
        """
        Get location for a sequence of labels.

        Parameters
        ----------
        seq : label, slice, list, mask or a sequence of such
            You should use one of the above for each level.
            If a level should not be used, set it to ``slice(None)``.

        Returns
        -------
        numpy.ndarray
            NumPy array of integers suitable for passing to iloc.

        See Also
        --------
        MultiIndex.get_loc : Get location for a label or a tuple of labels.
        MultiIndex.slice_locs : Get slice location given start label(s) and
                                end label(s).

        Examples
        --------
        >>> mi = pd.MultiIndex.from_arrays([list('abb'), list('def')])

        >>> mi.get_locs('b')  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([slice(None), ['e', 'f']])  # doctest: +SKIP
        array([1, 2], dtype=int64)

        >>> mi.get_locs([[True, False, True], slice('e', 'f')])  # doctest: +SKIP
        array([2], dtype=int64)
        """

        # must be lexsorted to at least as many levels
        true_slices = [i for (i, s) in enumerate(com.is_true_slices(seq)) if s]
        if true_slices and true_slices[-1] >= self._lexsort_depth:
            raise UnsortedIndexError(
                "MultiIndex slicing requires the index to be lexsorted: slicing "
                f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
            )

        if any(x is Ellipsis for x in seq):
            raise NotImplementedError(
                "MultiIndex does not support indexing with Ellipsis"
            )

        n = len(self)

        def _to_bool_indexer(indexer) -> npt.NDArray[np.bool_]:
            # Expand a positional slice into a full-length boolean mask;
            # boolean arrays pass through unchanged.
            if isinstance(indexer, slice):
                new_indexer = np.zeros(n, dtype=np.bool_)
                new_indexer[indexer] = True
                return new_indexer
            return indexer

        # a bool indexer for the positions we want to take; levels are
        # AND-ed into it one at a time below
        indexer: npt.NDArray[np.bool_] | None = None

        for i, k in enumerate(seq):
            lvl_indexer: npt.NDArray[np.bool_] | slice | None = None

            if com.is_bool_indexer(k):
                if len(k) != n:
                    raise ValueError(
                        "cannot index with a boolean indexer that "
                        "is not the same length as the index"
                    )
                lvl_indexer = np.asarray(k)

            elif is_list_like(k):
                # a collection of labels to include from this level (these are or'd)

                # GH#27591 check if this is a single tuple key in the level
                try:
                    lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)
                except (InvalidIndexError, TypeError, KeyError) as err:
                    # InvalidIndexError e.g. non-hashable, fall back to treating
                    # this as a sequence of labels
                    # KeyError it can be ambiguous if this is a label or sequence
                    # of labels
                    # github.com/pandas-dev/pandas/issues/39424#issuecomment-871626708
                    for x in k:
                        if not is_hashable(x):
                            # e.g. slice
                            raise err
                        # GH 39424: Ignore not founds
                        # GH 42351: No longer ignore not founds & enforced in 2.0
                        # TODO: how to handle IntervalIndex level? (no test cases)
                        item_indexer = self._get_level_indexer(
                            x, level=i, indexer=indexer
                        )
                        if lvl_indexer is None:
                            lvl_indexer = _to_bool_indexer(item_indexer)
                        elif isinstance(item_indexer, slice):
                            # OR the slice's positions into the existing mask
                            lvl_indexer[item_indexer] = True  # type: ignore[index]
                        else:
                            lvl_indexer |= item_indexer

                if lvl_indexer is None:
                    # no matches we are done
                    # test_loc_getitem_duplicates_multiindex_empty_indexer
                    return np.array([], dtype=np.intp)

            elif com.is_null_slice(k):
                # empty slice
                if indexer is None and i == len(seq) - 1:
                    return np.arange(n, dtype=np.intp)
                continue

            else:
                # a slice or a single label
                lvl_indexer = self._get_level_indexer(k, level=i, indexer=indexer)

            # update indexer
            lvl_indexer = _to_bool_indexer(lvl_indexer)
            if indexer is None:
                indexer = lvl_indexer
            else:
                indexer &= lvl_indexer
                if not np.any(indexer) and np.any(lvl_indexer):
                    # this level matched rows, but none survive the AND with
                    # earlier levels -> the requested combination is absent
                    raise KeyError(seq)

        # empty indexer
        if indexer is None:
            return np.array([], dtype=np.intp)

        pos_indexer = indexer.nonzero()[0]
        return self._reorder_indexer(seq, pos_indexer)
3320
3321 # --------------------------------------------------------------------
3322
    def _reorder_indexer(
        self,
        seq: tuple[Scalar | Iterable | AnyArrayLike, ...],
        indexer: npt.NDArray[np.intp],
    ) -> npt.NDArray[np.intp]:
        """
        Reorder an indexer of a MultiIndex (self) so that the labels are in the
        same order as given in seq

        Parameters
        ----------
        seq : label/slice/list/mask or a sequence of such
        indexer: a position indexer of self

        Returns
        -------
        indexer : a sorted position indexer of self ordered as seq
        """

        # check if sorting is necessary; break out at the first level that
        # requires it
        need_sort = False
        for i, k in enumerate(seq):
            if com.is_null_slice(k) or com.is_bool_indexer(k) or is_scalar(k):
                pass
            elif is_list_like(k):
                if len(k) <= 1:  # type: ignore[arg-type]
                    pass
                elif self._is_lexsorted():
                    # If the index is lexsorted and the list_like label
                    # in seq are sorted then we do not need to sort
                    k_codes = self.levels[i].get_indexer(k)
                    k_codes = k_codes[k_codes >= 0]  # Filter absent keys
                    # True if the given codes are not ordered
                    need_sort = (k_codes[:-1] > k_codes[1:]).any()
                else:
                    need_sort = True
            elif isinstance(k, slice):
                if self._is_lexsorted():
                    # only a reversed (negative-step) slice breaks the order
                    need_sort = k.step is not None and k.step < 0
                else:
                    need_sort = True
            else:
                need_sort = True
            if need_sort:
                break
        if not need_sort:
            return indexer

        n = len(self)
        keys: tuple[np.ndarray, ...] = ()
        # For each level of the sequence in seq, map the level codes with the
        # order they appears in a list-like sequence
        # This mapping is then use to reorder the indexer
        for i, k in enumerate(seq):
            if is_scalar(k):
                # GH#34603 we want to treat a scalar the same as an all equal list
                k = [k]
            if com.is_bool_indexer(k):
                new_order = np.arange(n)[indexer]
            elif is_list_like(k):
                # Generate a map with all level codes as sorted initially
                k = algos.unique(k)
                key_order_map = np.ones(len(self.levels[i]), dtype=np.uint64) * len(
                    self.levels[i]
                )
                # Set order as given in the indexer list
                level_indexer = self.levels[i].get_indexer(k)
                level_indexer = level_indexer[level_indexer >= 0]  # Filter absent keys
                key_order_map[level_indexer] = np.arange(len(level_indexer))

                new_order = key_order_map[self.codes[i][indexer]]
            elif isinstance(k, slice) and k.step is not None and k.step < 0:
                # flip order for negative step
                new_order = np.arange(n)[::-1][indexer]
            elif isinstance(k, slice) and k.start is None and k.stop is None:
                # slice(None) should not determine order GH#31330
                new_order = np.ones((n,), dtype=np.intp)[indexer]
            else:
                # For all other case, use the same order as the level
                new_order = np.arange(n)[indexer]
            # prepend so that np.lexsort treats the FIRST level in seq as the
            # primary sort key (lexsort's last key is primary)
            keys = (new_order,) + keys

        # Find the reordering using lexsort on the keys mapping
        ind = np.lexsort(keys)
        return indexer[ind]
3408
3409 def truncate(self, before=None, after=None) -> MultiIndex:
3410 """
3411 Slice index between two labels / tuples, return new MultiIndex.
3412
3413 Parameters
3414 ----------
3415 before : label or tuple, can be partial. Default None
3416 None defaults to start.
3417 after : label or tuple, can be partial. Default None
3418 None defaults to end.
3419
3420 Returns
3421 -------
3422 MultiIndex
3423 The truncated MultiIndex.
3424
3425 Examples
3426 --------
3427 >>> mi = pd.MultiIndex.from_arrays([['a', 'b', 'c'], ['x', 'y', 'z']])
3428 >>> mi
3429 MultiIndex([('a', 'x'), ('b', 'y'), ('c', 'z')],
3430 )
3431 >>> mi.truncate(before='a', after='b')
3432 MultiIndex([('a', 'x'), ('b', 'y')],
3433 )
3434 """
3435 if after and before and after < before:
3436 raise ValueError("after < before")
3437
3438 i, j = self.levels[0].slice_locs(before, after)
3439 left, right = self.slice_locs(before, after)
3440
3441 new_levels = list(self.levels)
3442 new_levels[0] = new_levels[0][i:j]
3443
3444 new_codes = [level_codes[left:right] for level_codes in self.codes]
3445 new_codes[0] = new_codes[0] - i
3446
3447 return MultiIndex(
3448 levels=new_levels,
3449 codes=new_codes,
3450 names=self._names,
3451 verify_integrity=False,
3452 )
3453
    def equals(self, other: object) -> bool:
        """
        Determines if two MultiIndex objects have the same labeling information
        (the levels themselves do not necessarily have to be the same)

        See Also
        --------
        equal_levels
        """
        if self.is_(other):
            return True

        if not isinstance(other, Index):
            return False

        if len(self) != len(other):
            return False

        if not isinstance(other, MultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            if not self._should_compare(other):
                # object Index or Categorical[object] may contain tuples
                return False
            return array_equivalent(self._values, other._values)

        if self.nlevels != other.nlevels:
            return False

        # Compare level by level: NA positions (code -1) must match exactly,
        # then the decoded non-NA values must compare equal.
        for i in range(self.nlevels):
            self_codes = self.codes[i]
            other_codes = other.codes[i]
            self_mask = self_codes == -1
            other_mask = other_codes == -1
            if not np.array_equal(self_mask, other_mask):
                return False
            self_codes = self_codes[~self_mask]
            self_values = self.levels[i]._values.take(self_codes)

            other_codes = other_codes[~other_mask]
            other_values = other.levels[i]._values.take(other_codes)

            # since we use NaT both datetime64 and timedelta64 we can have a
            # situation where a level is typed say timedelta64 in self (IOW it
            # has other values than NaT) but types datetime64 in other (where
            # its all NaT) but these are equivalent
            if len(self_values) == 0 and len(other_values) == 0:
                continue

            if not isinstance(self_values, np.ndarray):
                # i.e. ExtensionArray
                if not self_values.equals(other_values):
                    return False
            elif not isinstance(other_values, np.ndarray):
                # i.e. other is ExtensionArray
                if not other_values.equals(self_values):
                    return False
            else:
                # both plain ndarrays
                if not array_equivalent(self_values, other_values):
                    return False

        return True
3515
3516 def equal_levels(self, other: MultiIndex) -> bool:
3517 """
3518 Return True if the levels of both MultiIndex objects are the same
3519
3520 """
3521 if self.nlevels != other.nlevels:
3522 return False
3523
3524 for i in range(self.nlevels):
3525 if not self.levels[i].equals(other.levels[i]):
3526 return False
3527 return True
3528
3529 # --------------------------------------------------------------------
3530 # Set Methods
3531
    def _union(self, other, sort) -> MultiIndex:
        """
        Form the union of self and other.

        Parameters
        ----------
        other : Index-like or list of tuples
        sort : bool or None
            Whether to sort the result.  None sorts best-effort, emitting a
            RuntimeWarning when the values are unorderable; True re-raises.

        Returns
        -------
        MultiIndex
        """
        other, result_names = self._convert_can_do_setop(other)
        if other.has_duplicates:
            # This is only necessary if other has dupes,
            # otherwise difference is faster
            result = super()._union(other, sort)

            if isinstance(result, MultiIndex):
                return result
            # base-class union may return a flat Index of tuples; rebuild
            return MultiIndex.from_arrays(
                zip(*result), sortorder=None, names=result_names
            )

        else:
            # append only the labels of other that are missing from self
            right_missing = other.difference(self, sort=False)
            if len(right_missing):
                result = self.append(right_missing)
            else:
                result = self._get_reconciled_name_object(other)

            if sort is not False:
                try:
                    result = result.sort_values()
                except TypeError:
                    if sort is True:
                        raise
                    warnings.warn(
                        "The values in the array are unorderable. "
                        "Pass `sort=False` to suppress this warning.",
                        RuntimeWarning,
                        stacklevel=find_stack_level(),
                    )
            return result
3565
    def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
        # A MultiIndex's values are tuples, so only object dtype can hold
        # values comparable with it.
        return is_object_dtype(dtype)
3568
3569 def _get_reconciled_name_object(self, other) -> MultiIndex:
3570 """
3571 If the result of a set operation will be self,
3572 return self, unless the names change, in which
3573 case make a shallow copy of self.
3574 """
3575 names = self._maybe_match_names(other)
3576 if self.names != names:
3577 # error: Cannot determine type of "rename"
3578 return self.rename(names) # type: ignore[has-type]
3579 return self
3580
3581 def _maybe_match_names(self, other):
3582 """
3583 Try to find common names to attach to the result of an operation between
3584 a and b. Return a consensus list of names if they match at least partly
3585 or list of None if they have completely different names.
3586 """
3587 if len(self.names) != len(other.names):
3588 return [None] * len(self.names)
3589 names = []
3590 for a_name, b_name in zip(self.names, other.names):
3591 if a_name == b_name:
3592 names.append(a_name)
3593 else:
3594 # TODO: what if they both have np.nan for their names?
3595 names.append(None)
3596 return names
3597
3598 def _wrap_intersection_result(self, other, result) -> MultiIndex:
3599 _, result_names = self._convert_can_do_setop(other)
3600 return result.set_names(result_names)
3601
3602 def _wrap_difference_result(self, other, result: MultiIndex) -> MultiIndex:
3603 _, result_names = self._convert_can_do_setop(other)
3604
3605 if len(result) == 0:
3606 return result.remove_unused_levels().set_names(result_names)
3607 else:
3608 return result.set_names(result_names)
3609
3610 def _convert_can_do_setop(self, other):
3611 result_names = self.names
3612
3613 if not isinstance(other, Index):
3614 if len(other) == 0:
3615 return self[:0], self.names
3616 else:
3617 msg = "other must be a MultiIndex or a list of tuples"
3618 try:
3619 other = MultiIndex.from_tuples(other, names=self.names)
3620 except (ValueError, TypeError) as err:
3621 # ValueError raised by tuples_to_object_array if we
3622 # have non-object dtype
3623 raise TypeError(msg) from err
3624 else:
3625 result_names = get_unanimous_names(self, other)
3626
3627 return other, result_names
3628
3629 # --------------------------------------------------------------------
3630
3631 @doc(Index.astype)
3632 def astype(self, dtype, copy: bool = True):
3633 dtype = pandas_dtype(dtype)
3634 if is_categorical_dtype(dtype):
3635 msg = "> 1 ndim Categorical are not supported at this time"
3636 raise NotImplementedError(msg)
3637 if not is_object_dtype(dtype):
3638 raise TypeError(
3639 "Setting a MultiIndex dtype to anything other than object "
3640 "is not supported"
3641 )
3642 if copy is True:
3643 return self._view()
3644 return self
3645
3646 def _validate_fill_value(self, item):
3647 if isinstance(item, MultiIndex):
3648 # GH#43212
3649 if item.nlevels != self.nlevels:
3650 raise ValueError("Item must have length equal to number of levels.")
3651 return item._values
3652 elif not isinstance(item, tuple):
3653 # Pad the key with empty strings if lower levels of the key
3654 # aren't specified:
3655 item = (item,) + ("",) * (self.nlevels - 1)
3656 elif len(item) != self.nlevels:
3657 raise ValueError("Item must have length equal to number of levels.")
3658 return item
3659
    def putmask(self, mask, value: MultiIndex) -> MultiIndex:
        """
        Return a new MultiIndex of the values set with the mask.

        Parameters
        ----------
        mask : array like
        value : MultiIndex
            Must either be the same length as self or length one

        Returns
        -------
        MultiIndex
        """
        mask, noop = validate_putmask(self, mask)
        if noop:
            # nothing selected by the mask; return an unchanged copy
            return self.copy()

        if len(mask) == len(value):
            # value aligns with self: only the masked rows are taken from it
            subset = value[mask].remove_unused_levels()
        else:
            subset = value.remove_unused_levels()

        new_levels = []
        new_codes = []

        for i, (value_level, level, level_codes) in enumerate(
            zip(subset.levels, self.levels, self.codes)
        ):
            # merge the replacement labels into this level, then re-encode the
            # replacement rows against the merged level
            new_level = level.union(value_level, sort=False)
            value_codes = new_level.get_indexer_for(subset.get_level_values(i))
            # ensure_int64 widens the (read-only, small-dtype) codes to a
            # writeable int64 array before assigning into it
            new_code = ensure_int64(level_codes)
            new_code[mask] = value_codes
            new_levels.append(new_level)
            new_codes.append(new_code)

        return MultiIndex(
            levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
        )
3699
3700 def insert(self, loc: int, item) -> MultiIndex:
3701 """
3702 Make new MultiIndex inserting new item at location
3703
3704 Parameters
3705 ----------
3706 loc : int
3707 item : tuple
3708 Must be same length as number of levels in the MultiIndex
3709
3710 Returns
3711 -------
3712 new_index : Index
3713 """
3714 item = self._validate_fill_value(item)
3715
3716 new_levels = []
3717 new_codes = []
3718 for k, level, level_codes in zip(item, self.levels, self.codes):
3719 if k not in level:
3720 # have to insert into level
3721 # must insert at end otherwise you have to recompute all the
3722 # other codes
3723 lev_loc = len(level)
3724 level = level.insert(lev_loc, k)
3725 else:
3726 lev_loc = level.get_loc(k)
3727
3728 new_levels.append(level)
3729 new_codes.append(np.insert(ensure_int64(level_codes), loc, lev_loc))
3730
3731 return MultiIndex(
3732 levels=new_levels, codes=new_codes, names=self.names, verify_integrity=False
3733 )
3734
3735 def delete(self, loc) -> MultiIndex:
3736 """
3737 Make new index with passed location deleted
3738
3739 Returns
3740 -------
3741 new_index : MultiIndex
3742 """
3743 new_codes = [np.delete(level_codes, loc) for level_codes in self.codes]
3744 return MultiIndex(
3745 levels=self.levels,
3746 codes=new_codes,
3747 names=self.names,
3748 verify_integrity=False,
3749 )
3750
3751 @doc(Index.isin)
3752 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
3753 if isinstance(values, Generator):
3754 values = list(values)
3755
3756 if level is None:
3757 if len(values) == 0:
3758 return np.zeros((len(self),), dtype=np.bool_)
3759 if not isinstance(values, MultiIndex):
3760 values = MultiIndex.from_tuples(values)
3761 return values.unique().get_indexer_for(self) != -1
3762 else:
3763 num = self._get_level_number(level)
3764 levs = self.get_level_values(num)
3765
3766 if levs.size == 0:
3767 return np.zeros(len(levs), dtype=np.bool_)
3768 return levs.isin(values)
3769
    # error: Incompatible types in assignment (expression has type overloaded function,
    # base class "Index" defined the type as "Callable[[Index, Any, bool], Any]")
    rename = Index.set_names  # type: ignore[assignment]

    # ---------------------------------------------------------------
    # Arithmetic/Numeric Methods - Disabled
    # Each assignment below installs a stub that raises a TypeError naming
    # the invalid operation (see make_invalid_op).

    __add__ = make_invalid_op("__add__")
    __radd__ = make_invalid_op("__radd__")
    __iadd__ = make_invalid_op("__iadd__")
    __sub__ = make_invalid_op("__sub__")
    __rsub__ = make_invalid_op("__rsub__")
    __isub__ = make_invalid_op("__isub__")
    __pow__ = make_invalid_op("__pow__")
    __rpow__ = make_invalid_op("__rpow__")
    __mul__ = make_invalid_op("__mul__")
    __rmul__ = make_invalid_op("__rmul__")
    __floordiv__ = make_invalid_op("__floordiv__")
    __rfloordiv__ = make_invalid_op("__rfloordiv__")
    __truediv__ = make_invalid_op("__truediv__")
    __rtruediv__ = make_invalid_op("__rtruediv__")
    __mod__ = make_invalid_op("__mod__")
    __rmod__ = make_invalid_op("__rmod__")
    __divmod__ = make_invalid_op("__divmod__")
    __rdivmod__ = make_invalid_op("__rdivmod__")
    # Unary methods disabled
    __neg__ = make_invalid_op("__neg__")
    __pos__ = make_invalid_op("__pos__")
    __abs__ = make_invalid_op("__abs__")
    __invert__ = make_invalid_op("__invert__")
3800
3801
def _lexsort_depth(codes: list[np.ndarray], nlevels: int) -> int:
    """Count depth (up to a maximum of `nlevels`) with which codes are lexsorted."""
    as_int64 = [ensure_int64(level_codes) for level_codes in codes]
    # Probe from the deepest prefix down; the first sorted prefix wins.
    for depth in range(nlevels, 0, -1):
        if libalgos.is_lexsorted(as_int64[:depth]):
            return depth
    return 0
3809
3810
def sparsify_labels(label_list, start: int = 0, sentinel: object = ""):
    """
    Blank out labels that repeat the previous row's leading labels,
    replacing them with `sentinel`; the innermost level is always shown.

    `label_list` is one sequence per level; the result has the same
    per-level layout.
    """
    rows = list(zip(*label_list))
    nlevels = len(label_list)

    out = rows[: start + 1]
    prev_row = rows[start]

    for row in rows[start + 1 :]:
        sparsified: list = []
        for lvl, (prev_val, val) in enumerate(zip(prev_row, row)):
            if lvl == nlevels - 1:
                # innermost level is never blanked
                sparsified.append(val)
                out.append(sparsified)
                break
            if prev_val == val:
                sparsified.append(sentinel)
            else:
                # first difference: emit the rest of the row verbatim
                sparsified.extend(row[lvl:])
                out.append(sparsified)
                break
        prev_row = row

    return list(zip(*out))
3837
3838
def _get_na_rep(dtype) -> str:
    """Return the display string for a missing value of the given dtype."""
    if is_extension_array_dtype(dtype):
        # Extension dtypes carry their own NA sentinel (e.g. pd.NA, NaT).
        return f"{dtype.na_value}"
    # numpy dtypes: datetime-likes display NaT, everything else NaN.
    return {np.datetime64: "NaT", np.timedelta64: "NaT"}.get(dtype.type, "NaN")
3846
3847
def maybe_droplevels(index: Index, key) -> Index:
    """
    Attempt to drop level or levels from the given index.

    Parameters
    ----------
    index: Index
    key : scalar or tuple

    Returns
    -------
    Index
    """
    if not isinstance(key, tuple):
        # Scalar key: drop one level, keeping the index as-is when that
        # would remove too many levels.
        try:
            return index._drop_level_numbers([0])
        except ValueError:
            return index

    # Caller is responsible for ensuring the key is not an entry in the first
    # level of the MultiIndex.
    fallback = index
    for _ in key:
        try:
            index = index._drop_level_numbers([0])
        except ValueError:
            # we have dropped too much, so back out entirely
            return fallback
    return index
3879
3880
def _coerce_indexer_frozen(array_like, categories, copy: bool = False) -> np.ndarray:
    """
    Coerce the array-like indexer to the smallest integer dtype that can encode all
    of the given categories.

    Parameters
    ----------
    array_like : array-like
    categories : array-like
    copy : bool
        If True, ensure the returned array owns its own memory.

    Returns
    -------
    np.ndarray
        Non-writeable.
    """
    coerced = coerce_indexer_dtype(array_like, categories)
    if copy:
        coerced = coerced.copy()
    # Freeze so the codes can be shared between indexes safely.
    coerced.flags.writeable = False
    return coerced
3902
3903
def _require_listlike(level, arr, arrname: str):
    """
    Ensure that level is either None or listlike, and arr is list-of-listlike.
    """
    if level is None or is_list_like(level):
        # Already in multi-level form: arr must be a list of list-likes.
        if not is_list_like(arr) or not is_list_like(arr[0]):
            raise TypeError(f"{arrname} must be list of lists-like")
        return level, arr

    # Scalar level: arr must then be a single flat list-like; wrap both so
    # callers can always iterate (level, arr) pairs.
    if not is_list_like(arr):
        raise TypeError(f"{arrname} must be list-like")
    if len(arr) > 0 and is_list_like(arr[0]):
        raise TypeError(f"{arrname} must be list-like")
    return [level], [arr]