Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/base.py: 40%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2392 statements  

1from __future__ import annotations 

2 

3from collections import abc 

4from datetime import datetime 

5import functools 

6from itertools import zip_longest 

7import operator 

8from typing import ( 

9 TYPE_CHECKING, 

10 Any, 

11 Callable, 

12 ClassVar, 

13 Literal, 

14 NoReturn, 

15 cast, 

16 final, 

17 overload, 

18) 

19import warnings 

20 

21import numpy as np 

22 

23from pandas._config import ( 

24 get_option, 

25 using_copy_on_write, 

26 using_pyarrow_string_dtype, 

27) 

28 

29from pandas._libs import ( 

30 NaT, 

31 algos as libalgos, 

32 index as libindex, 

33 lib, 

34 writers, 

35) 

36from pandas._libs.internals import BlockValuesRefs 

37import pandas._libs.join as libjoin 

38from pandas._libs.lib import ( 

39 is_datetime_array, 

40 no_default, 

41) 

42from pandas._libs.tslibs import ( 

43 IncompatibleFrequency, 

44 OutOfBoundsDatetime, 

45 Timestamp, 

46 tz_compare, 

47) 

48from pandas._typing import ( 

49 AnyAll, 

50 ArrayLike, 

51 Axes, 

52 Axis, 

53 DropKeep, 

54 DtypeObj, 

55 F, 

56 IgnoreRaise, 

57 IndexLabel, 

58 JoinHow, 

59 Level, 

60 NaPosition, 

61 ReindexMethod, 

62 Self, 

63 Shape, 

64 npt, 

65) 

66from pandas.compat.numpy import function as nv 

67from pandas.errors import ( 

68 DuplicateLabelError, 

69 InvalidIndexError, 

70) 

71from pandas.util._decorators import ( 

72 Appender, 

73 cache_readonly, 

74 deprecate_nonkeyword_arguments, 

75 doc, 

76) 

77from pandas.util._exceptions import ( 

78 find_stack_level, 

79 rewrite_exception, 

80) 

81 

82from pandas.core.dtypes.astype import ( 

83 astype_array, 

84 astype_is_view, 

85) 

86from pandas.core.dtypes.cast import ( 

87 LossySetitemError, 

88 can_hold_element, 

89 common_dtype_categorical_compat, 

90 find_result_type, 

91 infer_dtype_from, 

92 maybe_cast_pointwise_result, 

93 np_can_hold_element, 

94) 

95from pandas.core.dtypes.common import ( 

96 ensure_int64, 

97 ensure_object, 

98 ensure_platform_int, 

99 is_any_real_numeric_dtype, 

100 is_bool_dtype, 

101 is_ea_or_datetimelike_dtype, 

102 is_float, 

103 is_hashable, 

104 is_integer, 

105 is_iterator, 

106 is_list_like, 

107 is_numeric_dtype, 

108 is_object_dtype, 

109 is_scalar, 

110 is_signed_integer_dtype, 

111 is_string_dtype, 

112 needs_i8_conversion, 

113 pandas_dtype, 

114 validate_all_hashable, 

115) 

116from pandas.core.dtypes.concat import concat_compat 

117from pandas.core.dtypes.dtypes import ( 

118 ArrowDtype, 

119 CategoricalDtype, 

120 DatetimeTZDtype, 

121 ExtensionDtype, 

122 IntervalDtype, 

123 PeriodDtype, 

124 SparseDtype, 

125) 

126from pandas.core.dtypes.generic import ( 

127 ABCCategoricalIndex, 

128 ABCDataFrame, 

129 ABCDatetimeIndex, 

130 ABCIntervalIndex, 

131 ABCMultiIndex, 

132 ABCPeriodIndex, 

133 ABCRangeIndex, 

134 ABCSeries, 

135 ABCTimedeltaIndex, 

136) 

137from pandas.core.dtypes.inference import is_dict_like 

138from pandas.core.dtypes.missing import ( 

139 array_equivalent, 

140 is_valid_na_for_dtype, 

141 isna, 

142) 

143 

144from pandas.core import ( 

145 arraylike, 

146 nanops, 

147 ops, 

148) 

149from pandas.core.accessor import CachedAccessor 

150import pandas.core.algorithms as algos 

151from pandas.core.array_algos.putmask import ( 

152 setitem_datetimelike_compat, 

153 validate_putmask, 

154) 

155from pandas.core.arrays import ( 

156 ArrowExtensionArray, 

157 BaseMaskedArray, 

158 Categorical, 

159 DatetimeArray, 

160 ExtensionArray, 

161 TimedeltaArray, 

162) 

163from pandas.core.arrays.string_ import ( 

164 StringArray, 

165 StringDtype, 

166) 

167from pandas.core.base import ( 

168 IndexOpsMixin, 

169 PandasObject, 

170) 

171import pandas.core.common as com 

172from pandas.core.construction import ( 

173 ensure_wrapped_if_datetimelike, 

174 extract_array, 

175 sanitize_array, 

176) 

177from pandas.core.indexers import ( 

178 disallow_ndim_indexing, 

179 is_valid_positional_slice, 

180) 

181from pandas.core.indexes.frozen import FrozenList 

182from pandas.core.missing import clean_reindex_fill_method 

183from pandas.core.ops import get_op_result_name 

184from pandas.core.ops.invalid import make_invalid_op 

185from pandas.core.sorting import ( 

186 ensure_key_mapped, 

187 get_group_index_sorter, 

188 nargsort, 

189) 

190from pandas.core.strings.accessor import StringMethods 

191 

192from pandas.io.formats.printing import ( 

193 PrettyDict, 

194 default_pprint, 

195 format_object_summary, 

196 pprint_thing, 

197) 

198 

199if TYPE_CHECKING: 

200 from collections.abc import ( 

201 Hashable, 

202 Iterable, 

203 Sequence, 

204 ) 

205 

206 from pandas import ( 

207 CategoricalIndex, 

208 DataFrame, 

209 MultiIndex, 

210 Series, 

211 ) 

212 from pandas.core.arrays import ( 

213 IntervalArray, 

214 PeriodArray, 

215 ) 

216 

217__all__ = ["Index"] 

218 

219_unsortable_types = frozenset(("mixed", "mixed-integer")) 

220 

221_index_doc_kwargs: dict[str, str] = { 

222 "klass": "Index", 

223 "inplace": "", 

224 "target_klass": "Index", 

225 "raises_section": "", 

226 "unique": "Index", 

227 "duplicated": "np.ndarray", 

228} 

229_index_shared_docs: dict[str, str] = {} 

230str_t = str 

231 

232_dtype_obj = np.dtype("object") 

233 

234_masked_engines = { 

235 "Complex128": libindex.MaskedComplex128Engine, 

236 "Complex64": libindex.MaskedComplex64Engine, 

237 "Float64": libindex.MaskedFloat64Engine, 

238 "Float32": libindex.MaskedFloat32Engine, 

239 "UInt64": libindex.MaskedUInt64Engine, 

240 "UInt32": libindex.MaskedUInt32Engine, 

241 "UInt16": libindex.MaskedUInt16Engine, 

242 "UInt8": libindex.MaskedUInt8Engine, 

243 "Int64": libindex.MaskedInt64Engine, 

244 "Int32": libindex.MaskedInt32Engine, 

245 "Int16": libindex.MaskedInt16Engine, 

246 "Int8": libindex.MaskedInt8Engine, 

247 "boolean": libindex.MaskedBoolEngine, 

248 "double[pyarrow]": libindex.MaskedFloat64Engine, 

249 "float64[pyarrow]": libindex.MaskedFloat64Engine, 

250 "float32[pyarrow]": libindex.MaskedFloat32Engine, 

251 "float[pyarrow]": libindex.MaskedFloat32Engine, 

252 "uint64[pyarrow]": libindex.MaskedUInt64Engine, 

253 "uint32[pyarrow]": libindex.MaskedUInt32Engine, 

254 "uint16[pyarrow]": libindex.MaskedUInt16Engine, 

255 "uint8[pyarrow]": libindex.MaskedUInt8Engine, 

256 "int64[pyarrow]": libindex.MaskedInt64Engine, 

257 "int32[pyarrow]": libindex.MaskedInt32Engine, 

258 "int16[pyarrow]": libindex.MaskedInt16Engine, 

259 "int8[pyarrow]": libindex.MaskedInt8Engine, 

260 "bool[pyarrow]": libindex.MaskedBoolEngine, 

261} 

262 

263 

def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        # Delegate to the wrapped implementation; it always returns the
        # joined index plus the (possibly None) left/right indexers.
        result, left_indexer, right_indexer = meth(
            self, other, how=how, level=level, sort=sort
        )
        if not return_indexers:
            return result

        # Normalize indexer dtypes to platform int before handing them back.
        if left_indexer is not None:
            left_indexer = ensure_platform_int(left_indexer)
        if right_indexer is not None:
            right_indexer = ensure_platform_int(right_indexer)
        return result, left_indexer, right_indexer

    return cast(F, join)

290 

291 

def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        # GH#23752 "labels" kwarg has been replaced with "codes"
        if "labels" in d and "codes" not in d:
            d["codes"] = d.pop("labels")
        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False
    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype

    return cls.__new__(cls, **d)

318 

319 

320class Index(IndexOpsMixin, PandasObject): 

321 """ 

322 Immutable sequence used for indexing and alignment. 

323 

324 The basic object storing axis labels for all pandas objects. 

325 

326 .. versionchanged:: 2.0.0 

327 

328 Index can hold all numpy numeric dtypes (except float16). Previously only 

329 int64/uint64/float64 dtypes were accepted. 

330 

331 Parameters 

332 ---------- 

333 data : array-like (1-dimensional) 

334 dtype : str, numpy.dtype, or ExtensionDtype, optional 

335 Data type for the output Index. If not specified, this will be 

336 inferred from `data`. 

337 See the :ref:`user guide <basics.dtypes>` for more usages. 

338 copy : bool, default False 

339 Copy input data. 

340 name : object 

341 Name to be stored in the index. 

342 tupleize_cols : bool (default: True) 

343 When True, attempt to create a MultiIndex if possible. 

344 

345 See Also 

346 -------- 

347 RangeIndex : Index implementing a monotonic integer range. 

348 CategoricalIndex : Index of :class:`Categorical` s. 

349 MultiIndex : A multi-level, or hierarchical Index. 

350 IntervalIndex : An Index of :class:`Interval` s. 

351 DatetimeIndex : Index of datetime64 data. 

352 TimedeltaIndex : Index of timedelta64 data. 

353 PeriodIndex : Index of Period data. 

354 

355 Notes 

356 ----- 

357 An Index instance can **only** contain hashable objects. 

358 An Index instance *can not* hold numpy float16 dtype. 

359 

360 Examples 

361 -------- 

362 >>> pd.Index([1, 2, 3]) 

363 Index([1, 2, 3], dtype='int64') 

364 

365 >>> pd.Index(list('abc')) 

366 Index(['a', 'b', 'c'], dtype='object') 

367 

368 >>> pd.Index([1, 2, 3], dtype="uint8") 

369 Index([1, 2, 3], dtype='uint8') 

370 """ 

371 

372 # similar to __array_priority__, positions Index after Series and DataFrame 

373 # but before ExtensionArray. Should NOT be overridden by subclasses. 

374 __pandas_priority__ = 2000 

375 

376 # Cython methods; see github.com/cython/cython/issues/2647 

377 # for why we need to wrap these instead of making them class attributes 

378 # Moreover, cython will choose the appropriate-dtyped sub-function 

379 # given the dtypes of the passed arguments 

380 

381 @final 

382 def _left_indexer_unique(self, other: Self) -> npt.NDArray[np.intp]: 

383 # Caller is responsible for ensuring other.dtype == self.dtype 

384 sv = self._get_join_target() 

385 ov = other._get_join_target() 

386 # similar but not identical to ov.searchsorted(sv) 

387 return libjoin.left_join_indexer_unique(sv, ov) 

388 

389 @final 

390 def _left_indexer( 

391 self, other: Self 

392 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

393 # Caller is responsible for ensuring other.dtype == self.dtype 

394 sv = self._get_join_target() 

395 ov = other._get_join_target() 

396 joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov) 

397 joined = self._from_join_target(joined_ndarray) 

398 return joined, lidx, ridx 

399 

400 @final 

401 def _inner_indexer( 

402 self, other: Self 

403 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

404 # Caller is responsible for ensuring other.dtype == self.dtype 

405 sv = self._get_join_target() 

406 ov = other._get_join_target() 

407 joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov) 

408 joined = self._from_join_target(joined_ndarray) 

409 return joined, lidx, ridx 

410 

411 @final 

412 def _outer_indexer( 

413 self, other: Self 

414 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

415 # Caller is responsible for ensuring other.dtype == self.dtype 

416 sv = self._get_join_target() 

417 ov = other._get_join_target() 

418 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov) 

419 joined = self._from_join_target(joined_ndarray) 

420 return joined, lidx, ridx 

421 

422 _typ: str = "index" 

423 _data: ExtensionArray | np.ndarray 

424 _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = ( 

425 np.ndarray, 

426 ExtensionArray, 

427 ) 

428 _id: object | None = None 

429 _name: Hashable = None 

430 # MultiIndex.levels previously allowed setting the index name. We 

431 # don't allow this anymore, and raise if it happens rather than 

432 # failing silently. 

433 _no_setting_name: bool = False 

434 _comparables: list[str] = ["name"] 

435 _attributes: list[str] = ["name"] 

436 

437 @cache_readonly 

438 def _can_hold_strings(self) -> bool: 

439 return not is_numeric_dtype(self.dtype) 

440 

441 _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = { 

442 np.dtype(np.int8): libindex.Int8Engine, 

443 np.dtype(np.int16): libindex.Int16Engine, 

444 np.dtype(np.int32): libindex.Int32Engine, 

445 np.dtype(np.int64): libindex.Int64Engine, 

446 np.dtype(np.uint8): libindex.UInt8Engine, 

447 np.dtype(np.uint16): libindex.UInt16Engine, 

448 np.dtype(np.uint32): libindex.UInt32Engine, 

449 np.dtype(np.uint64): libindex.UInt64Engine, 

450 np.dtype(np.float32): libindex.Float32Engine, 

451 np.dtype(np.float64): libindex.Float64Engine, 

452 np.dtype(np.complex64): libindex.Complex64Engine, 

453 np.dtype(np.complex128): libindex.Complex128Engine, 

454 } 

455 

456 @property 

457 def _engine_type( 

458 self, 

459 ) -> type[libindex.IndexEngine | libindex.ExtensionEngine]: 

460 return self._engine_types.get(self.dtype, libindex.ObjectEngine) 

461 

462 # whether we support partial string indexing. Overridden 

463 # in DatetimeIndex and PeriodIndex 

464 _supports_partial_string_indexing = False 

465 

466 _accessors = {"str"} 

467 

468 str = CachedAccessor("str", StringMethods) 

469 

470 _references = None 

471 

472 # -------------------------------------------------------------------- 

473 # Constructors 

474 

    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Self:
        """
        Construct an Index, dispatching to the appropriate subclass based on
        the input ``data`` and requested ``dtype``.
        """
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        # Copy-on-write: share the caller's reference tracker when we are
        # not copying the data of a Series/Index input.
        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            # error: Incompatible return value type (got "MultiIndex",
            # expected "Self")
            return result  # type: ignore[return-value]

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in "iufcbmM":
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Anything exposing the array interface: convert and retry.
            return cls(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            if tupleize_cols:
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    # error: Incompatible return value type (got "MultiIndex",
                    # expected "Self")
                    return MultiIndex.from_tuples(  # type: ignore[return-value]
                        data, names=name
                    )
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            # Re-word sanitize_array's messages in Index terms.
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        result = klass._simple_new(arr, name, refs=refs)
        # Deprecation: inferring a non-object dtype from object-dtype
        # pandas input will stop happening in a future version.
        if dtype is None and is_pandas_object and data_dtype == np.object_:
            if result.dtype != data_dtype:
                warnings.warn(
                    "Dtype inference on a pandas object "
                    "(Series, Index, ExtensionArray) is deprecated. The Index "
                    "constructor will keep the original dtype in the future. "
                    "Call `infer_objects` on the result to get the old "
                    "behavior.",
                    FutureWarning,
                    stacklevel=2,
                )
        return result  # type: ignore[return-value]

590 

591 @classmethod 

592 def _ensure_array(cls, data, dtype, copy: bool): 

593 """ 

594 Ensure we have a valid array to pass to _simple_new. 

595 """ 

596 if data.ndim > 1: 

597 # GH#13601, GH#20285, GH#27125 

598 raise ValueError("Index data must be 1-dimensional") 

599 elif dtype == np.float16: 

600 # float16 not supported (no indexing engine) 

601 raise NotImplementedError("float16 indexes are not supported") 

602 

603 if copy: 

604 # asarray_tuplesafe does not always copy underlying data, 

605 # so need to make sure that this happens 

606 data = data.copy() 

607 return data 

608 

609 @final 

610 @classmethod 

611 def _dtype_to_subclass(cls, dtype: DtypeObj): 

612 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423 

613 

614 if isinstance(dtype, ExtensionDtype): 

615 return dtype.index_class 

616 

617 if dtype.kind == "M": 

618 from pandas import DatetimeIndex 

619 

620 return DatetimeIndex 

621 

622 elif dtype.kind == "m": 

623 from pandas import TimedeltaIndex 

624 

625 return TimedeltaIndex 

626 

627 elif dtype.kind == "O": 

628 # NB: assuming away MultiIndex 

629 return Index 

630 

631 elif issubclass(dtype.type, str) or is_numeric_dtype(dtype): 

632 return Index 

633 

634 raise NotImplementedError(dtype) 

635 

636 # NOTE for new Index creation: 

637 

638 # - _simple_new: It returns new Index with the same type as the caller. 

639 # All metadata (such as name) must be provided by caller's responsibility. 

640 # Using _shallow_copy is recommended because it fills these metadata 

641 # otherwise specified. 

642 

643 # - _shallow_copy: It returns new Index with the same type (using 

644 # _simple_new), but fills caller's metadata otherwise specified. Passed 

645 # kwargs will overwrite corresponding metadata. 

646 

647 # See each method's docstring. 

648 

    @classmethod
    def _simple_new(
        cls, values: ArrayLike, name: Hashable | None = None, refs=None
    ) -> Self:
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.
        """
        assert isinstance(values, cls._data_cls), type(values)

        # Bypass __new__'s inference machinery entirely.
        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._reset_identity()
        # Copy-on-write bookkeeping: adopt the caller's reference tracker
        # when given, otherwise start a fresh one.
        if refs is not None:
            result._references = refs
        else:
            result._references = BlockValuesRefs()
        result._references.add_index_reference(result)

        return result

673 

674 @classmethod 

675 def _with_infer(cls, *args, **kwargs): 

676 """ 

677 Constructor that uses the 1.0.x behavior inferring numeric dtypes 

678 for ndarray[object] inputs. 

679 """ 

680 result = cls(*args, **kwargs) 

681 

682 if result.dtype == _dtype_obj and not result._is_multi: 

683 # error: Argument 1 to "maybe_convert_objects" has incompatible type 

684 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected 

685 # "ndarray[Any, Any]" 

686 values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type] 

687 if values.dtype.kind in "iufb": 

688 return Index(values, name=result.name) 

689 

690 return result 

691 

692 @cache_readonly 

693 def _constructor(self) -> type[Self]: 

694 return type(self) 

695 

696 @final 

697 def _maybe_check_unique(self) -> None: 

698 """ 

699 Check that an Index has no duplicates. 

700 

701 This is typically only called via 

702 `NDFrame.flags.allows_duplicate_labels.setter` when it's set to 

703 True (duplicates aren't allowed). 

704 

705 Raises 

706 ------ 

707 DuplicateLabelError 

708 When the index is not unique. 

709 """ 

710 if not self.is_unique: 

711 msg = """Index has duplicates.""" 

712 duplicates = self._format_duplicate_message() 

713 msg += f"\n{duplicates}" 

714 

715 raise DuplicateLabelError(msg) 

716 

    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
              positions
        label
        a        [0, 2]
        """
        from pandas import Series

        # Unique labels that appear more than once.
        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        # Map each duplicated label to the list of positions where it occurs.
        out = (
            Series(np.arange(len(self)), copy=False)
            .groupby(self, observed=False)
            .agg(list)[duplicates]
        )
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

752 

753 # -------------------------------------------------------------------- 

754 # Index Internals Methods 

755 

756 def _shallow_copy(self, values, name: Hashable = no_default) -> Self: 

757 """ 

758 Create a new Index with the same class as the caller, don't copy the 

759 data, use the same object attributes with passed in attributes taking 

760 precedence. 

761 

762 *this is an internal non-public method* 

763 

764 Parameters 

765 ---------- 

766 values : the values to create the new Index, optional 

767 name : Label, defaults to self.name 

768 """ 

769 name = self._name if name is no_default else name 

770 

771 return self._simple_new(values, name=name, refs=self._references) 

772 

773 def _view(self) -> Self: 

774 """ 

775 fastpath to make a shallow copy, i.e. new object with same data. 

776 """ 

777 result = self._simple_new(self._values, name=self._name, refs=self._references) 

778 

779 result._cache = self._cache 

780 return result 

781 

782 @final 

783 def _rename(self, name: Hashable) -> Self: 

784 """ 

785 fastpath for rename if new name is already validated. 

786 """ 

787 result = self._view() 

788 result._name = name 

789 return result 

790 

791 @final 

792 def is_(self, other) -> bool: 

793 """ 

794 More flexible, faster check like ``is`` but that works through views. 

795 

796 Note: this is *not* the same as ``Index.identical()``, which checks 

797 that metadata is also the same. 

798 

799 Parameters 

800 ---------- 

801 other : object 

802 Other object to compare against. 

803 

804 Returns 

805 ------- 

806 bool 

807 True if both have same underlying data, False otherwise. 

808 

809 See Also 

810 -------- 

811 Index.identical : Works like ``Index.is_`` but also checks metadata. 

812 

813 Examples 

814 -------- 

815 >>> idx1 = pd.Index(['1', '2', '3']) 

816 >>> idx1.is_(idx1.view()) 

817 True 

818 

819 >>> idx1.is_(idx1.copy()) 

820 False 

821 """ 

822 if self is other: 

823 return True 

824 elif not hasattr(other, "_id"): 

825 return False 

826 elif self._id is None or other._id is None: 

827 return False 

828 else: 

829 return self._id is other._id 

830 

831 @final 

832 def _reset_identity(self) -> None: 

833 """ 

834 Initializes or resets ``_id`` attribute with new object. 

835 """ 

836 self._id = object() 

837 

838 @final 

839 def _cleanup(self) -> None: 

840 self._engine.clear_mapping() 

841 

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        """Lazily build the lookup engine appropriate for this dtype."""
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()

        # Arrow-backed timestamp/duration: unwrap to the numpy-backed array
        # so the dedicated Datetime/Timedelta engines can be used.
        if isinstance(self._values, ArrowExtensionArray) and self.dtype.kind in "Mm":
            import pyarrow as pa

            pa_type = self._values._pa_array.type
            if pa.types.is_timestamp(pa_type):
                target_values = self._values._to_datetimearray()
                return libindex.DatetimeEngine(target_values._ndarray)
            elif pa.types.is_duration(pa_type):
                target_values = self._values._to_timedeltaarray()
                return libindex.TimedeltaEngine(target_values._ndarray)

        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

890 

891 @final 

892 @cache_readonly 

893 def _dir_additions_for_owner(self) -> set[str_t]: 

894 """ 

895 Add the string-like labels to the owner dataframe/series dir output. 

896 

897 If this is a MultiIndex, it's first level values are used. 

898 """ 

899 return { 

900 c 

901 for c in self.unique(level=0)[: get_option("display.max_dir_items")] 

902 if isinstance(c, str) and c.isidentifier() 

903 } 

904 

905 # -------------------------------------------------------------------- 

906 # Array-Like Methods 

907 

908 # ndarray compat 

909 def __len__(self) -> int: 

910 """ 

911 Return the length of the Index. 

912 """ 

913 return len(self._data) 

914 

915 def __array__(self, dtype=None, copy=None) -> np.ndarray: 

916 """ 

917 The array interface, return my values. 

918 """ 

919 return np.asarray(self._data, dtype=dtype) 

920 

    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        """Handle numpy ufuncs applied to this Index."""
        # Defer to Series/DataFrame so binary ops keep pandas alignment rules.
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # Prefer the matching dunder op (e.g. np.add -> __add__) if defined.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Fall back: apply the ufunc to the underlying values.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)
        elif method == "reduce":
            result = lib.item_from_zerodim(result)
            return result

        # float16 has no indexing engine; upcast before wrapping in an Index.
        if result.dtype == np.float16:
            result = result.astype(np.float32)

        return self.__array_wrap__(result)

957 

958 @final 

959 def __array_wrap__(self, result, context=None, return_scalar=False): 

960 """ 

961 Gets called after a ufunc and other functions e.g. np.split. 

962 """ 

963 result = lib.item_from_zerodim(result) 

964 if (not isinstance(result, Index) and is_bool_dtype(result.dtype)) or np.ndim( 

965 result 

966 ) > 1: 

967 # exclude Index to avoid warning from is_bool_dtype deprecation; 

968 # in the Index case it doesn't matter which path we go down. 

969 # reached in plotting tests with e.g. np.nonzero(index) 

970 return result 

971 

972 return Index(result, name=self.name) 

973 

974 @cache_readonly 

975 def dtype(self) -> DtypeObj: 

976 """ 

977 Return the dtype object of the underlying data. 

978 

979 Examples 

980 -------- 

981 >>> idx = pd.Index([1, 2, 3]) 

982 >>> idx 

983 Index([1, 2, 3], dtype='int64') 

984 >>> idx.dtype 

985 dtype('int64') 

986 """ 

987 return self._data.dtype 

988 

989 @final 

990 def ravel(self, order: str_t = "C") -> Self: 

991 """ 

992 Return a view on self. 

993 

994 Returns 

995 ------- 

996 Index 

997 

998 See Also 

999 -------- 

1000 numpy.ndarray.ravel : Return a flattened array. 

1001 

1002 Examples 

1003 -------- 

1004 >>> s = pd.Series([1, 2, 3], index=['a', 'b', 'c']) 

1005 >>> s.index.ravel() 

1006 Index(['a', 'b', 'c'], dtype='object') 

1007 """ 

1008 return self[:] 

1009 

    def view(self, cls=None):
        """
        Return a view of the Index; passing a dtype-like ``cls`` is deprecated.
        """
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is dtype-like rather than an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if needs_i8_conversion(dtype):
                idx_cls = self._dtype_to_subclass(dtype)
                arr = self.array.view(dtype)
                if isinstance(arr, ExtensionArray):
                    # here we exclude non-supported dt64/td64 dtypes
                    return idx_cls._simple_new(
                        arr, name=self.name, refs=self._references
                    )
                return arr

            result = self._data.view(cls)
        else:
            if cls is not None:
                warnings.warn(
                    # GH#55709
                    f"Passing a type in {type(self).__name__}.view is deprecated "
                    "and will raise in a future version. "
                    "Call view without any argument to retain the old behavior.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

            result = self._view()
        if isinstance(result, Index):
            # Preserve identity so ``is_`` still matches the parent.
            result._id = self._id
        return result

1044 

    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.astype('float')
        Index([1.0, 2.0, 3.0], dtype='float64')
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if self.dtype == dtype:
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            # Let the extension array do its own casting; rewrite_exception
            # re-labels errors so they mention the Index class, not the
            # array class.
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            # Copy-on-Write: the cast shares memory with self, so the new
            # Index must be registered on the same reference tracker.
            result._references = self._references
            result._references.add_index_reference(result)
        return result

1109 

    # Shared docstring template for `take`; subclasses interpolate their own
    # class name via the %(klass)s placeholder (see @Appender on `take`).
    _index_shared_docs[
        "take"
    ] = """
    Return a new %(klass)s of the values selected by the indices.

    For internal compatibility with numpy arrays.

    Parameters
    ----------
    indices : array-like
        Indices to be taken.
    axis : int, optional
        The axis over which to select values, always 0.
    allow_fill : bool, default True
    fill_value : scalar, default None
        If allow_fill=True and fill_value is not None, indices specified by
        -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

    Returns
    -------
    Index
        An index formed of elements at the given indices. Will be the same
        type as self, except for RangeIndex.

    See Also
    --------
    numpy.ndarray.take: Return an array formed from the
        elements of a at the given indices.

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.take([2, 2, 1, 2])
    Index(['c', 'c', 'b', 'c'], dtype='object')
    """

1145 

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> Self:
        # Reject unsupported numpy-compat kwargs with a helpful error.
        if kwargs:
            nv.validate_take((), kwargs)
        if is_scalar(indices):
            raise TypeError("Expected indices to be array-like")
        indices = ensure_platform_int(indices)
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # Note: we discard fill_value and use self._na_value, only relevant
        # in the case where allow_fill is True and fill_value is not None
        values = self._values
        if isinstance(values, np.ndarray):
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        else:
            # algos.take passes 'axis' keyword which not all EAs accept
            taken = values.take(
                indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        # _constructor so RangeIndex produces a plain Index
        return self._constructor._simple_new(taken, name=self.name)

1175 

1176 @final 

1177 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool: 

1178 """ 

1179 We only use pandas-style take when allow_fill is True _and_ 

1180 fill_value is not None. 

1181 """ 

1182 if allow_fill and fill_value is not None: 

1183 # only fill if we are passing a non-None fill_value 

1184 if self._can_hold_na: 

1185 if (indices < -1).any(): 

1186 raise ValueError( 

1187 "When allow_fill=True and fill_value is not None, " 

1188 "all indices must be >= -1" 

1189 ) 

1190 else: 

1191 cls_name = type(self).__name__ 

1192 raise ValueError( 

1193 f"Unable to fill values because {cls_name} cannot contain NA" 

1194 ) 

1195 else: 

1196 allow_fill = False 

1197 return allow_fill 

1198 

    # Shared docstring template for `repeat`; subclasses interpolate their
    # own class name via the %(klass)s placeholder (see @Appender on `repeat`).
    _index_shared_docs[
        "repeat"
    ] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')
    >>> idx.repeat(2)
    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
    >>> idx.repeat([1, 2, 3])
    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
    """

1237 

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis: None = None) -> Self:
        repeats = ensure_platform_int(repeats)
        # Raises with a numpy-compat message if a non-None axis is passed.
        nv.validate_repeat((), {"axis": axis})
        res_values = self._values.repeat(repeats)

        # _constructor so RangeIndex-> Index with an int64 dtype
        return self._constructor._simple_new(res_values, name=self.name)

1246 

1247 # -------------------------------------------------------------------- 

1248 # Copying Methods 

1249 

1250 def copy( 

1251 self, 

1252 name: Hashable | None = None, 

1253 deep: bool = False, 

1254 ) -> Self: 

1255 """ 

1256 Make a copy of this object. 

1257 

1258 Name is set on the new object. 

1259 

1260 Parameters 

1261 ---------- 

1262 name : Label, optional 

1263 Set name for new object. 

1264 deep : bool, default False 

1265 

1266 Returns 

1267 ------- 

1268 Index 

1269 Index refer to new object which is a copy of this object. 

1270 

1271 Notes 

1272 ----- 

1273 In most cases, there should be no functional difference from using 

1274 ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 

1275 

1276 Examples 

1277 -------- 

1278 >>> idx = pd.Index(['a', 'b', 'c']) 

1279 >>> new_idx = idx.copy() 

1280 >>> idx is new_idx 

1281 False 

1282 """ 

1283 

1284 name = self._validate_names(name=name, deep=deep)[0] 

1285 if deep: 

1286 new_data = self._data.copy() 

1287 new_index = type(self)._simple_new(new_data, name=name) 

1288 else: 

1289 new_index = self._rename(name=name) 

1290 return new_index 

1291 

    @final
    def __copy__(self, **kwargs) -> Self:
        # Delegate to Index.copy so copy.copy(index) matches index.copy().
        return self.copy(**kwargs)

1295 

    @final
    def __deepcopy__(self, memo=None) -> Self:
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        # A deep Index.copy duplicates the underlying data, which is all
        # copy.deepcopy needs here.
        return self.copy(deep=True)

1305 

1306 # -------------------------------------------------------------------- 

1307 # Rendering Methods 

1308 

1309 @final 

1310 def __repr__(self) -> str_t: 

1311 """ 

1312 Return a string representation for this object. 

1313 """ 

1314 klass_name = type(self).__name__ 

1315 data = self._format_data() 

1316 attrs = self._format_attrs() 

1317 attrs_str = [f"{k}={v}" for k, v in attrs] 

1318 prepr = ", ".join(attrs_str) 

1319 

1320 return f"{klass_name}({data}{prepr})" 

1321 

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        # Default per-value formatter used when rendering the data
        # (see _format_data and _summary).
        return default_pprint

1328 

    @final
    def _format_data(self, name=None) -> str_t:
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = True

        if self.inferred_type == "string":
            is_justify = False
        elif isinstance(self.dtype, CategoricalDtype):
            self = cast("CategoricalIndex", self)
            # Object-dtype categories render like object-dtype indexes:
            # without justification.
            if is_object_dtype(self.categories.dtype):
                is_justify = False
        elif isinstance(self, ABCRangeIndex):
            # We will do the relevant formatting via attrs
            return ""

        return format_object_summary(
            self,
            self._formatter_func,
            is_justify=is_justify,
            name=name,
            line_break_each_value=self._is_multi,
        )

1354 

1355 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: 

1356 """ 

1357 Return a list of tuples of the (attr,formatted_value). 

1358 """ 

1359 attrs: list[tuple[str_t, str_t | int | bool | None]] = [] 

1360 

1361 if not self._is_multi: 

1362 attrs.append(("dtype", f"'{self.dtype}'")) 

1363 

1364 if self.name is not None: 

1365 attrs.append(("name", default_pprint(self.name))) 

1366 elif self._is_multi and any(x is not None for x in self.names): 

1367 attrs.append(("names", default_pprint(self.names))) 

1368 

1369 max_seq_items = get_option("display.max_seq_items") or len(self) 

1370 if len(self) > max_seq_items: 

1371 attrs.append(("length", len(self))) 

1372 return attrs 

1373 

1374 @final 

1375 def _get_level_names(self) -> Hashable | Sequence[Hashable]: 

1376 """ 

1377 Return a name or list of names with None replaced by the level number. 

1378 """ 

1379 if self._is_multi: 

1380 return [ 

1381 level if name is None else name for level, name in enumerate(self.names) 

1382 ] 

1383 else: 

1384 return 0 if self.name is None else self.name 

1385 

    @final
    def _mpl_repr(self) -> np.ndarray:
        # how to represent ourselves to matplotlib
        if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
            # Plain numpy dtypes (except datetime64) can be handed over as-is.
            return cast(np.ndarray, self.values)
        # Extension and datetime64 dtypes are converted to an object ndarray.
        return self.astype(object, copy=False)._values

1392 

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str_t = "NaN",
    ) -> list[str_t]:
        """
        Render a string representation of the Index.

        Deprecated (GH#55413): convert using ``index.astype(str)`` or
        ``index.map(formatter)`` instead.

        Parameters
        ----------
        name : bool, default False
            Whether to prepend the (escaped) index name as the first entry.
        formatter : callable, optional
            If given, applied to each value via ``self.map`` instead of the
            default rendering.
        na_rep : str, default "NaN"
            String representation for missing values.

        Returns
        -------
        list of str
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        header = []
        if name:
            # Escape control characters so the name renders on one line.
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=na_rep)

1422 

    # Default rendering for missing values, used by _format_flat.
    _default_na_rep = "NaN"

1424 

    @final
    def _format_flat(
        self,
        *,
        include_name: bool,
        formatter: Callable | None = None,
    ) -> list[str_t]:
        """
        Render a string representation of the Index.

        Internal counterpart of the deprecated ``format``: same logic, but
        always uses the class-level ``_default_na_rep``.
        """
        header = []
        if include_name:
            # Escape control characters so the name renders on one line.
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=self._default_na_rep)

1447 

1448 def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str_t]: 

1449 from pandas.io.formats.format import format_array 

1450 

1451 values = self._values 

1452 

1453 if ( 

1454 is_object_dtype(values.dtype) 

1455 or is_string_dtype(values.dtype) 

1456 or isinstance(self.dtype, (IntervalDtype, CategoricalDtype)) 

1457 ): 

1458 # TODO: why do we need different justify for these cases? 

1459 justify = "all" 

1460 else: 

1461 justify = "left" 

1462 # passing leading_space=False breaks test_format_missing, 

1463 # test_index_repr_in_frame_with_nan, but would otherwise make 

1464 # trim_front unnecessary 

1465 formatted = format_array(values, None, justify=justify) 

1466 result = trim_front(formatted) 

1467 return header + result 

1468 

    def _get_values_for_csv(
        self,
        *,
        na_rep: str_t = "",
        decimal: str_t = ".",
        float_format=None,
        date_format=None,
        quoting=None,
    ) -> npt.NDArray[np.object_]:
        """
        Convert the values to an object ndarray of strings for CSV output,
        delegating all formatting options to ``get_values_for_csv``.
        """
        return get_values_for_csv(
            self._values,
            na_rep=na_rep,
            decimal=decimal,
            float_format=float_format,
            date_format=date_format,
            quoting=quoting,
        )

1486 

1487 def _summary(self, name=None) -> str_t: 

1488 """ 

1489 Return a summarized representation. 

1490 

1491 Parameters 

1492 ---------- 

1493 name : str 

1494 name to use in the summary representation 

1495 

1496 Returns 

1497 ------- 

1498 String with a summarized representation of the index 

1499 """ 

1500 if len(self) > 0: 

1501 head = self[0] 

1502 if hasattr(head, "format") and not isinstance(head, str): 

1503 head = head.format() 

1504 elif needs_i8_conversion(self.dtype): 

1505 # e.g. Timedelta, display as values, not quoted 

1506 head = self._formatter_func(head).replace("'", "") 

1507 tail = self[-1] 

1508 if hasattr(tail, "format") and not isinstance(tail, str): 

1509 tail = tail.format() 

1510 elif needs_i8_conversion(self.dtype): 

1511 # e.g. Timedelta, display as values, not quoted 

1512 tail = self._formatter_func(tail).replace("'", "") 

1513 

1514 index_summary = f", {head} to {tail}" 

1515 else: 

1516 index_summary = "" 

1517 

1518 if name is None: 

1519 name = type(self).__name__ 

1520 return f"{name}: {len(self)} entries{index_summary}" 

1521 

1522 # -------------------------------------------------------------------- 

1523 # Conversion Methods 

1524 

    def to_flat_index(self) -> Self:
        """
        Identity method.

        This is implemented for compatibility with subclass implementations
        when chaining.

        Returns
        -------
        pd.Index
            Caller.

        See Also
        --------
        MultiIndex.to_flat_index : Subclass implementation.
        """
        # Already flat: nothing to convert.
        return self

1542 

1543 @final 

1544 def to_series(self, index=None, name: Hashable | None = None) -> Series: 

1545 """ 

1546 Create a Series with both index and values equal to the index keys. 

1547 

1548 Useful with map for returning an indexer based on an index. 

1549 

1550 Parameters 

1551 ---------- 

1552 index : Index, optional 

1553 Index of resulting Series. If None, defaults to original index. 

1554 name : str, optional 

1555 Name of resulting Series. If None, defaults to name of original 

1556 index. 

1557 

1558 Returns 

1559 ------- 

1560 Series 

1561 The dtype will be based on the type of the Index values. 

1562 

1563 See Also 

1564 -------- 

1565 Index.to_frame : Convert an Index to a DataFrame. 

1566 Series.to_frame : Convert Series to DataFrame. 

1567 

1568 Examples 

1569 -------- 

1570 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') 

1571 

1572 By default, the original index and original name is reused. 

1573 

1574 >>> idx.to_series() 

1575 animal 

1576 Ant Ant 

1577 Bear Bear 

1578 Cow Cow 

1579 Name: animal, dtype: object 

1580 

1581 To enforce a new index, specify new labels to ``index``: 

1582 

1583 >>> idx.to_series(index=[0, 1, 2]) 

1584 0 Ant 

1585 1 Bear 

1586 2 Cow 

1587 Name: animal, dtype: object 

1588 

1589 To override the name of the resulting column, specify ``name``: 

1590 

1591 >>> idx.to_series(name='zoo') 

1592 animal 

1593 Ant Ant 

1594 Bear Bear 

1595 Cow Cow 

1596 Name: zoo, dtype: object 

1597 """ 

1598 from pandas import Series 

1599 

1600 if index is None: 

1601 index = self._view() 

1602 if name is None: 

1603 name = self.name 

1604 

1605 return Series(self._values.copy(), index=index, name=name) 

1606 

    def to_frame(
        self, index: bool = True, name: Hashable = lib.no_default
    ) -> DataFrame:
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, defaults to index.name
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False)
            animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo')
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        from pandas import DataFrame

        if name is lib.no_default:
            # Fall back to the index's own name(s), with None replaced by
            # the level number.
            name = self._get_level_names()
        # Under Copy-on-Write, constructing from self does not need an
        # eager copy.
        result = DataFrame({name: self}, copy=not using_copy_on_write())

        if index:
            result.index = self
        return result

1667 

1668 # -------------------------------------------------------------------- 

1669 # Name-Centric Methods 

1670 

    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3], name='x')
        >>> idx
        Index([1, 2, 3], dtype='int64', name='x')
        >>> idx.name
        'x'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        # Called for validation only; raises if `value` is not hashable.
        maybe_extract_name(value, None, type(self))
        self._name = value

1696 

1697 @final 

1698 def _validate_names( 

1699 self, name=None, names=None, deep: bool = False 

1700 ) -> list[Hashable]: 

1701 """ 

1702 Handles the quirks of having a singular 'name' parameter for general 

1703 Index and plural 'names' parameter for MultiIndex. 

1704 """ 

1705 from copy import deepcopy 

1706 

1707 if names is not None and name is not None: 

1708 raise TypeError("Can only provide one of `names` and `name`") 

1709 if names is None and name is None: 

1710 new_names = deepcopy(self.names) if deep else self.names 

1711 elif names is not None: 

1712 if not is_list_like(names): 

1713 raise TypeError("Must pass list-like as `names`.") 

1714 new_names = names 

1715 elif not is_list_like(name): 

1716 new_names = [name] 

1717 else: 

1718 new_names = name 

1719 

1720 if len(new_names) != len(self.names): 

1721 raise ValueError( 

1722 f"Length of new names must be {len(self.names)}, got {len(new_names)}" 

1723 ) 

1724 

1725 # All items in 'new_names' need to be hashable 

1726 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name") 

1727 

1728 return new_names 

1729 

    def _get_default_index_names(
        self, names: Hashable | Sequence[Hashable] | None = None, default=None
    ) -> list[Hashable]:
        """
        Get names of index.

        Parameters
        ----------
        names : int, str or 1-dimensional list, default None
            Index names to set.
        default : str
            Default name of index.

        Raises
        ------
        ValueError
            if names not str or 1-dimensional list
        """
        from pandas.core.indexes.multi import MultiIndex

        if names is not None:
            # Allow a bare scalar label by wrapping it in a list.
            if isinstance(names, (int, str)):
                names = [names]

        if not isinstance(names, list) and names is not None:
            raise ValueError("Index names must be str or 1-dimensional list")

        if not names:
            if isinstance(self, MultiIndex):
                # Replace missing level names with positional placeholders.
                names = com.fill_missing_names(self.names)
            else:
                names = [default] if self.name is None else [self.name]

        return names

1764 

    def _get_names(self) -> FrozenList:
        # Getter backing the `names` property: a flat Index exposes its
        # single name as a one-element FrozenList.
        return FrozenList((self.name,))

1767 

1768 def _set_names(self, values, *, level=None) -> None: 

1769 """ 

1770 Set new names on index. Each name has to be a hashable type. 

1771 

1772 Parameters 

1773 ---------- 

1774 values : str or sequence 

1775 name(s) to set 

1776 level : int, level name, or sequence of int/level names (default None) 

1777 If the index is a MultiIndex (hierarchical), level(s) to set (None 

1778 for all levels). Otherwise level must be None 

1779 

1780 Raises 

1781 ------ 

1782 TypeError if each name is not hashable. 

1783 """ 

1784 if not is_list_like(values): 

1785 raise ValueError("Names must be a list-like") 

1786 if len(values) != 1: 

1787 raise ValueError(f"Length of new names must be 1, got {len(values)}") 

1788 

1789 # GH 20527 

1790 # All items in 'name' need to be hashable: 

1791 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name") 

1792 

1793 self._name = values[0] 

1794 

    # Plural `names` accessor backed by _get_names/_set_names, giving Index
    # and MultiIndex a uniform attribute.
    names = property(fset=_set_names, fget=_get_names)

1796 

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: bool = ...) -> Self | None:
        ...

    def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------

        names : label or list of label or dict-like for MultiIndex
            Name(s) to set.

            .. versionchanged:: 1.3.0

        level : int, label or list of int or label, optional
            If the index is a MultiIndex and names is not dict-like, level(s) to set
            (None for all levels). Otherwise level must be None.

            .. versionchanged:: 1.3.0

        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx = idx.set_names(['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])

        When renaming levels with a dict, levels can not be passed.

        >>> idx.set_names({'kind': 'snake'})
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['snake', 'year'])
        """
        # Validate the allowed combinations of names/level/index type.
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
            raise TypeError("Can only pass dict-like as `names` for MultiIndex.")

        if is_dict_like(names) and level is not None:
            raise TypeError("Can not pass level for dictlike `names`.")

        if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
            # Transform dict to list of new names and corresponding levels
            level, names_adjusted = [], []
            for i, name in enumerate(self.names):
                if name in names.keys():
                    level.append(i)
                    names_adjusted.append(names[name])
            names = names_adjusted

        # Normalize scalars to single-element lists for _set_names.
        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            # Operate on a shallow view so the original index is untouched.
            idx = self._view()

        idx._set_names(names, level=level)
        if not inplace:
            return idx
        return None

1913 

    @overload
    def rename(self, name, *, inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def rename(self, name, *, inplace: Literal[True]) -> None:
        ...

    @deprecate_nonkeyword_arguments(
        version="3.0", allowed_args=["self", "name"], name="rename"
    )
    def rename(self, name, inplace: bool = False) -> Self | None:
        """
        Alter Index or MultiIndex name.

        Able to set new names without level. Defaults to returning new index.
        Length of names must match number of levels in MultiIndex.

        Parameters
        ----------
        name : label or list of labels
            Name(s) to set.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.set_names : Able to set new names partially and by level.

        Examples
        --------
        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
        >>> idx.rename('grade')
        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]],
        ...                                  names=['kind', 'year'])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.rename(['species', 'year'])
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        >>> idx.rename('species')
        Traceback (most recent call last):
        TypeError: Must pass list-like as `names`.
        """
        # Wrap in a list so set_names handles the scalar/list-like cases
        # uniformly; set_names performs all validation.
        return self.set_names([name], inplace=inplace)

1975 

1976 # -------------------------------------------------------------------- 

1977 # Level-Centric Methods 

1978 

    @property
    def nlevels(self) -> int:
        """
        Number of levels.
        """
        # A flat Index always has exactly one level.
        return 1

1985 

    def _sort_levels_monotonic(self) -> Self:
        """
        Compat with MultiIndex.
        """
        # Nothing to do for a flat Index; returned unchanged.
        return self

1991 

1992 @final 

1993 def _validate_index_level(self, level) -> None: 

1994 """ 

1995 Validate index level. 

1996 

1997 For single-level Index getting level number is a no-op, but some 

1998 verification must be done like in MultiIndex. 

1999 

2000 """ 

2001 if isinstance(level, int): 

2002 if level < 0 and level != -1: 

2003 raise IndexError( 

2004 "Too many levels: Index has only 1 level, " 

2005 f"{level} is not a valid level number" 

2006 ) 

2007 if level > 0: 

2008 raise IndexError( 

2009 f"Too many levels: Index has only 1 level, not {level + 1}" 

2010 ) 

2011 elif level != self.name: 

2012 raise KeyError( 

2013 f"Requested level ({level}) does not match index name ({self.name})" 

2014 ) 

2015 

    def _get_level_number(self, level) -> int:
        # After validation, the only possible level number for a flat
        # Index is 0.
        self._validate_index_level(level)
        return 0

2019 

2020 def sortlevel( 

2021 self, 

2022 level=None, 

2023 ascending: bool | list[bool] = True, 

2024 sort_remaining=None, 

2025 na_position: NaPosition = "first", 

2026 ): 

2027 """ 

2028 For internal compatibility with the Index API. 

2029 

2030 Sort the Index. This is for compat with MultiIndex 

2031 

2032 Parameters 

2033 ---------- 

2034 ascending : bool, default True 

2035 False to sort in descending order 

2036 na_position : {'first' or 'last'}, default 'first' 

2037 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at 

2038 the end. 

2039 

2040 .. versionadded:: 2.1.0 

2041 

2042 level, sort_remaining are compat parameters 

2043 

2044 Returns 

2045 ------- 

2046 Index 

2047 """ 

2048 if not isinstance(ascending, (list, bool)): 

2049 raise TypeError( 

2050 "ascending must be a single bool value or" 

2051 "a list of bool values of length 1" 

2052 ) 

2053 

2054 if isinstance(ascending, list): 

2055 if len(ascending) != 1: 

2056 raise TypeError("ascending must be a list of bool values of length 1") 

2057 ascending = ascending[0] 

2058 

2059 if not isinstance(ascending, bool): 

2060 raise TypeError("ascending must be a bool value") 

2061 

2062 return self.sort_values( 

2063 return_indexer=True, ascending=ascending, na_position=na_position 

2064 ) 

2065 

    def _get_level_values(self, level) -> Index:
        """
        Return an Index of values for requested level.

        This is primarily useful to get an individual level of values from a
        MultiIndex, but is provided on Index as well for compatibility.

        Parameters
        ----------
        level : int or str
            It is either the integer position or the name of the level.

        Returns
        -------
        Index
            Calling object, as there is only one level in the Index.

        See Also
        --------
        MultiIndex.get_level_values : Get values for a level of a MultiIndex.

        Notes
        -----
        For Index, level should be 0, since there are no multiple levels.

        Examples
        --------
        >>> idx = pd.Index(list('abc'))
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')

        Get level values by supplying `level` as integer:

        >>> idx.get_level_values(0)
        Index(['a', 'b', 'c'], dtype='object')
        """
        # A flat Index has exactly one level, so after validating the
        # requested level the values are just the index itself.
        self._validate_index_level(level)
        return self

2104 

# Public alias: exposes the level-values accessor under the same name
# MultiIndex uses, so code can call idx.get_level_values uniformly.
get_level_values = _get_level_values

2106 

@final
def droplevel(self, level: IndexLabel = 0):
    """
    Return index with requested level(s) removed.

    If resulting index has only 1 level left, the result will be
    of Index type, not MultiIndex. The original index is not modified
    inplace.

    Parameters
    ----------
    level : int, str, or list-like, default 0
        If a string is given, must be the name of a level
        If list-like, elements must be names or indexes of levels.

    Returns
    -------
    Index or MultiIndex

    Examples
    --------
    >>> mi = pd.MultiIndex.from_arrays(
    ...     [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
    >>> mi.droplevel()
    MultiIndex([(3, 5),
                (4, 6)],
               names=['y', 'z'])
    >>> mi.droplevel(['x', 'y'])
    Index([5, 6], dtype='int64', name='z')
    """
    # Normalize a scalar level to a one-element list so a single code
    # path handles droplevel(0) and droplevel([0, 1]) alike.
    if isinstance(level, (tuple, list)):
        levels = level
    else:
        levels = [level]

    # Resolve names to positions and drop from the highest position
    # downward so earlier removals do not shift later ones.
    level_numbers = sorted(
        (self._get_level_number(lev) for lev in levels), reverse=True
    )

    return self._drop_level_numbers(level_numbers)

2158 

@final
def _drop_level_numbers(self, levnums: list[int]):
    """
    Drop MultiIndex levels by level _number_, not name.

    Parameters
    ----------
    levnums : list[int]
        Level positions to remove. NOTE(review): `droplevel` passes these
        sorted in *descending* order; the pop-loop below relies on that so
        removals do not shift the positions of levels still to be removed.

    Returns
    -------
    Index or MultiIndex
        A flat Index if exactly one level remains, else a new MultiIndex.
    """

    # No-op fast path: nothing to drop on a flat Index.
    if not levnums and not isinstance(self, ABCMultiIndex):
        return self
    if len(levnums) >= self.nlevels:
        raise ValueError(
            f"Cannot remove {len(levnums)} levels from an index with "
            f"{self.nlevels} levels: at least one level must be left."
        )
    # The two checks above guarantee that here self is a MultiIndex
    self = cast("MultiIndex", self)

    new_levels = list(self.levels)
    new_codes = list(self.codes)
    new_names = list(self.names)

    for i in levnums:
        new_levels.pop(i)
        new_codes.pop(i)
        new_names.pop(i)

    if len(new_levels) == 1:
        # Collapse to a flat Index: materialize the remaining level's
        # values in code order.
        lev = new_levels[0]

        if len(lev) == 0:
            # If lev is empty, lev.take will fail GH#42055
            if len(new_codes[0]) == 0:
                # GH#45230 preserve RangeIndex here
                # see test_reset_index_empty_rangeindex
                result = lev[:0]
            else:
                # Codes are necessarily all -1 (missing) here.
                res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                # _constructor instead of type(lev) for RangeIndex compat GH#35230
                result = lev._constructor._simple_new(res_values, name=new_names[0])
        else:
            # set nan if needed
            mask = new_codes[0] == -1
            result = new_levels[0].take(new_codes[0])
            if mask.any():
                result = result.putmask(mask, np.nan)

        result._name = new_names[0]

        return result
    else:
        # Local import avoids a circular dependency at module load.
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex(
            levels=new_levels,
            codes=new_codes,
            names=new_names,
            verify_integrity=False,
        )

2216 

2217 # -------------------------------------------------------------------- 

2218 # Introspection Methods 

2219 

@cache_readonly
@final
def _can_hold_na(self) -> bool:
    """
    Whether this index's dtype can represent a missing value.

    Extension dtypes report their own capability; of the numpy dtypes,
    signed/unsigned integers and booleans cannot hold NA.
    """
    dtype = self.dtype
    if isinstance(dtype, ExtensionDtype):
        # Extension arrays declare NA support themselves.
        return dtype._can_hold_na
    # numpy kinds: i=int, u=uint, b=bool have no NA representation.
    return dtype.kind not in "iub"

2228 

@property
def is_monotonic_increasing(self) -> bool:
    """
    Return a boolean if the values are equal or increasing.

    Returns
    -------
    bool

    See Also
    --------
    Index.is_monotonic_decreasing : Check if the values are equal or decreasing.

    Examples
    --------
    >>> pd.Index([1, 2, 3]).is_monotonic_increasing
    True
    >>> pd.Index([1, 2, 2]).is_monotonic_increasing
    True
    >>> pd.Index([1, 3, 2]).is_monotonic_increasing
    False
    """
    # Delegate to the index engine, which computes this lazily.
    engine = self._engine
    return engine.is_monotonic_increasing

2252 

@property
def is_monotonic_decreasing(self) -> bool:
    """
    Return a boolean if the values are equal or decreasing.

    Returns
    -------
    bool

    See Also
    --------
    Index.is_monotonic_increasing : Check if the values are equal or increasing.

    Examples
    --------
    >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
    True
    >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
    True
    >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
    False
    """
    # Delegate to the index engine, which computes this lazily.
    engine = self._engine
    return engine.is_monotonic_decreasing

2276 

2277 @final 

2278 @property 

2279 def _is_strictly_monotonic_increasing(self) -> bool: 

2280 """ 

2281 Return if the index is strictly monotonic increasing 

2282 (only increasing) values. 

2283 

2284 Examples 

2285 -------- 

2286 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing 

2287 True 

2288 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing 

2289 False 

2290 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing 

2291 False 

2292 """ 

2293 return self.is_unique and self.is_monotonic_increasing 

2294 

2295 @final 

2296 @property 

2297 def _is_strictly_monotonic_decreasing(self) -> bool: 

2298 """ 

2299 Return if the index is strictly monotonic decreasing 

2300 (only decreasing) values. 

2301 

2302 Examples 

2303 -------- 

2304 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing 

2305 True 

2306 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing 

2307 False 

2308 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing 

2309 False 

2310 """ 

2311 return self.is_unique and self.is_monotonic_decreasing 

2312 

@cache_readonly
def is_unique(self) -> bool:
    """
    Return if the index has unique values.

    Returns
    -------
    bool

    See Also
    --------
    Index.has_duplicates : Inverse method that checks if it has duplicate values.

    Examples
    --------
    >>> idx = pd.Index([1, 5, 7, 7])
    >>> idx.is_unique
    False

    >>> idx = pd.Index([1, 5, 7])
    >>> idx.is_unique
    True

    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_unique
    False

    >>> idx = pd.Index(["Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_unique
    True
    """
    # Delegated to the index engine; cache_readonly memoizes the result
    # on this (immutable) Index so the check runs at most once.
    return self._engine.is_unique

2347 

@final
@property
def has_duplicates(self) -> bool:
    """
    Check if the Index has duplicate values.

    Returns
    -------
    bool
        Whether or not the Index has duplicate values.

    See Also
    --------
    Index.is_unique : Inverse method that checks if it has unique values.

    Examples
    --------
    >>> idx = pd.Index([1, 5, 7, 7])
    >>> idx.has_duplicates
    True

    >>> idx = pd.Index([1, 5, 7])
    >>> idx.has_duplicates
    False
    """
    # Simply the negation of the cached uniqueness check.
    duplicated = not self.is_unique
    return duplicated

2384 

@final
def is_boolean(self) -> bool:
    """
    Check if the Index only consists of booleans.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_bool_dtype` instead.

    Returns
    -------
    bool
        Whether or not the Index only consists of booleans.

    See Also
    --------
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype (deprecated).
    is_categorical : Check if the Index holds categorical data.
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index([True, False, True])
    >>> idx.is_boolean()  # doctest: +SKIP
    True

    >>> idx = pd.Index(["True", "False", "True"])
    >>> idx.is_boolean()  # doctest: +SKIP
    False

    >>> idx = pd.Index([True, False, "True"])
    >>> idx.is_boolean()  # doctest: +SKIP
    False
    """
    warnings.warn(
        f"{type(self).__name__}.is_boolean is deprecated. "
        # Bug fix: the message previously recommended the nonexistent
        # `is_bool_type`; the real replacement is `is_bool_dtype`
        # (as the docstring above already says).
        "Use pandas.api.types.is_bool_dtype instead.",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["boolean"]

2428 

@final
def is_integer(self) -> bool:
    """
    Check if the Index only consists of integers.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_integer_dtype` instead.

    Returns
    -------
    bool
        Whether or not the Index only consists of integers.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx.is_integer()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_integer()  # doctest: +SKIP
    False
    """
    # Deprecated shim: warn, then fall back to the value-based
    # inferred-type check that predates dtype predicates.
    msg = (
        f"{type(self).__name__}.is_integer is deprecated. "
        "Use pandas.api.types.is_integer_dtype instead."
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    return self.inferred_type == "integer"

2472 

@final
def is_floating(self) -> bool:
    """
    Check if the Index is a floating type.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_float_dtype` instead

    The Index may consist of only floats, NaNs, or a mix of floats,
    integers, or NaNs.

    Returns
    -------
    bool
        Whether or not the Index only consists of floats, NaNs, or
        a mix of floats, integers, or NaNs.

    See Also
    --------
    is_integer : Check if the Index only consists of integers (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_floating()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4])
    >>> idx.is_floating()  # doctest: +SKIP
    False
    """
    # Deprecated shim: warn, then fall back to the value-based
    # inferred-type check that predates dtype predicates.
    msg = (
        f"{type(self).__name__}.is_floating is deprecated. "
        "Use pandas.api.types.is_float_dtype instead."
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    # "mixed-integer-float" and "integer-na" also count as floating,
    # matching the historical behavior.
    return self.inferred_type in ("floating", "mixed-integer-float", "integer-na")

2524 

@final
def is_numeric(self) -> bool:
    """
    Check if the Index only consists of numeric data.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_numeric_dtype` instead.

    Returns
    -------
    bool
        Whether or not the Index only consists of numeric data.

    See Also
    --------
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4.0])
    >>> idx.is_numeric()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
    >>> idx.is_numeric()  # doctest: +SKIP
    False
    """
    # Deprecated shim: warn, then fall back to the value-based
    # inferred-type check that predates dtype predicates.
    msg = (
        f"{type(self).__name__}.is_numeric is deprecated. "
        "Use pandas.api.types.is_any_real_numeric_dtype instead"
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    return self.inferred_type in ("integer", "floating")

2576 

@final
def is_object(self) -> bool:
    """
    Check if the Index is of the object dtype.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.is_object_dtype` instead.

    Returns
    -------
    bool
        Whether or not the Index is of the object dtype.

    See Also
    --------
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
    >>> idx.is_object()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
    >>> idx.is_object()  # doctest: +SKIP
    False
    """
    warnings.warn(
        # Bug fix: the two fragments previously rendered as
        # "...is deprecated.Use..." — a space was missing.
        f"{type(self).__name__}.is_object is deprecated. "
        "Use pandas.api.types.is_object_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return is_object_dtype(self.dtype)

2625 

@final
def is_categorical(self) -> bool:
    """
    Check if the Index holds categorical data.

    .. deprecated:: 2.0.0
        Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.

    Returns
    -------
    bool
        True if the Index is categorical.

    See Also
    --------
    CategoricalIndex : Index for categorical data.
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_interval : Check if the Index holds Interval objects (deprecated).

    Examples
    --------
    >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
    ...                 "Watermelon"]).astype("category")
    >>> idx.is_categorical()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 3, 5, 7])
    >>> idx.is_categorical()  # doctest: +SKIP
    False
    """
    warnings.warn(
        # Bug fix: the two fragments previously rendered as
        # "...is deprecated.Use..." — a space was missing.
        f"{type(self).__name__}.is_categorical is deprecated. "
        "Use pandas.api.types.is_categorical_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )

    return self.inferred_type in ["categorical"]

2678 

@final
def is_interval(self) -> bool:
    """
    Check if the Index holds Interval objects.

    .. deprecated:: 2.0.0
        Use `isinstance(index.dtype, pd.IntervalDtype)` instead.

    Returns
    -------
    bool
        Whether or not the Index holds Interval objects.

    See Also
    --------
    IntervalIndex : Index for Interval objects.
    is_boolean : Check if the Index only consists of booleans (deprecated).
    is_integer : Check if the Index only consists of integers (deprecated).
    is_floating : Check if the Index is a floating type (deprecated).
    is_numeric : Check if the Index only consists of numeric data (deprecated).
    is_object : Check if the Index is of the object dtype. (deprecated).
    is_categorical : Check if the Index holds categorical data (deprecated).

    Examples
    --------
    >>> idx = pd.Index([pd.Interval(left=0, right=5),
    ...                 pd.Interval(left=5, right=10)])
    >>> idx.is_interval()  # doctest: +SKIP
    True

    >>> idx = pd.Index([1, 3, 5, 7])
    >>> idx.is_interval()  # doctest: +SKIP
    False
    """
    warnings.warn(
        # Bug fix: the two fragments previously rendered as
        # "...is deprecated.Use..." — a space was missing.
        f"{type(self).__name__}.is_interval is deprecated. "
        "Use pandas.api.types.is_interval_dtype instead",
        FutureWarning,
        stacklevel=find_stack_level(),
    )
    return self.inferred_type in ["interval"]

2720 

2721 @final 

2722 def _holds_integer(self) -> bool: 

2723 """ 

2724 Whether the type is an integer type. 

2725 """ 

2726 return self.inferred_type in ["integer", "mixed-integer"] 

2727 

@final
def holds_integer(self) -> bool:
    """
    Whether the type is an integer type.

    .. deprecated:: 2.0.0
        Use `pandas.api.types.infer_dtype` instead
    """
    # Deprecated public wrapper around the private check.
    msg = (
        f"{type(self).__name__}.holds_integer is deprecated. "
        "Use pandas.api.types.infer_dtype instead."
    )
    warnings.warn(msg, FutureWarning, stacklevel=find_stack_level())
    return self._holds_integer()

2743 

@cache_readonly
def inferred_type(self) -> str_t:
    """
    Return a string of the type inferred from the values.

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3])
    >>> idx
    Index([1, 2, 3], dtype='int64')
    >>> idx.inferred_type
    'integer'
    """
    # Inference inspects the actual values; skipna=False means missing
    # values are not ignored, so they can affect the inferred category.
    # Cached: the Index is immutable so the answer never changes.
    return lib.infer_dtype(self._values, skipna=False)

2758 

@cache_readonly
@final
def _is_all_dates(self) -> bool:
    """
    Whether or not the index values only consist of dates.
    """
    if needs_i8_conversion(self.dtype):
        # datetime64/timedelta64/period-style dtypes are datelike by
        # construction, no value inspection needed.
        return True
    elif self.dtype != _dtype_obj:
        # TODO(ExtensionIndex): 3rd party EA might override?
        # Note: this includes IntervalIndex, even when the left/right
        # contain datetime-like objects.
        return False
    elif self._is_multi:
        return False
    # Object dtype: fall back to scanning the actual values.
    return is_datetime_array(ensure_object(self._values))

2775 

@final
@cache_readonly
def _is_multi(self) -> bool:
    """
    Cached check equivalent to isinstance(self, MultiIndex)
    """
    # Checked against the ABC stand-in rather than the concrete
    # MultiIndex class; cached since the class of self never changes.
    return isinstance(self, ABCMultiIndex)

2783 

2784 # -------------------------------------------------------------------- 

2785 # Pickle Methods 

2786 

def __reduce__(self):
    """
    Support pickling: reconstruct through ``_new_Index`` from the
    underlying data and the index name.
    """
    state = {"data": self._data, "name": self.name}
    return _new_Index, (type(self), state), None

2790 

2791 # -------------------------------------------------------------------- 

2792 # Null Handling Methods 

2793 

@cache_readonly
def _na_value(self):
    """The expected NA value to use with this index."""
    dtype = self.dtype
    if not isinstance(dtype, np.dtype):
        # Extension dtypes carry their own NA sentinel (e.g. pd.NA).
        return dtype.na_value
    # numpy datetime64/timedelta64 use NaT; everything else gets NaN.
    return NaT if dtype.kind in "mM" else np.nan

2803 

@cache_readonly
def _isnan(self) -> npt.NDArray[np.bool_]:
    """
    Return if each value is NaN.
    """
    if not self._can_hold_na:
        # Dtype cannot represent NA at all -> all-False mask.
        # (Callers normally check `hasnans` before reaching this.)
        return np.zeros(len(self), dtype=np.bool_)
    return isna(self)

2816 

@cache_readonly
def hasnans(self) -> bool:
    """
    Return True if there are any NaNs.

    Enables various performance speedups.

    Returns
    -------
    bool

    Examples
    --------
    >>> s = pd.Series([1, 2, 3], index=['a', 'b', None])
    >>> s.index.hasnans
    True
    """
    if not self._can_hold_na:
        # Dtype cannot hold NA, so there is nothing to scan for.
        return False
    # `_isnan` is itself cached, so this is cheap after first access.
    return bool(self._isnan.any())

2843 

@final
def isna(self) -> npt.NDArray[np.bool_]:
    """
    Detect missing values.

    Return a boolean same-sized object indicating if the values are NA.
    NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
    mapped to ``True`` values; everything else maps to ``False``.
    Characters such as empty strings `''` or :attr:`numpy.inf` are not
    considered NA values.

    Returns
    -------
    numpy.ndarray[bool]
        A boolean array of whether my values are NA.

    See Also
    --------
    Index.notna : Boolean inverse of isna.
    Index.dropna : Omit entries with missing values.
    isna : Top-level isna.
    Series.isna : Detect missing values in Series object.

    Examples
    --------
    >>> idx = pd.Index([5.2, 6.0, np.nan])
    >>> idx.isna()
    array([False, False,  True])

    >>> idx = pd.Index(['black', '', 'red', None])
    >>> idx.isna()
    array([False, False, False,  True])
    """
    # The mask is computed once and cached by the `_isnan` attribute.
    return self._isnan

2898 

# Alias kept for API symmetry with Series/DataFrame.isnull.
isnull = isna

2900 

@final
def notna(self) -> npt.NDArray[np.bool_]:
    """
    Detect existing (non-missing) values.

    Return a boolean same-sized object indicating if the values are not
    NA. Non-missing values get mapped to ``True``; NA values, such as
    ``None`` or :attr:`numpy.NaN`, get mapped to ``False``. Characters
    such as empty strings ``''`` or :attr:`numpy.inf` are not considered
    NA values.

    Returns
    -------
    numpy.ndarray[bool]
        Boolean array to indicate which entries are not NA.

    See Also
    --------
    Index.notnull : Alias of notna.
    Index.isna: Inverse of notna.
    notna : Top-level notna.

    Examples
    --------
    >>> idx = pd.Index([5.2, 6.0, np.nan])
    >>> idx.notna()
    array([ True,  True, False])
    """
    # Boolean inverse of the (cached) isna mask.
    mask = self.isna()
    return ~mask

2944 

# Alias kept for API symmetry with Series/DataFrame.notnull.
notnull = notna

2946 

def fillna(self, value=None, downcast=lib.no_default):
    """
    Fill NA/NaN values with the specified value.

    Parameters
    ----------
    value : scalar
        Scalar value to use to fill holes (e.g. 0).
        This value cannot be a list-likes.
    downcast : dict, default is None
        A dict of item->dtype of what to downcast if possible,
        or the string 'infer' which will try to downcast to an appropriate
        equal type (e.g. float64 to int64 if possible).

        .. deprecated:: 2.1.0

    Returns
    -------
    Index

    Raises
    ------
    TypeError
        If ``value`` is not a scalar.
    NotImplementedError
        If a non-None ``downcast`` is passed and the index has NaNs.

    See Also
    --------
    DataFrame.fillna : Fill NaN values of a DataFrame.
    Series.fillna : Fill NaN Values of a Series.

    Examples
    --------
    >>> idx = pd.Index([np.nan, np.nan, 3])
    >>> idx.fillna(0)
    Index([0.0, 0.0, 3.0], dtype='float64')
    """
    if not is_scalar(value):
        raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
    # `lib.no_default` distinguishes "caller omitted downcast" from an
    # explicit downcast=None; only an explicit value triggers the warning.
    if downcast is not lib.no_default:
        warnings.warn(
            f"The 'downcast' keyword in {type(self).__name__}.fillna is "
            "deprecated and will be removed in a future version. "
            "It was previously silently ignored.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
    else:
        downcast = None

    if self.hasnans:
        result = self.putmask(self._isnan, value)
        if downcast is None:
            # no need to care metadata other than name
            # because it can't have freq if it has NaTs
            # _with_infer needed for test_fillna_categorical
            return Index._with_infer(result, name=self.name)
        raise NotImplementedError(
            f"{type(self).__name__}.fillna does not support 'downcast' "
            "argument values other than 'None'."
        )
    # Nothing to fill: return a cheap view of self.
    return self._view()

3003 

def dropna(self, how: AnyAll = "any") -> Self:
    """
    Return Index without NA/NaN values.

    Parameters
    ----------
    how : {'any', 'all'}, default 'any'
        If the Index is a MultiIndex, drop the value when any or all levels
        are NaN.

    Returns
    -------
    Index

    Examples
    --------
    >>> idx = pd.Index([1, np.nan, 3])
    >>> idx.dropna()
    Index([1.0, 3.0], dtype='float64')
    """
    if how not in ("any", "all"):
        raise ValueError(f"invalid how option: {how}")

    if not self.hasnans:
        # Nothing to drop: hand back a cheap view of self.
        return self._view()
    # Keep only the positions whose cached NA mask is False.
    kept = self._values[~self._isnan]
    return type(self)._simple_new(kept, name=self.name)

3031 

3032 # -------------------------------------------------------------------- 

3033 # Uniqueness Methods 

3034 

def unique(self, level: Hashable | None = None) -> Self:
    """
    Return unique values in the index.

    Unique values are returned in order of appearance, this does NOT sort.

    Parameters
    ----------
    level : int or hashable, optional
        Only return values from specified level (for MultiIndex).
        If int, gets the level by integer position, else by level name.

    Returns
    -------
    Index

    See Also
    --------
    unique : Numpy array of unique values in that column.
    Series.unique : Return unique values of Series object.

    Examples
    --------
    >>> idx = pd.Index([1, 1, 2, 3, 3])
    >>> idx.unique()
    Index([1, 2, 3], dtype='int64')
    """
    # Accepted only for MultiIndex compatibility; on a flat Index the
    # level is merely validated.
    if level is not None:
        self._validate_index_level(level)

    if self.is_unique:
        # Already unique: a view avoids copying the data.
        return self._view()

    deduped = super().unique()
    return self._shallow_copy(deduped)

3070 

def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
    """
    Return Index with duplicate values removed.

    Parameters
    ----------
    keep : {'first', 'last', ``False``}, default 'first'
        - 'first' : Drop duplicates except for the first occurrence.
        - 'last' : Drop duplicates except for the last occurrence.
        - ``False`` : Drop all duplicates.

    Returns
    -------
    Index

    See Also
    --------
    Series.drop_duplicates : Equivalent method on Series.
    DataFrame.drop_duplicates : Equivalent method on DataFrame.
    Index.duplicated : Related method on Index, indicating duplicate
        Index values.

    Examples
    --------
    >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
    >>> idx.drop_duplicates(keep='first')
    Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
    >>> idx.drop_duplicates(keep='last')
    Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
    >>> idx.drop_duplicates(keep=False)
    Index(['cow', 'beetle', 'hippo'], dtype='object')
    """
    if not self.is_unique:
        # Defer to the shared base-class implementation for the dedup.
        return super().drop_duplicates(keep=keep)
    # Already unique: nothing to drop, return a cheap view.
    return self._view()

3121 

3122 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: 

3123 """ 

3124 Indicate duplicate index values. 

3125 

3126 Duplicated values are indicated as ``True`` values in the resulting 

3127 array. Either all duplicates, all except the first, or all except the 

3128 last occurrence of duplicates can be indicated. 

3129 

3130 Parameters 

3131 ---------- 

3132 keep : {'first', 'last', False}, default 'first' 

3133 The value or values in a set of duplicates to mark as missing. 

3134 

3135 - 'first' : Mark duplicates as ``True`` except for the first 

3136 occurrence. 

3137 - 'last' : Mark duplicates as ``True`` except for the last 

3138 occurrence. 

3139 - ``False`` : Mark all duplicates as ``True``. 

3140 

3141 Returns 

3142 ------- 

3143 np.ndarray[bool] 

3144 

3145 See Also 

3146 -------- 

3147 Series.duplicated : Equivalent method on pandas.Series. 

3148 DataFrame.duplicated : Equivalent method on pandas.DataFrame. 

3149 Index.drop_duplicates : Remove duplicate values from Index. 

3150 

3151 Examples 

3152 -------- 

3153 By default, for each set of duplicated values, the first occurrence is 

3154 set to False and all others to True: 

3155 

3156 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) 

3157 >>> idx.duplicated() 

3158 array([False, False, True, False, True]) 

3159 

3160 which is equivalent to 

3161 

3162 >>> idx.duplicated(keep='first') 

3163 array([False, False, True, False, True]) 

3164 

3165 By using 'last', the last occurrence of each set of duplicated values 

3166 is set on False and all others on True: 

3167 

3168 >>> idx.duplicated(keep='last') 

3169 array([ True, False, True, False, False]) 

3170 

3171 By setting keep on ``False``, all duplicates are True: 

3172 

3173 >>> idx.duplicated(keep=False) 

3174 array([ True, False, True, False, True]) 

3175 """ 

3176 if self.is_unique: 

3177 # fastpath available bc we are immutable 

3178 return np.zeros(len(self), dtype=bool) 

3179 return self._duplicated(keep=keep) 

3180 

3181 # -------------------------------------------------------------------- 

3182 # Arithmetic & Logical Methods 

3183 

3184 def __iadd__(self, other): 

3185 # alias for __add__ 

3186 return self + other 

3187 

3188 @final 

3189 def __nonzero__(self) -> NoReturn: 

3190 raise ValueError( 

3191 f"The truth value of a {type(self).__name__} is ambiguous. " 

3192 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

3193 ) 

3194 

3195 __bool__ = __nonzero__ 

3196 

3197 # -------------------------------------------------------------------- 

3198 # Set Operation Methods 

3199 

3200 def _get_reconciled_name_object(self, other): 

3201 """ 

3202 If the result of a set operation will be self, 

3203 return self, unless the name changes, in which 

3204 case make a shallow copy of self. 

3205 """ 

3206 name = get_op_result_name(self, other) 

3207 if self.name is not name: 

3208 return self.rename(name) 

3209 return self 

3210 

3211 @final 

3212 def _validate_sort_keyword(self, sort): 

3213 if sort not in [None, False, True]: 

3214 raise ValueError( 

3215 "The 'sort' keyword only takes the values of " 

3216 f"None, True, or False; {sort} was passed." 

3217 ) 

3218 

    @final
    def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
        """
        With mismatched timezones, cast both to UTC.

        Parameters
        ----------
        other : Index
        setop : str
            Name of the set operation being performed (not used in this
            implementation; kept for call-site uniformity).

        Returns
        -------
        tuple[Index, Index]
            ``(self, other)`` converted to UTC when both sides are tz-aware
            DatetimeIndexes; otherwise both are returned unchanged.
        """
        # Caller is responsible for checking
        #  `self.dtype != other.dtype`
        if (
            isinstance(self, ABCDatetimeIndex)
            and isinstance(other, ABCDatetimeIndex)
            and self.tz is not None
            and other.tz is not None
        ):
            # GH#39328, GH#45357
            left = self.tz_convert("UTC")
            right = other.tz_convert("UTC")
            return left, right
        return self, other

3237 

    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

        MultiIndex case

        >>> idx1 = pd.MultiIndex.from_arrays(
        ...     [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
        ... )
        >>> idx1
        MultiIndex([(1,  'Red'),
                    (1, 'Blue'),
                    (2,  'Red'),
                    (2, 'Blue')],
                   )
        >>> idx2 = pd.MultiIndex.from_arrays(
        ...     [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
        ... )
        >>> idx2
        MultiIndex([(3,   'Red'),
                    (3, 'Green'),
                    (2,   'Red'),
                    (2, 'Green')],
                   )
        >>> idx1.union(idx2)
        MultiIndex([(1,  'Blue'),
                    (1,   'Red'),
                    (2,  'Blue'),
                    (2, 'Green'),
                    (2,   'Red'),
                    (3, 'Green'),
                    (3,   'Red')],
                   )
        >>> idx1.union(idx2, sort=False)
        MultiIndex([(1,   'Red'),
                    (1,  'Blue'),
                    (2,   'Red'),
                    (2,  'Blue'),
                    (3,   'Red'),
                    (3, 'Green'),
                    (2, 'Green')],
                   )
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if self.dtype != other.dtype:
            # Mismatched dtypes: align timezones if applicable, find a common
            # dtype, cast both sides, and recurse into the matched-dtype path.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(_unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            self, other = self._dti_setop_align_tzs(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            #  or after the dtype equality check above affects the returned dtype
            result = self._get_reconciled_name_object(other)
            if sort is True:
                return result.sort_values()
            return result

        elif not len(self):
            # Empty self: the union is just `other` with the reconciled name.
            result = other._get_reconciled_name_object(self)
            if sort is True:
                return result.sort_values()
            return result

        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)

3359 

    def _union(self, other: Index, sort: bool | None):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * True : sort the result
            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        lvals = self._values
        rvals = other._values

        if (
            sort in (None, True)
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
            and other._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            #  (actually don't need either unique, but without this restriction
            #  test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates; a dedicated algorithm preserves the
            # correct duplicate multiplicities in the result.
            result_dups = algos.union_with_duplicates(self, other)
            return _maybe_try_sort(result_dups, sort)

        # The rest of this method is analogous to Index._intersection_via_get_indexer

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            missing = (indexer == -1).nonzero()[0]
        else:
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        result: Index | MultiIndex | ArrayLike
        if self._is_multi:
            # Preserve MultiIndex to avoid losing dtypes
            result = self.append(other.take(missing))

        else:
            if len(missing) > 0:
                other_diff = rvals.take(missing)
                result = concat_compat((lvals, other_diff))
            else:
                result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result

3438 

3439 @final 

3440 def _wrap_setop_result(self, other: Index, result) -> Index: 

3441 name = get_op_result_name(self, other) 

3442 if isinstance(result, Index): 

3443 if result.name != name: 

3444 result = result.rename(name) 

3445 else: 

3446 result = self._shallow_copy(result, name=name) 

3447 return result 

3448 

    @final
    def intersection(self, other, sort: bool = False):
        # default sort keyword is different here from other setops intentionally
        #  done in GH#25063
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : True, False or None, default False
            Whether to sort the resulting index.

            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.
            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if self.dtype != other.dtype:
            self, other = self._dti_setop_align_tzs(other, "intersection")

        if self.equals(other):
            # Equal indexes: intersection is self, deduplicated if needed.
            if not self.is_unique:
                result = self.unique()._get_reconciled_name_object(other)
            else:
                result = self._get_reconciled_name_object(other)
            if sort is True:
                result = result.sort_values()
            return result

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if self.dtype == dtype:
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                #  1) which index's `freq` we get in DTI/TDI cases
                #     This may be a historical artifact, i.e. no documented
                #     reason for this choice.
                #  2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif self.dtype != other.dtype:
            # Mismatched but comparable dtypes: cast to a common dtype and
            # recurse into the matched-dtype path.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)

3534 

    def _intersection(self, other: Index, sort: bool = False):
        """
        intersection specialized to the case with matching dtypes.

        Tries the monotonic libjoin inner-join fastpath first; falls back to
        a get_indexer-based intersection when the fastpath does not apply or
        the values are not comparable.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and other._can_use_libjoin
        ):
            try:
                res_indexer, indexer, _ = self._inner_indexer(other)
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                if is_numeric_dtype(self.dtype):
                    # This is faster, because Index.unique() checks for uniqueness
                    #  before calculating the unique values.
                    res = algos.unique1d(res_indexer)
                else:
                    result = self.take(indexer)
                    res = result.drop_duplicates()
                return ensure_wrapped_if_datetimelike(res)

        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values

3564 

    def _wrap_intersection_result(self, other, result):
        """Box an intersection ``result`` with the reconciled name."""
        # We will override for MultiIndex to handle empty results
        return self._wrap_setop_result(other, result)

3568 

    @final
    def _intersection_via_get_indexer(
        self, other: Index | MultiIndex, sort
    ) -> ArrayLike | MultiIndex:
        """
        Find the intersection of two Indexes using get_indexer.

        Returns
        -------
        np.ndarray or ExtensionArray or MultiIndex
            The returned array will be unique.
        """
        left_unique = self.unique()
        right_unique = other.unique()

        # even though we are unique, we need get_indexer_for for IntervalIndex
        indexer = left_unique.get_indexer_for(right_unique)

        # -1 marks elements of `other` not found in `self`; drop those.
        mask = indexer != -1

        taker = indexer.take(mask.nonzero()[0])
        if sort is False:
            # sort bc we want the elements in the same order they are in self
            # unnecessary in the case with sort=None bc we will sort later
            taker = np.sort(taker)

        result: MultiIndex | ExtensionArray | np.ndarray
        if isinstance(left_unique, ABCMultiIndex):
            # Preserve MultiIndex rather than extracting raw values.
            result = left_unique.take(taker)
        else:
            result = left_unique.take(taker)._values
        return result

3601 

3602 @final 

3603 def difference(self, other, sort=None): 

3604 """ 

3605 Return a new Index with elements of index not in `other`. 

3606 

3607 This is the set difference of two Index objects. 

3608 

3609 Parameters 

3610 ---------- 

3611 other : Index or array-like 

3612 sort : bool or None, default None 

3613 Whether to sort the resulting index. By default, the 

3614 values are attempted to be sorted, but any TypeError from 

3615 incomparable elements is caught by pandas. 

3616 

3617 * None : Attempt to sort the result, but catch any TypeErrors 

3618 from comparing incomparable elements. 

3619 * False : Do not sort the result. 

3620 * True : Sort the result (which may raise TypeError). 

3621 

3622 Returns 

3623 ------- 

3624 Index 

3625 

3626 Examples 

3627 -------- 

3628 >>> idx1 = pd.Index([2, 1, 3, 4]) 

3629 >>> idx2 = pd.Index([3, 4, 5, 6]) 

3630 >>> idx1.difference(idx2) 

3631 Index([1, 2], dtype='int64') 

3632 >>> idx1.difference(idx2, sort=False) 

3633 Index([2, 1], dtype='int64') 

3634 """ 

3635 self._validate_sort_keyword(sort) 

3636 self._assert_can_do_setop(other) 

3637 other, result_name = self._convert_can_do_setop(other) 

3638 

3639 # Note: we do NOT call _dti_setop_align_tzs here, as there 

3640 # is no requirement that .difference be commutative, so it does 

3641 # not cast to object. 

3642 

3643 if self.equals(other): 

3644 # Note: we do not (yet) sort even if sort=None GH#24959 

3645 return self[:0].rename(result_name) 

3646 

3647 if len(other) == 0: 

3648 # Note: we do not (yet) sort even if sort=None GH#24959 

3649 result = self.unique().rename(result_name) 

3650 if sort is True: 

3651 return result.sort_values() 

3652 return result 

3653 

3654 if not self._should_compare(other): 

3655 # Nothing matches -> difference is everything 

3656 result = self.unique().rename(result_name) 

3657 if sort is True: 

3658 return result.sort_values() 

3659 return result 

3660 

3661 result = self._difference(other, sort=sort) 

3662 return self._wrap_difference_result(other, result) 

3663 

3664 def _difference(self, other, sort): 

3665 # overridden by RangeIndex 

3666 this = self 

3667 if isinstance(self, ABCCategoricalIndex) and self.hasnans and other.hasnans: 

3668 this = this.dropna() 

3669 other = other.unique() 

3670 the_diff = this[other.get_indexer_for(this) == -1] 

3671 the_diff = the_diff if this.is_unique else the_diff.unique() 

3672 the_diff = _maybe_try_sort(the_diff, sort) 

3673 return the_diff 

3674 

    def _wrap_difference_result(self, other, result):
        """Box a difference ``result`` with the reconciled name."""
        # We will override for MultiIndex to handle empty results
        return self._wrap_setop_result(other, result)

3678 

    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : bool or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            # No explicit name passed: fall back to the reconciled one.
            result_name = result_name_update

        if self.dtype != other.dtype:
            self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

        if not self._should_compare(other):
            # Nothing comparable: every element is in exactly one side,
            # so the symmetric difference degenerates to the union.
            return self.union(other, sort=sort).rename(result_name)

        elif self.dtype != other.dtype:
            # Comparable but mismatched dtypes: cast to a common dtype and
            # recurse into the matched-dtype path.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other.take(right_indexer)

        res_values = left_diff.append(right_diff)
        result = _maybe_try_sort(res_values, sort)

        if not self._is_multi:
            return Index(result, name=result_name, dtype=res_values.dtype)
        else:
            left_diff = cast("MultiIndex", left_diff)
            if len(result) == 0:
                # result might be an Index, if other was an Index
                return left_diff.remove_unused_levels().set_names(result_name)
            return result.set_names(result_name)

3759 

3760 @final 

3761 def _assert_can_do_setop(self, other) -> bool: 

3762 if not is_list_like(other): 

3763 raise TypeError("Input must be Index or array-like") 

3764 return True 

3765 

3766 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: 

3767 if not isinstance(other, Index): 

3768 other = Index(other, name=self.name) 

3769 result_name = self.name 

3770 else: 

3771 result_name = get_op_result_name(self, other) 

3772 return other, result_name 

3773 

3774 # -------------------------------------------------------------------- 

3775 # Indexing Methods 

3776 

    def get_loc(self, key):
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Raises
        ------
        KeyError
            If ``key`` is a valid label but not present in the index.
        InvalidIndexError
            If ``key`` is not a usable label here (e.g. a slice, or a
            container holding a slice).

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True])
        """
        casted_key = self._maybe_cast_indexer(key)
        try:
            return self._engine.get_loc(casted_key)
        except KeyError as err:
            if isinstance(casted_key, slice) or (
                isinstance(casted_key, abc.Iterable)
                and any(isinstance(x, slice) for x in casted_key)
            ):
                # Slices (or containers of slices) are never valid labels;
                # report an indexing error rather than a missing key.
                raise InvalidIndexError(key)
            # Re-raise with the ORIGINAL (un-cast) key so the user sees
            # exactly what they passed.
            raise KeyError(key) from err
        except TypeError:
            # If we have a listlike key, _check_indexing_error will raise
            #  InvalidIndexError. Otherwise we fall through and re-raise
            #  the TypeError.
            self._check_indexing_error(key)
            raise

3819 

    @final
    def get_indexer(
        self,
        target,
        method: ReindexMethod | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : Index
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.

        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
        method = clean_reindex_fill_method(method)
        # Keep a reference to the un-cast target: for a Categorical self we
        # need it below to distinguish "real" NaNs in the input from values
        # that became NaN because they are not among our categories.
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            #  matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if isinstance(self.dtype, CategoricalDtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            #  (could improve perf by doing _should_compare check earlier?)
            assert self.dtype == target.dtype

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if isinstance(target.dtype, CategoricalDtype):
            # potential fastpath
            #  get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            #  e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        pself, ptarget = self._maybe_downcast_for_indexing(target)
        if pself is not self or ptarget is not target:
            # One side was downcast; redo the lookup with the adjusted pair.
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if self.dtype == target.dtype and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if self.dtype != target.dtype and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            #  that can be matched to Interval scalars.
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)

3954 

    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Low-level indexer computation.

        Dispatches to fill/nearest indexers when a ``method`` is given;
        otherwise performs an exact-match lookup via the engine. Callers
        (``get_indexer``) have already validated ``method``/``limit``/
        ``tolerance`` and dtype compatibility.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes(  # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)

3983 

3984 @final 

3985 def _should_partial_index(self, target: Index) -> bool: 

3986 """ 

3987 Should we attempt partial-matching indexing? 

3988 """ 

3989 if isinstance(self.dtype, IntervalDtype): 

3990 if isinstance(target.dtype, IntervalDtype): 

3991 return False 

3992 # "Index" has no attribute "left" 

3993 return self.left._should_compare(target) # type: ignore[attr-defined] 

3994 return False 

3995 

3996 @final 

3997 def _check_indexing_method( 

3998 self, 

3999 method: str_t | None, 

4000 limit: int | None = None, 

4001 tolerance=None, 

4002 ) -> None: 

4003 """ 

4004 Raise if we have a get_indexer `method` that is not supported or valid. 

4005 """ 

4006 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: 

4007 # in practice the clean_reindex_fill_method call would raise 

4008 # before we get here 

4009 raise ValueError("Invalid fill method") # pragma: no cover 

4010 

4011 if self._is_multi: 

4012 if method == "nearest": 

4013 raise NotImplementedError( 

4014 "method='nearest' not implemented yet " 

4015 "for MultiIndex; see GitHub issue 9365" 

4016 ) 

4017 if method in ("pad", "backfill"): 

4018 if tolerance is not None: 

4019 raise NotImplementedError( 

4020 "tolerance not implemented yet for MultiIndex" 

4021 ) 

4022 

4023 if isinstance(self.dtype, (IntervalDtype, CategoricalDtype)): 

4024 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex 

4025 if method is not None: 

4026 raise NotImplementedError( 

4027 f"method {method} not yet implemented for {type(self).__name__}" 

4028 ) 

4029 

4030 if method is None: 

4031 if tolerance is not None: 

4032 raise ValueError( 

4033 "tolerance argument only valid if doing pad, " 

4034 "backfill or nearest reindexing" 

4035 ) 

4036 if limit is not None: 

4037 raise ValueError( 

4038 "limit argument only valid if doing pad, " 

4039 "backfill or nearest reindexing" 

4040 ) 

4041 

4042 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: 

4043 # override this method on subclasses 

4044 tolerance = np.asarray(tolerance) 

4045 if target.size != tolerance.size and tolerance.size > 1: 

4046 raise ValueError("list-like tolerance size must match target index size") 

4047 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number): 

4048 if tolerance.ndim > 0: 

4049 raise ValueError( 

4050 f"tolerance argument for {type(self).__name__} with dtype " 

4051 f"{self.dtype} must contain numeric elements if it is list type" 

4052 ) 

4053 

4054 raise ValueError( 

4055 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} " 

4056 f"must be numeric if it is a scalar: {repr(tolerance)}" 

4057 ) 

4058 return tolerance 

4059 

    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        Compute an indexer from self toward ``target`` using ``method``
        ("pad" or "backfill") filling, optionally bounded by ``limit`` and
        ``tolerance``.  Entries that fail the tolerance check are set to -1
        (the sentinel written by _filter_indexer_tolerance).
        """
        if self._is_multi:
            if not (self.is_monotonic_increasing or self.is_monotonic_decreasing):
                raise ValueError("index must be monotonic increasing or decreasing")
            # Encode self and target together through a single engine so both
            # share one ordinal space, then recurse on the flat encodings.
            encoded = self.append(target)._engine.values  # type: ignore[union-attr]
            self_encoded = Index(encoded[: len(self)])
            target_encoded = Index(encoded[len(self) :])
            return self_encoded._get_fill_indexer(
                target_encoded, method, limit, tolerance
            )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Fast path: both sides sorted -> use the cython pad/backfill.
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            # Slower searchsorted-based fallback (no limit support).
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer

4092 

4093 @final 

4094 def _get_fill_indexer_searchsorted( 

4095 self, target: Index, method: str_t, limit: int | None = None 

4096 ) -> npt.NDArray[np.intp]: 

4097 """ 

4098 Fallback pad/backfill get_indexer that works for monotonic decreasing 

4099 indexes and non-monotonic targets. 

4100 """ 

4101 if limit is not None: 

4102 raise ValueError( 

4103 f"limit argument for {repr(method)} method only well-defined " 

4104 "if index and target are monotonic" 

4105 ) 

4106 

4107 side: Literal["left", "right"] = "left" if method == "pad" else "right" 

4108 

4109 # find exact matches first (this simplifies the algorithm) 

4110 indexer = self.get_indexer(target) 

4111 nonexact = indexer == -1 

4112 indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side) 

4113 if side == "left": 

4114 # searchsorted returns "indices into a sorted array such that, 

4115 # if the corresponding elements in v were inserted before the 

4116 # indices, the order of a would be preserved". 

4117 # Thus, we need to subtract 1 to find values to the left. 

4118 indexer[nonexact] -= 1 

4119 # This also mapped not found values (values of 0 from 

4120 # np.searchsorted) to -1, which conveniently is also our 

4121 # sentinel for missing values 

4122 else: 

4123 # Mark indices to the right of the largest value as not found 

4124 indexer[indexer == len(self)] = -1 

4125 return indexer 

4126 

4127 @final 

4128 def _get_nearest_indexer( 

4129 self, target: Index, limit: int | None, tolerance 

4130 ) -> npt.NDArray[np.intp]: 

4131 """ 

4132 Get the indexer for the nearest index labels; requires an index with 

4133 values that can be subtracted from each other (e.g., not strings or 

4134 tuples). 

4135 """ 

4136 if not len(self): 

4137 return self._get_fill_indexer(target, "pad") 

4138 

4139 left_indexer = self.get_indexer(target, "pad", limit=limit) 

4140 right_indexer = self.get_indexer(target, "backfill", limit=limit) 

4141 

4142 left_distances = self._difference_compat(target, left_indexer) 

4143 right_distances = self._difference_compat(target, right_indexer) 

4144 

4145 op = operator.lt if self.is_monotonic_increasing else operator.le 

4146 indexer = np.where( 

4147 # error: Argument 1&2 has incompatible type "Union[ExtensionArray, 

4148 # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE, 

4149 # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]" 

4150 op(left_distances, right_distances) # type: ignore[arg-type] 

4151 | (right_indexer == -1), 

4152 left_indexer, 

4153 right_indexer, 

4154 ) 

4155 if tolerance is not None: 

4156 indexer = self._filter_indexer_tolerance(target, indexer, tolerance) 

4157 return indexer 

4158 

4159 @final 

4160 def _filter_indexer_tolerance( 

4161 self, 

4162 target: Index, 

4163 indexer: npt.NDArray[np.intp], 

4164 tolerance, 

4165 ) -> npt.NDArray[np.intp]: 

4166 distance = self._difference_compat(target, indexer) 

4167 

4168 return np.where(distance <= tolerance, indexer, -1) 

4169 

4170 @final 

4171 def _difference_compat( 

4172 self, target: Index, indexer: npt.NDArray[np.intp] 

4173 ) -> ArrayLike: 

4174 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object] 

4175 # of DateOffset objects, which do not support __abs__ (and would be slow 

4176 # if they did) 

4177 

4178 if isinstance(self.dtype, PeriodDtype): 

4179 # Note: we only get here with matching dtypes 

4180 own_values = cast("PeriodArray", self._data)._ndarray 

4181 target_values = cast("PeriodArray", target._data)._ndarray 

4182 diff = own_values[indexer] - target_values 

4183 else: 

4184 # error: Unsupported left operand type for - ("ExtensionArray") 

4185 diff = self._values[indexer] - target._values # type: ignore[operator] 

4186 return abs(diff) 

4187 

4188 # -------------------------------------------------------------------- 

4189 # Indexer Conversion Methods 

4190 

4191 @final 

4192 def _validate_positional_slice(self, key: slice) -> None: 

4193 """ 

4194 For positional indexing, a slice must have either int or None 

4195 for each of start, stop, and step. 

4196 """ 

4197 self._validate_indexer("positional", key.start, "iloc") 

4198 self._validate_indexer("positional", key.stop, "iloc") 

4199 self._validate_indexer("positional", key.step, "iloc") 

4200 

    def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}

        Returns
        -------
        slice
            Either ``key`` unchanged (positional) or the result of
            ``self.slice_indexer`` (label-based translated to positions).
        """

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # figure out if this is a positional indexer
        is_index_slice = is_valid_positional_slice(key)

        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
        # to simplify this.
        if lib.is_np_dtype(self.dtype, "f"):
            # We always treat __getitem__ slicing as label-based
            # translate to locations
            if kind == "getitem" and is_index_slice and not start == stop and step != 0:
                # exclude step=0 from the warning because it will raise anyway
                # start/stop both None e.g. [:] or [::-1] won't change.
                # exclude start==stop since it will be empty either way, or
                # will be [:] or [::-1] which won't change
                warnings.warn(
                    # GH#49612
                    "The behavior of obj[i:j] with a float-dtype index is "
                    "deprecated. In a future version, this will be treated as "
                    "positional instead of label-based. For label-based slicing, "
                    "use obj.loc[i:j] instead",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
            return self.slice_indexer(start, stop, step)

        if kind == "getitem":
            # called from the getitem slicers, validate that we are in fact integers
            if is_index_slice:
                # In this case the _validate_indexer checks below are redundant
                return key
            elif self.dtype.kind in "iu":
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here; checking that the user didn't
        # pass a positional slice to loc
        is_positional = is_index_slice and self._should_fallback_to_positional

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop: if both resolve as labels, treat the
                # slice as label-based after all.
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                raise TypeError(
                    "Slicing a positional slice with .loc is not allowed, "
                    "Use .loc with labels or .iloc with positions instead.",
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer

4284 

4285 @final 

4286 def _raise_invalid_indexer( 

4287 self, 

4288 form: Literal["slice", "positional"], 

4289 key, 

4290 reraise: lib.NoDefault | None | Exception = lib.no_default, 

4291 ) -> None: 

4292 """ 

4293 Raise consistent invalid indexer message. 

4294 """ 

4295 msg = ( 

4296 f"cannot do {form} indexing on {type(self).__name__} with these " 

4297 f"indexers [{key}] of type {type(key).__name__}" 

4298 ) 

4299 if reraise is not lib.no_default: 

4300 raise TypeError(msg) from reraise 

4301 raise TypeError(msg) 

4302 

4303 # -------------------------------------------------------------------- 

4304 # Reindex Methods 

4305 

4306 @final 

4307 def _validate_can_reindex(self, indexer: np.ndarray) -> None: 

4308 """ 

4309 Check if we are allowing reindexing with this particular indexer. 

4310 

4311 Parameters 

4312 ---------- 

4313 indexer : an integer ndarray 

4314 

4315 Raises 

4316 ------ 

4317 ValueError if its a duplicate axis 

4318 """ 

4319 # trying to reindex on an axis with duplicates 

4320 if not self._index_as_unique and len(indexer): 

4321 raise ValueError("cannot reindex on an axis with duplicate labels") 

4322 

    def reindex(
        self,
        target,
        method: ReindexMethod | None = None,
        level=None,
        limit: int | None = None,
        tolerance: float | None = None,
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index; ``None`` when the
            target equals the existing index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Empty non-Index target: reuse our own (or the level's) dtype.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                elif not self.is_unique:
                    # GH#42568
                    raise ValueError("cannot reindex on an axis with duplicate labels")
                else:
                    indexer, _ = self.get_indexer_non_unique(target)

        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer

4435 

4436 def _wrap_reindex_result(self, target, indexer, preserve_names: bool): 

4437 target = self._maybe_preserve_names(target, preserve_names) 

4438 return target 

4439 

4440 def _maybe_preserve_names(self, target: Index, preserve_names: bool): 

4441 if preserve_names and target.nlevels == 1 and target.name != self.name: 

4442 target = target.copy(deep=False) 

4443 target.name = self.name 

4444 return target 

4445 

    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None
            Indexer into the result with -1 marking positions whose target
            label was missing from self; None when every target was found.
        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1
        new_labels: Index | np.ndarray = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]

            # Interleave matched and missing labels back into target order.
            # Index constructor below will do inference
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):
                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:
                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:
                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if not isinstance(self, ABCMultiIndex):
            new_index = Index(new_labels, name=self.name)
        else:
            new_index = type(self).from_tuples(new_labels, names=self.names)
        return new_index, indexer, new_indexer

4517 

4518 # -------------------------------------------------------------------- 

4519 # Join Methods 

4520 

    # Overloads: the return type of `join` depends on `return_indexers` --
    # a bare Index when False, an (Index, lidx, ridx) triple when True.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

4556 

    @final
    @_maybe_return_indexers
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level: Level | None = None,
        return_indexers: bool = False,
        sort: bool = False,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Compute join_index and indexers to conform data structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3])
        >>> idx2 = pd.Index([4, 5, 6])
        >>> idx1.join(idx2, how='outer')
        Index([1, 2, 3, 4, 5, 6], dtype='int64')
        """
        other = ensure_index(other)
        # outer joins are always sorted
        sort = sort or how == "outer"

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (self.tz is None) ^ (other.tz is None):
                # Raise instead of casting to object below.
                raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if not self._is_multi and not other._is_multi:
            # We have specific handling for MultiIndex below
            pself, pother = self._maybe_downcast_for_indexing(other)
            if pself is not self or pother is not other:
                return pself.join(
                    pother, how=how, level=level, return_indexers=True, sort=sort
                )

        # try to figure out the join level
        # GH3662
        if level is None and (self._is_multi or other._is_multi):
            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how)

        # join on the level
        if level is not None and (self._is_multi or other._is_multi):
            return self._join_level(other, level, how=how)

        if len(self) == 0 or len(other) == 0:
            try:
                return self._join_empty(other, how, sort)
            except TypeError:
                # object dtype; non-comparable objects
                pass

        if self.dtype != other.dtype:
            # Cast both sides to a common dtype, then retry the join.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.join(other, how=how, return_indexers=True)
        elif (
            isinstance(self, ABCCategoricalIndex)
            and isinstance(other, ABCCategoricalIndex)
            and not self.ordered
            and not self.categories.equals(other.categories)
        ):
            # dtypes are "equal" but categories are in different order
            other = Index(other._values.reorder_categories(self.categories))

        _validate_join_method(how)

        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and other._can_use_libjoin
            and (self.is_unique or other.is_unique)
        ):
            # libjoin fastpath for sorted inputs
            try:
                return self._join_monotonic(other, how=how)
            except TypeError:
                # object dtype; non-comparable objects
                pass
        elif not self.is_unique or not other.is_unique:
            return self._join_non_unique(other, how=how, sort=sort)

        return self._join_via_get_indexer(other, how, sort)

4660 

4661 @final 

4662 def _join_empty( 

4663 self, other: Index, how: JoinHow, sort: bool 

4664 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4665 assert len(self) == 0 or len(other) == 0 

4666 _validate_join_method(how) 

4667 

4668 lidx: np.ndarray | None 

4669 ridx: np.ndarray | None 

4670 

4671 if len(other): 

4672 how = cast(JoinHow, {"left": "right", "right": "left"}.get(how, how)) 

4673 join_index, ridx, lidx = other._join_empty(self, how, sort) 

4674 elif how in ["left", "outer"]: 

4675 if sort and not self.is_monotonic_increasing: 

4676 lidx = self.argsort() 

4677 join_index = self.take(lidx) 

4678 else: 

4679 lidx = None 

4680 join_index = self._view() 

4681 ridx = np.broadcast_to(np.intp(-1), len(join_index)) 

4682 else: 

4683 join_index = other._view() 

4684 lidx = np.array([], dtype=np.intp) 

4685 ridx = None 

4686 return join_index, lidx, ridx 

4687 

4688 @final 

4689 def _join_via_get_indexer( 

4690 self, other: Index, how: JoinHow, sort: bool 

4691 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4692 # Fallback if we do not have any fastpaths available based on 

4693 # uniqueness/monotonicity 

4694 

4695 # Note: at this point we have checked matching dtypes 

4696 

4697 if how == "left": 

4698 join_index = self.sort_values() if sort else self 

4699 elif how == "right": 

4700 join_index = other.sort_values() if sort else other 

4701 elif how == "inner": 

4702 join_index = self.intersection(other, sort=sort) 

4703 elif how == "outer": 

4704 try: 

4705 join_index = self.union(other, sort=sort) 

4706 except TypeError: 

4707 join_index = self.union(other) 

4708 try: 

4709 join_index = _maybe_try_sort(join_index, sort) 

4710 except TypeError: 

4711 pass 

4712 

4713 if join_index is self: 

4714 lindexer = None 

4715 else: 

4716 lindexer = self.get_indexer_for(join_index) 

4717 if join_index is other: 

4718 rindexer = None 

4719 else: 

4720 rindexer = other.get_indexer_for(join_index) 

4721 return join_index, lindexer, rindexer 

4722 

    @final
    def _join_multi(self, other: Index, how: JoinHow):
        """
        Join where at least one side is a MultiIndex and names (not an
        explicit level) determine the join keys.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            # Drop the non-matching levels from left and right respectively
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            # maintain the order of the index levels
            if how == "right":
                level_order = other_names_list + ldrop_names
            else:
                level_order = self_names_list + rdrop_names
            multi_join_idx = multi_join_idx.reorder_levels(level_order)

            return multi_join_idx, lidx, ridx

        jl = next(iter(overlap))

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            # _join_level expects the flat index on the left, so swap sides
            # and remember to swap the returned indexers back.
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            return result[0], result[2], result[1]
        return result

4811 

4812 @final 

4813 def _join_non_unique( 

4814 self, other: Index, how: JoinHow = "left", sort: bool = False 

4815 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

4816 from pandas.core.reshape.merge import get_join_indexers_non_unique 

4817 

4818 # We only get here if dtypes match 

4819 assert self.dtype == other.dtype 

4820 

4821 left_idx, right_idx = get_join_indexers_non_unique( 

4822 self._values, other._values, how=how, sort=sort 

4823 ) 

4824 mask = left_idx == -1 

4825 

4826 join_idx = self.take(left_idx) 

4827 right = other.take(right_idx) 

4828 join_index = join_idx.putmask(mask, right) 

4829 if isinstance(join_index, ABCMultiIndex) and how == "outer": 

4830 # test_join_index_levels 

4831 join_index = join_index._sort_levels_monotonic() 

4832 return join_index, left_idx, right_idx 

4833 

    @final
    def _join_level(
        self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so that `left` is always the MultiIndex; remember to swap
        # the indexers back at the end if we flipped.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # level values are unchanged by the join
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            # remap this level's codes into the joined level's code space
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer

4982 

    @final
    def _join_monotonic(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Join with another Index using the libjoin fastpaths.

        Only called with matching dtypes and both sides monotonic increasing
        (see the asserts below). Returns the joined Index plus left/right
        indexers, with None where the corresponding side is used unchanged.
        """
        # We only get here with matching dtypes and both monotonic increasing
        assert other.dtype == self.dtype
        assert self._can_use_libjoin and other._can_use_libjoin

        if self.equals(other):
            # This is a convenient place for this check, but its correctness
            # does not depend on monotonicity, so it could go earlier
            # in the calling method.
            ret_index = other if how == "right" else self
            return ret_index, None, None

        ridx: npt.NDArray[np.intp] | None
        lidx: npt.NDArray[np.intp] | None

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                # self is returned as-is; only the right side needs an indexer
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(other)
            elif how == "right":
                # mirror image of the "left" case
                join_index = other
                lidx = other._left_indexer_unique(self)
                ridx = None
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
        else:
            # general (possibly non-unique) case
            if how == "left":
                join_array, lidx, ridx = self._left_indexer(other)
            elif how == "right":
                # note the swapped indexer order: computed from other's side
                join_array, ridx, lidx = other._left_indexer(self)
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)

            assert lidx is not None
            assert ridx is not None

            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)

        lidx = None if lidx is None else ensure_platform_int(lidx)
        ridx = None if ridx is None else ensure_platform_int(ridx)
        return join_index, lidx, ridx

5035 

5036 def _wrap_joined_index( 

5037 self, 

5038 joined: ArrayLike, 

5039 other: Self, 

5040 lidx: npt.NDArray[np.intp], 

5041 ridx: npt.NDArray[np.intp], 

5042 ) -> Self: 

5043 assert other.dtype == self.dtype 

5044 

5045 if isinstance(self, ABCMultiIndex): 

5046 name = self.names if self.names == other.names else None 

5047 # error: Incompatible return value type (got "MultiIndex", 

5048 # expected "Self") 

5049 mask = lidx == -1 

5050 join_idx = self.take(lidx) 

5051 right = cast("MultiIndex", other.take(ridx)) 

5052 join_index = join_idx.putmask(mask, right)._sort_levels_monotonic() 

5053 return join_index.set_names(name) # type: ignore[return-value] 

5054 else: 

5055 name = get_op_result_name(self, other) 

5056 return self._constructor._with_infer(joined, name=name, dtype=self.dtype) 

5057 

5058 @final 

5059 @cache_readonly 

5060 def _can_use_libjoin(self) -> bool: 

5061 """ 

5062 Whether we can use the fastpaths implemented in _libs.join. 

5063 

5064 This is driven by whether (in monotonic increasing cases that are 

5065 guaranteed not to have NAs) we can convert to a np.ndarray without 

5066 making a copy. If we cannot, this negates the performance benefit 

5067 of using libjoin. 

5068 """ 

5069 if type(self) is Index: 

5070 # excludes EAs, but include masks, we get here with monotonic 

5071 # values only, meaning no NA 

5072 return ( 

5073 isinstance(self.dtype, np.dtype) 

5074 or isinstance(self._values, (ArrowExtensionArray, BaseMaskedArray)) 

5075 or self.dtype == "string[python]" 

5076 ) 

5077 # Exclude index types where the conversion to numpy converts to object dtype, 

5078 # which negates the performance benefit of libjoin 

5079 # Subclasses should override to return False if _get_join_target is 

5080 # not zero-copy. 

5081 # TODO: exclude RangeIndex (which allocates memory)? 

5082 # Doing so seems to break test_concat_datetime_timezone 

5083 return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex)) 

5084 

5085 # -------------------------------------------------------------------- 

5086 # Uncategorized Methods 

5087 

5088 @property 

5089 def values(self) -> ArrayLike: 

5090 """ 

5091 Return an array representing the data in the Index. 

5092 

5093 .. warning:: 

5094 

5095 We recommend using :attr:`Index.array` or 

5096 :meth:`Index.to_numpy`, depending on whether you need 

5097 a reference to the underlying data or a NumPy array. 

5098 

5099 Returns 

5100 ------- 

5101 array: numpy.ndarray or ExtensionArray 

5102 

5103 See Also 

5104 -------- 

5105 Index.array : Reference to the underlying data. 

5106 Index.to_numpy : A NumPy array representing the underlying data. 

5107 

5108 Examples 

5109 -------- 

5110 For :class:`pandas.Index`: 

5111 

5112 >>> idx = pd.Index([1, 2, 3]) 

5113 >>> idx 

5114 Index([1, 2, 3], dtype='int64') 

5115 >>> idx.values 

5116 array([1, 2, 3]) 

5117 

5118 For :class:`pandas.IntervalIndex`: 

5119 

5120 >>> idx = pd.interval_range(start=0, end=5) 

5121 >>> idx.values 

5122 <IntervalArray> 

5123 [(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]] 

5124 Length: 5, dtype: interval[int64, right] 

5125 """ 

5126 if using_copy_on_write(): 

5127 data = self._data 

5128 if isinstance(data, np.ndarray): 

5129 data = data.view() 

5130 data.flags.writeable = False 

5131 return data 

5132 return self._data 

5133 

5134 @cache_readonly 

5135 @doc(IndexOpsMixin.array) 

5136 def array(self) -> ExtensionArray: 

5137 array = self._data 

5138 if isinstance(array, np.ndarray): 

5139 from pandas.core.arrays.numpy_ import NumpyExtensionArray 

5140 

5141 array = NumpyExtensionArray(array) 

5142 return array 

5143 

5144 @property 

5145 def _values(self) -> ExtensionArray | np.ndarray: 

5146 """ 

5147 The best array representation. 

5148 

5149 This is an ndarray or ExtensionArray. 

5150 

5151 ``_values`` are consistent between ``Series`` and ``Index``. 

5152 

5153 It may differ from the public '.values' method. 

5154 

5155 index | values | _values | 

5156 ----------------- | --------------- | ------------- | 

5157 Index | ndarray | ndarray | 

5158 CategoricalIndex | Categorical | Categorical | 

5159 DatetimeIndex | ndarray[M8ns] | DatetimeArray | 

5160 DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray | 

5161 PeriodIndex | ndarray[object] | PeriodArray | 

5162 IntervalIndex | IntervalArray | IntervalArray | 

5163 

5164 See Also 

5165 -------- 

5166 values : Values 

5167 """ 

5168 return self._data 

5169 

    def _get_engine_target(self) -> ArrayLike:
        """
        Get the ndarray or ExtensionArray that we can pass to the IndexEngine
        constructor.
        """
        vals = self._values
        if isinstance(vals, StringArray):
            # GH#45652 much more performant than ExtensionEngine
            return vals._ndarray
        if isinstance(vals, ArrowExtensionArray) and self.dtype.kind in "Mm":
            # pyarrow-backed timestamps/durations: the engine works on the
            # underlying i8 view of the converted datetime/timedelta array.
            import pyarrow as pa

            pa_type = vals._pa_array.type
            if pa.types.is_timestamp(pa_type):
                vals = vals._to_datetimearray()
                return vals._ndarray.view("i8")
            elif pa.types.is_duration(pa_type):
                vals = vals._to_timedeltaarray()
                return vals._ndarray.view("i8")
        if (
            type(self) is Index
            and isinstance(self._values, ExtensionArray)
            and not isinstance(self._values, BaseMaskedArray)
            and not (
                isinstance(self._values, ArrowExtensionArray)
                and is_numeric_dtype(self.dtype)
                # Exclude decimal
                and self.dtype.kind != "O"
            )
        ):
            # TODO(ExtensionIndex): remove special-case, just use self._values
            return self._values.astype(object)
        return vals

5203 

5204 @final 

5205 def _get_join_target(self) -> np.ndarray: 

5206 """ 

5207 Get the ndarray or ExtensionArray that we can pass to the join 

5208 functions. 

5209 """ 

5210 if isinstance(self._values, BaseMaskedArray): 

5211 # This is only used if our array is monotonic, so no NAs present 

5212 return self._values._data 

5213 elif isinstance(self._values, ArrowExtensionArray): 

5214 # This is only used if our array is monotonic, so no missing values 

5215 # present 

5216 return self._values.to_numpy() 

5217 

5218 # TODO: exclude ABCRangeIndex case here as it copies 

5219 target = self._get_engine_target() 

5220 if not isinstance(target, np.ndarray): 

5221 raise ValueError("_can_use_libjoin should return False.") 

5222 return target 

5223 

5224 def _from_join_target(self, result: np.ndarray) -> ArrayLike: 

5225 """ 

5226 Cast the ndarray returned from one of the libjoin.foo_indexer functions 

5227 back to type(self._data). 

5228 """ 

5229 if isinstance(self.values, BaseMaskedArray): 

5230 return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_)) 

5231 elif isinstance(self.values, (ArrowExtensionArray, StringArray)): 

5232 return type(self.values)._from_sequence(result, dtype=self.dtype) 

5233 return result 

5234 

5235 @doc(IndexOpsMixin._memory_usage) 

5236 def memory_usage(self, deep: bool = False) -> int: 

5237 result = self._memory_usage(deep=deep) 

5238 

5239 # include our engine hashtable 

5240 result += self._engine.sizeof(deep=deep) 

5241 return result 

5242 

5243 @final 

5244 def where(self, cond, other=None) -> Index: 

5245 """ 

5246 Replace values where the condition is False. 

5247 

5248 The replacement is taken from other. 

5249 

5250 Parameters 

5251 ---------- 

5252 cond : bool array-like with the same length as self 

5253 Condition to select the values on. 

5254 other : scalar, or array-like, default None 

5255 Replacement if the condition is False. 

5256 

5257 Returns 

5258 ------- 

5259 pandas.Index 

5260 A copy of self with values replaced from other 

5261 where the condition is False. 

5262 

5263 See Also 

5264 -------- 

5265 Series.where : Same method for Series. 

5266 DataFrame.where : Same method for DataFrame. 

5267 

5268 Examples 

5269 -------- 

5270 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor']) 

5271 >>> idx 

5272 Index(['car', 'bike', 'train', 'tractor'], dtype='object') 

5273 >>> idx.where(idx.isin(['car', 'train']), 'other') 

5274 Index(['car', 'other', 'train', 'other'], dtype='object') 

5275 """ 

5276 if isinstance(self, ABCMultiIndex): 

5277 raise NotImplementedError( 

5278 ".where is not supported for MultiIndex operations" 

5279 ) 

5280 cond = np.asarray(cond, dtype=bool) 

5281 return self.putmask(~cond, other) 

5282 

5283 # construction helpers 

5284 @final 

5285 @classmethod 

5286 def _raise_scalar_data_error(cls, data): 

5287 # We return the TypeError so that we can raise it from the constructor 

5288 # in order to keep mypy happy 

5289 raise TypeError( 

5290 f"{cls.__name__}(...) must be called with a collection of some " 

5291 f"kind, {repr(data) if not isinstance(data, np.generic) else str(data)} " 

5292 "was passed" 

5293 ) 

5294 

5295 def _validate_fill_value(self, value): 

5296 """ 

5297 Check if the value can be inserted into our array without casting, 

5298 and convert it to an appropriate native type if necessary. 

5299 

5300 Raises 

5301 ------ 

5302 TypeError 

5303 If the value cannot be inserted into an array of this dtype. 

5304 """ 

5305 dtype = self.dtype 

5306 if isinstance(dtype, np.dtype) and dtype.kind not in "mM": 

5307 # return np_can_hold_element(dtype, value) 

5308 try: 

5309 return np_can_hold_element(dtype, value) 

5310 except LossySetitemError as err: 

5311 # re-raise as TypeError for consistency 

5312 raise TypeError from err 

5313 elif not can_hold_element(self._values, value): 

5314 raise TypeError 

5315 return value 

5316 

5317 def _is_memory_usage_qualified(self) -> bool: 

5318 """ 

5319 Return a boolean if we need a qualified .info display. 

5320 """ 

5321 return is_object_dtype(self.dtype) 

5322 

5323 def __contains__(self, key: Any) -> bool: 

5324 """ 

5325 Return a boolean indicating whether the provided key is in the index. 

5326 

5327 Parameters 

5328 ---------- 

5329 key : label 

5330 The key to check if it is present in the index. 

5331 

5332 Returns 

5333 ------- 

5334 bool 

5335 Whether the key search is in the index. 

5336 

5337 Raises 

5338 ------ 

5339 TypeError 

5340 If the key is not hashable. 

5341 

5342 See Also 

5343 -------- 

5344 Index.isin : Returns an ndarray of boolean dtype indicating whether the 

5345 list-like key is in the index. 

5346 

5347 Examples 

5348 -------- 

5349 >>> idx = pd.Index([1, 2, 3, 4]) 

5350 >>> idx 

5351 Index([1, 2, 3, 4], dtype='int64') 

5352 

5353 >>> 2 in idx 

5354 True 

5355 >>> 6 in idx 

5356 False 

5357 """ 

5358 hash(key) 

5359 try: 

5360 return key in self._engine 

5361 except (OverflowError, TypeError, ValueError): 

5362 return False 

5363 

5364 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318 

5365 # Incompatible types in assignment (expression has type "None", base class 

5366 # "object" defined the type as "Callable[[object], int]") 

5367 __hash__: ClassVar[None] # type: ignore[assignment] 

5368 

    @final
    def __setitem__(self, key, value) -> None:
        # Index objects are immutable; any item assignment is rejected.
        raise TypeError("Index does not support mutable operations")

5372 

    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        getitem = self._data.__getitem__

        if is_integer(key) or is_float(key):
            # GH#44051 exclude bool, which would return a 2d ndarray
            key = com.cast_scalar_indexer(key)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization com.is_bool_indexer and ndim checks.
            return self._getitem_slice(key)

        if com.is_bool_indexer(key):
            # if we have list[bools, length=1e5] then doing this check+convert
            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
            # time below from 3.8 ms to 496 µs
            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
            if isinstance(getattr(key, "dtype", None), ExtensionDtype):
                key = key.to_numpy(dtype=bool, na_value=False)
            else:
                key = np.asarray(key, dtype=bool)

            if not isinstance(self.dtype, ExtensionDtype):
                # deprecation: zero-length boolean masks against a non-empty
                # Index will raise in a future version
                if len(key) == 0 and len(key) != len(self):
                    warnings.warn(
                        "Using a boolean indexer with length 0 on an Index with "
                        "length greater than 0 is deprecated and will raise in a "
                        "future version.",
                        FutureWarning,
                        stacklevel=find_stack_level(),
                    )

        result = getitem(key)
        # Because we ruled out integer above, we always get an arraylike here
        if result.ndim > 1:
            disallow_ndim_indexing(result)

        # NB: Using _constructor._simple_new would break if MultiIndex
        # didn't override __getitem__
        return self._constructor._simple_new(result, name=self._name)

5424 

5425 def _getitem_slice(self, slobj: slice) -> Self: 

5426 """ 

5427 Fastpath for __getitem__ when we know we have a slice. 

5428 """ 

5429 res = self._data[slobj] 

5430 result = type(self)._simple_new(res, name=self._name, refs=self._references) 

5431 if "_engine" in self._cache: 

5432 reverse = slobj.step is not None and slobj.step < 0 

5433 result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr] 

5434 

5435 return result 

5436 

5437 @final 

5438 def _can_hold_identifiers_and_holds_name(self, name) -> bool: 

5439 """ 

5440 Faster check for ``name in self`` when we know `name` is a Python 

5441 identifier (e.g. in NDFrame.__getattr__, which hits this to support 

5442 . key lookup). For indexes that can't hold identifiers (everything 

5443 but object & categorical) we just return False. 

5444 

5445 https://github.com/pandas-dev/pandas/issues/19764 

5446 """ 

5447 if ( 

5448 is_object_dtype(self.dtype) 

5449 or is_string_dtype(self.dtype) 

5450 or isinstance(self.dtype, CategoricalDtype) 

5451 ): 

5452 return name in self 

5453 return False 

5454 

5455 def append(self, other: Index | Sequence[Index]) -> Index: 

5456 """ 

5457 Append a collection of Index options together. 

5458 

5459 Parameters 

5460 ---------- 

5461 other : Index or list/tuple of indices 

5462 

5463 Returns 

5464 ------- 

5465 Index 

5466 

5467 Examples 

5468 -------- 

5469 >>> idx = pd.Index([1, 2, 3]) 

5470 >>> idx.append(pd.Index([4])) 

5471 Index([1, 2, 3, 4], dtype='int64') 

5472 """ 

5473 to_concat = [self] 

5474 

5475 if isinstance(other, (list, tuple)): 

5476 to_concat += list(other) 

5477 else: 

5478 # error: Argument 1 to "append" of "list" has incompatible type 

5479 # "Union[Index, Sequence[Index]]"; expected "Index" 

5480 to_concat.append(other) # type: ignore[arg-type] 

5481 

5482 for obj in to_concat: 

5483 if not isinstance(obj, Index): 

5484 raise TypeError("all inputs must be Index") 

5485 

5486 names = {obj.name for obj in to_concat} 

5487 name = None if len(names) > 1 else self.name 

5488 

5489 return self._concat(to_concat, name) 

5490 

5491 def _concat(self, to_concat: list[Index], name: Hashable) -> Index: 

5492 """ 

5493 Concatenate multiple Index objects. 

5494 """ 

5495 to_concat_vals = [x._values for x in to_concat] 

5496 

5497 result = concat_compat(to_concat_vals) 

5498 

5499 return Index._with_infer(result, name=name) 

5500 

    def putmask(self, mask, value) -> Index:
        """
        Return a new Index of the values set with the mask.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3])
        >>> idx2 = pd.Index([5, 6, 7])
        >>> idx1.putmask([True, False, False], idx2)
        Index([5, 2, 3], dtype='int64')
        """
        # Normalize the mask; ``noop`` means no position is actually set.
        mask, noop = validate_putmask(self._values, mask)
        if noop:
            return self.copy()

        if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
            # e.g. None -> np.nan, see also Block._standardize_fill_value
            value = self._na_value

        try:
            converted = self._validate_fill_value(value)
        except (LossySetitemError, ValueError, TypeError) as err:
            if is_object_dtype(self.dtype):  # pragma: no cover
                raise err

            # Value does not fit our dtype: cast to a common dtype and retry.
            # See also: Block.coerce_to_target_dtype
            dtype = self._find_common_type_compat(value)
            return self.astype(dtype).putmask(mask, value)

        values = self._values.copy()

        if isinstance(values, np.ndarray):
            converted = setitem_datetimelike_compat(values, mask.sum(), converted)
            np.putmask(values, mask, converted)

        else:
            # Note: we use the original value here, not converted, as
            # _validate_fill_value is not idempotent
            values._putmask(mask, value)

        return self._shallow_copy(values)

5551 

5552 def equals(self, other: Any) -> bool: 

5553 """ 

5554 Determine if two Index object are equal. 

5555 

5556 The things that are being compared are: 

5557 

5558 * The elements inside the Index object. 

5559 * The order of the elements inside the Index object. 

5560 

5561 Parameters 

5562 ---------- 

5563 other : Any 

5564 The other object to compare against. 

5565 

5566 Returns 

5567 ------- 

5568 bool 

5569 True if "other" is an Index and it has the same elements and order 

5570 as the calling index; False otherwise. 

5571 

5572 Examples 

5573 -------- 

5574 >>> idx1 = pd.Index([1, 2, 3]) 

5575 >>> idx1 

5576 Index([1, 2, 3], dtype='int64') 

5577 >>> idx1.equals(pd.Index([1, 2, 3])) 

5578 True 

5579 

5580 The elements inside are compared 

5581 

5582 >>> idx2 = pd.Index(["1", "2", "3"]) 

5583 >>> idx2 

5584 Index(['1', '2', '3'], dtype='object') 

5585 

5586 >>> idx1.equals(idx2) 

5587 False 

5588 

5589 The order is compared 

5590 

5591 >>> ascending_idx = pd.Index([1, 2, 3]) 

5592 >>> ascending_idx 

5593 Index([1, 2, 3], dtype='int64') 

5594 >>> descending_idx = pd.Index([3, 2, 1]) 

5595 >>> descending_idx 

5596 Index([3, 2, 1], dtype='int64') 

5597 >>> ascending_idx.equals(descending_idx) 

5598 False 

5599 

5600 The dtype is *not* compared 

5601 

5602 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64') 

5603 >>> int64_idx 

5604 Index([1, 2, 3], dtype='int64') 

5605 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64') 

5606 >>> uint64_idx 

5607 Index([1, 2, 3], dtype='uint64') 

5608 >>> int64_idx.equals(uint64_idx) 

5609 True 

5610 """ 

5611 if self.is_(other): 

5612 return True 

5613 

5614 if not isinstance(other, Index): 

5615 return False 

5616 

5617 if len(self) != len(other): 

5618 # quickly return if the lengths are different 

5619 return False 

5620 

5621 if ( 

5622 isinstance(self.dtype, StringDtype) 

5623 and self.dtype.storage == "pyarrow_numpy" 

5624 and other.dtype != self.dtype 

5625 ): 

5626 # special case for object behavior 

5627 return other.equals(self.astype(object)) 

5628 

5629 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype): 

5630 # if other is not object, use other's logic for coercion 

5631 return other.equals(self) 

5632 

5633 if isinstance(other, ABCMultiIndex): 

5634 # d-level MultiIndex can equal d-tuple Index 

5635 return other.equals(self) 

5636 

5637 if isinstance(self._values, ExtensionArray): 

5638 # Dispatch to the ExtensionArray's .equals method. 

5639 if not isinstance(other, type(self)): 

5640 return False 

5641 

5642 earr = cast(ExtensionArray, self._data) 

5643 return earr.equals(other._data) 

5644 

5645 if isinstance(other.dtype, ExtensionDtype): 

5646 # All EA-backed Index subclasses override equals 

5647 return other.equals(self) 

5648 

5649 return array_equivalent(self._values, other._values) 

5650 

5651 @final 

5652 def identical(self, other) -> bool: 

5653 """ 

5654 Similar to equals, but checks that object attributes and types are also equal. 

5655 

5656 Returns 

5657 ------- 

5658 bool 

5659 If two Index objects have equal elements and same type True, 

5660 otherwise False. 

5661 

5662 Examples 

5663 -------- 

5664 >>> idx1 = pd.Index(['1', '2', '3']) 

5665 >>> idx2 = pd.Index(['1', '2', '3']) 

5666 >>> idx2.identical(idx1) 

5667 True 

5668 

5669 >>> idx1 = pd.Index(['1', '2', '3'], name="A") 

5670 >>> idx2 = pd.Index(['1', '2', '3'], name="B") 

5671 >>> idx2.identical(idx1) 

5672 False 

5673 """ 

5674 return ( 

5675 self.equals(other) 

5676 and all( 

5677 getattr(self, c, None) == getattr(other, c, None) 

5678 for c in self._comparables 

5679 ) 

5680 and type(self) == type(other) 

5681 and self.dtype == other.dtype 

5682 ) 

5683 

5684 @final 

5685 def asof(self, label): 

5686 """ 

5687 Return the label from the index, or, if not present, the previous one. 

5688 

5689 Assuming that the index is sorted, return the passed index label if it 

5690 is in the index, or return the previous index label if the passed one 

5691 is not in the index. 

5692 

5693 Parameters 

5694 ---------- 

5695 label : object 

5696 The label up to which the method returns the latest index label. 

5697 

5698 Returns 

5699 ------- 

5700 object 

5701 The passed label if it is in the index. The previous label if the 

5702 passed label is not in the sorted index or `NaN` if there is no 

5703 such label. 

5704 

5705 See Also 

5706 -------- 

5707 Series.asof : Return the latest value in a Series up to the 

5708 passed index. 

5709 merge_asof : Perform an asof merge (similar to left join but it 

5710 matches on nearest key rather than equal key). 

5711 Index.get_loc : An `asof` is a thin wrapper around `get_loc` 

5712 with method='pad'. 

5713 

5714 Examples 

5715 -------- 

5716 `Index.asof` returns the latest index label up to the passed label. 

5717 

5718 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03']) 

5719 >>> idx.asof('2014-01-01') 

5720 '2013-12-31' 

5721 

5722 If the label is in the index, the method returns the passed label. 

5723 

5724 >>> idx.asof('2014-01-02') 

5725 '2014-01-02' 

5726 

5727 If all of the labels in the index are later than the passed label, 

5728 NaN is returned. 

5729 

5730 >>> idx.asof('1999-01-02') 

5731 nan 

5732 

5733 If the index is not sorted, an error is raised. 

5734 

5735 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02', 

5736 ... '2014-01-03']) 

5737 >>> idx_not_sorted.asof('2013-12-31') 

5738 Traceback (most recent call last): 

5739 ValueError: index must be monotonic increasing or decreasing 

5740 """ 

5741 self._searchsorted_monotonic(label) # validate sortedness 

5742 try: 

5743 loc = self.get_loc(label) 

5744 except (KeyError, TypeError): 

5745 # KeyError -> No exact match, try for padded 

5746 # TypeError -> passed e.g. non-hashable, fall through to get 

5747 # the tested exception message 

5748 indexer = self.get_indexer([label], method="pad") 

5749 if indexer.ndim > 1 or indexer.size > 1: 

5750 raise TypeError("asof requires scalar valued input") 

5751 loc = indexer.item() 

5752 if loc == -1: 

5753 return self._na_value 

5754 else: 

5755 if isinstance(loc, slice): 

5756 loc = loc.indices(len(self))[-1] 

5757 

5758 return self[loc] 

5759 

5760 def asof_locs( 

5761 self, where: Index, mask: npt.NDArray[np.bool_] 

5762 ) -> npt.NDArray[np.intp]: 

5763 """ 

5764 Return the locations (indices) of labels in the index. 

5765 

5766 As in the :meth:`pandas.Index.asof`, if the label (a particular entry in 

5767 ``where``) is not in the index, the latest index label up to the 

5768 passed label is chosen and its index returned. 

5769 

5770 If all of the labels in the index are later than a label in ``where``, 

5771 -1 is returned. 

5772 

5773 ``mask`` is used to ignore ``NA`` values in the index during calculation. 

5774 

5775 Parameters 

5776 ---------- 

5777 where : Index 

5778 An Index consisting of an array of timestamps. 

5779 mask : np.ndarray[bool] 

5780 Array of booleans denoting where values in the original 

5781 data are not ``NA``. 

5782 

5783 Returns 

5784 ------- 

5785 np.ndarray[np.intp] 

5786 An array of locations (indices) of the labels from the index 

5787 which correspond to the return values of :meth:`pandas.Index.asof` 

5788 for every element in ``where``. 

5789 

5790 See Also 

5791 -------- 

5792 Index.asof : Return the label from the index, or, if not present, the 

5793 previous one. 

5794 

5795 Examples 

5796 -------- 

5797 >>> idx = pd.date_range('2023-06-01', periods=3, freq='D') 

5798 >>> where = pd.DatetimeIndex(['2023-05-30 00:12:00', '2023-06-01 00:00:00', 

5799 ... '2023-06-02 23:59:59']) 

5800 >>> mask = np.ones(3, dtype=bool) 

5801 >>> idx.asof_locs(where, mask) 

5802 array([-1, 0, 1]) 

5803 

5804 We can use ``mask`` to ignore certain values in the index during calculation. 

5805 

5806 >>> mask[1] = False 

5807 >>> idx.asof_locs(where, mask) 

5808 array([-1, 0, 0]) 

5809 """ 

5810 # error: No overload variant of "searchsorted" of "ndarray" matches argument 

5811 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str" 

5812 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed 

5813 locs = self._values[mask].searchsorted( 

5814 where._values, side="right" # type: ignore[call-overload] 

5815 ) 

5816 locs = np.where(locs > 0, locs - 1, 0) 

5817 

5818 result = np.arange(len(self), dtype=np.intp)[mask].take(locs) 

5819 

5820 first_value = self._values[mask.argmax()] 

5821 result[(locs == 0) & (where._values < first_value)] = -1 

5822 

5823 return result 

5824 

    # The overloads below encode, for type checkers, how the return type of
    # ``sort_values`` depends on ``return_indexer``; the implementation
    # follows after them.
    @overload
    def sort_values(
        self,
        *,
        return_indexer: Literal[False] = ...,
        ascending: bool = ...,
        na_position: NaPosition = ...,
        key: Callable | None = ...,
    ) -> Self:
        ...

    @overload
    def sort_values(
        self,
        *,
        return_indexer: Literal[True],
        ascending: bool = ...,
        na_position: NaPosition = ...,
        key: Callable | None = ...,
    ) -> tuple[Self, np.ndarray]:
        ...

    @overload
    def sort_values(
        self,
        *,
        return_indexer: bool = ...,
        ascending: bool = ...,
        na_position: NaPosition = ...,
        key: Callable | None = ...,
    ) -> Self | tuple[Self, np.ndarray]:
        ...

5858 @deprecate_nonkeyword_arguments( 

5859 version="3.0", allowed_args=["self"], name="sort_values" 

5860 ) 

5861 def sort_values( 

5862 self, 

5863 return_indexer: bool = False, 

5864 ascending: bool = True, 

5865 na_position: NaPosition = "last", 

5866 key: Callable | None = None, 

5867 ) -> Self | tuple[Self, np.ndarray]: 

5868 """ 

5869 Return a sorted copy of the index. 

5870 

5871 Return a sorted copy of the index, and optionally return the indices 

5872 that sorted the index itself. 

5873 

5874 Parameters 

5875 ---------- 

5876 return_indexer : bool, default False 

5877 Should the indices that would sort the index be returned. 

5878 ascending : bool, default True 

5879 Should the index values be sorted in an ascending order. 

5880 na_position : {'first' or 'last'}, default 'last' 

5881 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at 

5882 the end. 

5883 key : callable, optional 

5884 If not None, apply the key function to the index values 

5885 before sorting. This is similar to the `key` argument in the 

5886 builtin :meth:`sorted` function, with the notable difference that 

5887 this `key` function should be *vectorized*. It should expect an 

5888 ``Index`` and return an ``Index`` of the same shape. 

5889 

5890 Returns 

5891 ------- 

5892 sorted_index : pandas.Index 

5893 Sorted copy of the index. 

5894 indexer : numpy.ndarray, optional 

5895 The indices that the index itself was sorted by. 

5896 

5897 See Also 

5898 -------- 

5899 Series.sort_values : Sort values of a Series. 

5900 DataFrame.sort_values : Sort values in a DataFrame. 

5901 

5902 Examples 

5903 -------- 

5904 >>> idx = pd.Index([10, 100, 1, 1000]) 

5905 >>> idx 

5906 Index([10, 100, 1, 1000], dtype='int64') 

5907 

5908 Sort values in ascending order (default behavior). 

5909 

5910 >>> idx.sort_values() 

5911 Index([1, 10, 100, 1000], dtype='int64') 

5912 

5913 Sort values in descending order, and also get the indices `idx` was 

5914 sorted by. 

5915 

5916 >>> idx.sort_values(ascending=False, return_indexer=True) 

5917 (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2])) 

5918 """ 

5919 if key is None and ( 

5920 (ascending and self.is_monotonic_increasing) 

5921 or (not ascending and self.is_monotonic_decreasing) 

5922 ): 

5923 if return_indexer: 

5924 indexer = np.arange(len(self), dtype=np.intp) 

5925 return self.copy(), indexer 

5926 else: 

5927 return self.copy() 

5928 

5929 # GH 35584. Sort missing values according to na_position kwarg 

5930 # ignore na_position for MultiIndex 

5931 if not isinstance(self, ABCMultiIndex): 

5932 _as = nargsort( 

5933 items=self, ascending=ascending, na_position=na_position, key=key 

5934 ) 

5935 else: 

5936 idx = cast(Index, ensure_key_mapped(self, key)) 

5937 _as = idx.argsort(na_position=na_position) 

5938 if not ascending: 

5939 _as = _as[::-1] 

5940 

5941 sorted_index = self.take(_as) 

5942 

5943 if return_indexer: 

5944 return sorted_index, _as 

5945 else: 

5946 return sorted_index 

5947 

5948 @final 

5949 def sort(self, *args, **kwargs): 

5950 """ 

5951 Use sort_values instead. 

5952 """ 

5953 raise TypeError("cannot sort an Index object in-place, use sort_values instead") 

5954 

5955 def shift(self, periods: int = 1, freq=None): 

5956 """ 

5957 Shift index by desired number of time frequency increments. 

5958 

5959 This method is for shifting the values of datetime-like indexes 

5960 by a specified time increment a given number of times. 

5961 

5962 Parameters 

5963 ---------- 

5964 periods : int, default 1 

5965 Number of periods (or increments) to shift by, 

5966 can be positive or negative. 

5967 freq : pandas.DateOffset, pandas.Timedelta or str, optional 

5968 Frequency increment to shift by. 

5969 If None, the index is shifted by its own `freq` attribute. 

5970 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc. 

5971 

5972 Returns 

5973 ------- 

5974 pandas.Index 

5975 Shifted index. 

5976 

5977 See Also 

5978 -------- 

5979 Series.shift : Shift values of Series. 

5980 

5981 Notes 

5982 ----- 

5983 This method is only implemented for datetime-like index classes, 

5984 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex. 

5985 

5986 Examples 

5987 -------- 

5988 Put the first 5 month starts of 2011 into an index. 

5989 

5990 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS') 

5991 >>> month_starts 

5992 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01', 

5993 '2011-05-01'], 

5994 dtype='datetime64[ns]', freq='MS') 

5995 

5996 Shift the index by 10 days. 

5997 

5998 >>> month_starts.shift(10, freq='D') 

5999 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11', 

6000 '2011-05-11'], 

6001 dtype='datetime64[ns]', freq=None) 

6002 

6003 The default value of `freq` is the `freq` attribute of the index, 

6004 which is 'MS' (month start) in this example. 

6005 

6006 >>> month_starts.shift(10) 

6007 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01', 

6008 '2012-03-01'], 

6009 dtype='datetime64[ns]', freq='MS') 

6010 """ 

6011 raise NotImplementedError( 

6012 f"This method is only implemented for DatetimeIndex, PeriodIndex and " 

6013 f"TimedeltaIndex; Got type {type(self).__name__}" 

6014 ) 

6015 

6016 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: 

6017 """ 

6018 Return the integer indices that would sort the index. 

6019 

6020 Parameters 

6021 ---------- 

6022 *args 

6023 Passed to `numpy.ndarray.argsort`. 

6024 **kwargs 

6025 Passed to `numpy.ndarray.argsort`. 

6026 

6027 Returns 

6028 ------- 

6029 np.ndarray[np.intp] 

6030 Integer indices that would sort the index if used as 

6031 an indexer. 

6032 

6033 See Also 

6034 -------- 

6035 numpy.argsort : Similar method for NumPy arrays. 

6036 Index.sort_values : Return sorted copy of Index. 

6037 

6038 Examples 

6039 -------- 

6040 >>> idx = pd.Index(['b', 'a', 'd', 'c']) 

6041 >>> idx 

6042 Index(['b', 'a', 'd', 'c'], dtype='object') 

6043 

6044 >>> order = idx.argsort() 

6045 >>> order 

6046 array([1, 0, 3, 2]) 

6047 

6048 >>> idx[order] 

6049 Index(['a', 'b', 'c', 'd'], dtype='object') 

6050 """ 

6051 # This works for either ndarray or EA, is overridden 

6052 # by RangeIndex, MultIIndex 

6053 return self._data.argsort(*args, **kwargs) 

6054 

6055 def _check_indexing_error(self, key): 

6056 if not is_scalar(key): 

6057 # if key is not a scalar, directly raise an error (the code below 

6058 # would convert to numpy arrays and raise later any way) - GH29926 

6059 raise InvalidIndexError(key) 

6060 

6061 @cache_readonly 

6062 def _should_fallback_to_positional(self) -> bool: 

6063 """ 

6064 Should an integer key be treated as positional? 

6065 """ 

6066 return self.inferred_type not in { 

6067 "integer", 

6068 "mixed-integer", 

6069 "floating", 

6070 "complex", 

6071 } 

6072 

    # Shared docstring template, injected via @Appender onto
    # get_indexer_non_unique here and onto subclass overrides;
    # %(target_klass)s is substituted per subclass from _index_doc_kwargs.
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.

        Examples
        --------
        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['b', 'b'])
        (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))

        In the example below there are no matched values.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['q', 'r', 't'])
        (array([-1, -1, -1]), array([0, 1, 2]))

        For this reason, the returned ``indexer`` contains only integers equal to -1.
        It demonstrates that there's no match between the index and the ``target``
        values at these positions. The mask [0, 1, 2] in the return value shows that
        the first, second, and third elements are missing.

        Notice that the return value is a tuple contains two items. In the example
        below the first item is an array of locations in ``index``. The second
        item is a mask shows that the first and third elements are missing.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['f', 'b', 's'])
        (array([-1, 1, 3, 4, -1]), array([0, 2]))
        """

6120 

    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)
        # Let subclasses cast the target to their own dtype where appropriate
        # (base implementation is a no-op beyond ensure_index).
        target = self._maybe_cast_listlike_indexer(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            # that can be matched to Interval scalars.
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        pself, ptarget = self._maybe_downcast_for_indexing(target)
        if pself is not self or ptarget is not target:
            # One side was recast (e.g. object-of-dates -> DatetimeIndex);
            # recurse on the adjusted pair.
            return pself.get_indexer_non_unique(ptarget)

        if self.dtype != target.dtype:
            # TODO: if object, could use infer_dtype to preempt costly
            # conversion if still non-comparable?
            dtype = self._find_common_type_compat(target)

            # Cast both sides to the common dtype and recurse so the engine
            # lookup below always sees matching dtypes.
            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        # TODO: get_indexer has fastpaths for both Categorical-self and
        # Categorical-target. Can we do something similar here?

        # Note: _maybe_downcast_for_indexing ensures we never get here
        # with MultiIndex self and non-Multi target
        if self._is_multi and target._is_multi:
            engine = self._engine
            # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
            # no attribute "_extract_level_codes"
            tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]
        else:
            tgt_values = target._get_engine_target()

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), ensure_platform_int(missing)

6161 

6162 @final 

6163 def get_indexer_for(self, target) -> npt.NDArray[np.intp]: 

6164 """ 

6165 Guaranteed return of an indexer even when non-unique. 

6166 

6167 This dispatches to get_indexer or get_indexer_non_unique 

6168 as appropriate. 

6169 

6170 Returns 

6171 ------- 

6172 np.ndarray[np.intp] 

6173 List of indices. 

6174 

6175 Examples 

6176 -------- 

6177 >>> idx = pd.Index([np.nan, 'var1', np.nan]) 

6178 >>> idx.get_indexer_for([np.nan]) 

6179 array([0, 2]) 

6180 """ 

6181 if self._index_as_unique: 

6182 return self.get_indexer(target) 

6183 indexer, _ = self.get_indexer_non_unique(target) 

6184 return indexer 

6185 

    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
        """
        Analogue to get_indexer that raises if any elements are missing.

        Parameters
        ----------
        key : label or list-like
            Labels being looked up.
        axis_name : str
            Used only for the error message when labels are missing.

        Returns
        -------
        keyarr : Index
            The matched labels (self.take(indexer)).
        indexer : np.ndarray
            Positions of the matched labels in self.

        Raises
        ------
        KeyError
            If any requested label is not found (via _raise_if_missing).
        """
        keyarr = key
        if not isinstance(keyarr, Index):
            # asarray_tuplesafe keeps tuples intact as single labels
            keyarr = com.asarray_tuplesafe(keyarr)

        if self._index_as_unique:
            indexer = self.get_indexer_for(keyarr)
            keyarr = self.reindex(keyarr)[0]
        else:
            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

        self._raise_if_missing(keyarr, indexer, axis_name)

        keyarr = self.take(indexer)
        if isinstance(key, Index):
            # GH 42790 - Preserve name from an Index
            keyarr.name = key.name
        if lib.is_np_dtype(keyarr.dtype, "mM") or isinstance(
            keyarr.dtype, DatetimeTZDtype
        ):
            # DTI/TDI.take can infer a freq in some cases when we dont want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

        return keyarr, indexer

6218 

6219 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None: 

6220 """ 

6221 Check that indexer can be used to return a result. 

6222 

6223 e.g. at least one element was found, 

6224 unless the list of keys was actually empty. 

6225 

6226 Parameters 

6227 ---------- 

6228 key : list-like 

6229 Targeted labels (only used to show correct error message). 

6230 indexer: array-like of booleans 

6231 Indices corresponding to the key, 

6232 (with -1 indicating not found). 

6233 axis_name : str 

6234 

6235 Raises 

6236 ------ 

6237 KeyError 

6238 If at least one key was requested but none was found. 

6239 """ 

6240 if len(key) == 0: 

6241 return 

6242 

6243 # Count missing values 

6244 missing_mask = indexer < 0 

6245 nmissing = missing_mask.sum() 

6246 

6247 if nmissing: 

6248 if nmissing == len(indexer): 

6249 raise KeyError(f"None of [{key}] are in the [{axis_name}]") 

6250 

6251 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique()) 

6252 raise KeyError(f"{not_found} not in index") 

6253 

6254 @overload 

6255 def _get_indexer_non_comparable( 

6256 self, target: Index, method, unique: Literal[True] = ... 

6257 ) -> npt.NDArray[np.intp]: 

6258 ... 

6259 

6260 @overload 

6261 def _get_indexer_non_comparable( 

6262 self, target: Index, method, unique: Literal[False] 

6263 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6264 ... 

6265 

6266 @overload 

6267 def _get_indexer_non_comparable( 

6268 self, target: Index, method, unique: bool = True 

6269 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6270 ... 

6271 

6272 @final 

6273 def _get_indexer_non_comparable( 

6274 self, target: Index, method, unique: bool = True 

6275 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

6276 """ 

6277 Called from get_indexer or get_indexer_non_unique when the target 

6278 is of a non-comparable dtype. 

6279 

6280 For get_indexer lookups with method=None, get_indexer is an _equality_ 

6281 check, so non-comparable dtypes mean we will always have no matches. 

6282 

6283 For get_indexer lookups with a method, get_indexer is an _inequality_ 

6284 check, so non-comparable dtypes mean we will always raise TypeError. 

6285 

6286 Parameters 

6287 ---------- 

6288 target : Index 

6289 method : str or None 

6290 unique : bool, default True 

6291 * True if called from get_indexer. 

6292 * False if called from get_indexer_non_unique. 

6293 

6294 Raises 

6295 ------ 

6296 TypeError 

6297 If doing an inequality check, i.e. method is not None. 

6298 """ 

6299 if method is not None: 

6300 other_dtype = _unpack_nested_dtype(target) 

6301 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}") 

6302 

6303 no_matches = -1 * np.ones(target.shape, dtype=np.intp) 

6304 if unique: 

6305 # This is for get_indexer 

6306 return no_matches 

6307 else: 

6308 # This is for get_indexer_non_unique 

6309 missing = np.arange(len(target), dtype=np.intp) 

6310 return no_matches, missing 

6311 

6312 @property 

6313 def _index_as_unique(self) -> bool: 

6314 """ 

6315 Whether we should treat this as unique for the sake of 

6316 get_indexer vs get_indexer_non_unique. 

6317 

6318 For IntervalIndex compat. 

6319 """ 

6320 return self.is_unique 

6321 

6322 _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects" 

6323 

    @final
    def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.

        Returns the (possibly recast) pair ``(self, other)``; callers check
        identity against the originals to detect whether a cast happened.
        """

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (
                self.tz is not None
                and other.tz is not None
                and not tz_compare(self.tz, other.tz)
            ):
                # standardize on UTC
                return self.tz_convert("UTC"), other.tz_convert("UTC")

        elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            # object-dtype of datetime.date vs DatetimeIndex: try casting self
            # to the datetime flavor; give up on out-of-range dates.
            try:
                return type(other)(self), other
            except OutOfBoundsDatetime:
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we dont have tests that get here
            return type(other)(self), other

        elif self.dtype.kind == "u" and other.dtype.kind == "i":
            # GH#41873
            if other.min() >= 0:
                # lookup min as it may be cached
                # TODO: may need itemsize check if we have non-64-bit Indexes
                return self, other.astype(self.dtype)

        elif self._is_multi and not other._is_multi:
            # Try interpreting the flat target as tuples matching our levels.
            try:
                # "Type[Index]" has no attribute "from_tuples"
                other = type(self).from_tuples(other)  # type: ignore[attr-defined]
            except (TypeError, ValueError):
                # let's instead try with a straight Index
                self = Index(self._values)

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we dont need to re-implement on the subclasses
            other, self = other._maybe_downcast_for_indexing(self)

        return self, other

6369 

6370 @final 

6371 def _find_common_type_compat(self, target) -> DtypeObj: 

6372 """ 

6373 Implementation of find_common_type that adjusts for Index-specific 

6374 special cases. 

6375 """ 

6376 target_dtype, _ = infer_dtype_from(target) 

6377 

6378 # special case: if one dtype is uint64 and the other a signed int, return object 

6379 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion 

6380 # Now it's: 

6381 # * float | [u]int -> float 

6382 # * uint64 | signed int -> object 

6383 # We may change union(float | [u]int) to go to object. 

6384 if self.dtype == "uint64" or target_dtype == "uint64": 

6385 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype( 

6386 target_dtype 

6387 ): 

6388 return _dtype_obj 

6389 

6390 dtype = find_result_type(self.dtype, target) 

6391 dtype = common_dtype_categorical_compat([self, target], dtype) 

6392 return dtype 

6393 

6394 @final 

6395 def _should_compare(self, other: Index) -> bool: 

6396 """ 

6397 Check if `self == other` can ever have non-False entries. 

6398 """ 

6399 

6400 # NB: we use inferred_type rather than is_bool_dtype to catch 

6401 # object_dtype_of_bool and categorical[object_dtype_of_bool] cases 

6402 if ( 

6403 other.inferred_type == "boolean" and is_any_real_numeric_dtype(self.dtype) 

6404 ) or ( 

6405 self.inferred_type == "boolean" and is_any_real_numeric_dtype(other.dtype) 

6406 ): 

6407 # GH#16877 Treat boolean labels passed to a numeric index as not 

6408 # found. Without this fix False and True would be treated as 0 and 1 

6409 # respectively. 

6410 return False 

6411 

6412 dtype = _unpack_nested_dtype(other) 

6413 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype) 

6414 

6415 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

6416 """ 

6417 Can we compare values of the given dtype to our own? 

6418 """ 

6419 if self.dtype.kind == "b": 

6420 return dtype.kind == "b" 

6421 elif is_numeric_dtype(self.dtype): 

6422 return is_numeric_dtype(dtype) 

6423 # TODO: this was written assuming we only get here with object-dtype, 

6424 # which is no longer correct. Can we specialize for EA? 

6425 return True 

6426 

6427 @final 

6428 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]: 

6429 """ 

6430 Group the index labels by a given array of values. 

6431 

6432 Parameters 

6433 ---------- 

6434 values : array 

6435 Values used to determine the groups. 

6436 

6437 Returns 

6438 ------- 

6439 dict 

6440 {group name -> group labels} 

6441 """ 

6442 # TODO: if we are a MultiIndex, we can do better 

6443 # that converting to tuples 

6444 if isinstance(values, ABCMultiIndex): 

6445 values = values._values 

6446 values = Categorical(values) 

6447 result = values._reverse_indexer() 

6448 

6449 # map to the label 

6450 result = {k: self.take(v) for k, v in result.items()} 

6451 

6452 return PrettyDict(result) 

6453 

    def map(self, mapper, na_action: Literal["ignore"] | None = None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        Union[Index, MultiIndex]
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.map({1: 'a', 2: 'b', 3: 'c'})
        Index(['a', 'b', 'c'], dtype='object')

        Using `map` with a function:

        >>> idx = pd.Index([1, 2, 3])
        >>> idx.map('I am a {}'.format)
        Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='object')

        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.map(lambda x: x.upper())
        Index(['A', 'B', 'C'], dtype='object')
        """
        # Local import to avoid a circular dependency with the multi module.
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # we can return a MultiIndex
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                # Repeat the single name across all resulting levels.
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty: preserve our dtype instead of letting inference pick one
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        # don't want to cast back to floating. But if we are UInt64
        # and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)

6518 

6519 # TODO: De-duplicate with map, xref GH#32349 

6520 @final 

6521 def _transform_index(self, func, *, level=None) -> Index: 

6522 """ 

6523 Apply function to all values found in index. 

6524 

6525 This includes transforming multiindex entries separately. 

6526 Only apply function to one level of the MultiIndex if level is specified. 

6527 """ 

6528 if isinstance(self, ABCMultiIndex): 

6529 values = [ 

6530 self.get_level_values(i).map(func) 

6531 if i == level or level is None 

6532 else self.get_level_values(i) 

6533 for i in range(self.nlevels) 

6534 ] 

6535 return type(self).from_arrays(values) 

6536 else: 

6537 items = [func(x) for x in self] 

6538 return Index(items, name=self.name, tupleize_cols=False) 

6539 

6540 def isin(self, values, level=None) -> npt.NDArray[np.bool_]: 

6541 """ 

6542 Return a boolean array where the index values are in `values`. 

6543 

6544 Compute boolean array of whether each index value is found in the 

6545 passed set of values. The length of the returned boolean array matches 

6546 the length of the index. 

6547 

6548 Parameters 

6549 ---------- 

6550 values : set or list-like 

6551 Sought values. 

6552 level : str or int, optional 

6553 Name or position of the index level to use (if the index is a 

6554 `MultiIndex`). 

6555 

6556 Returns 

6557 ------- 

6558 np.ndarray[bool] 

6559 NumPy array of boolean values. 

6560 

6561 See Also 

6562 -------- 

6563 Series.isin : Same for Series. 

6564 DataFrame.isin : Same method for DataFrames. 

6565 

6566 Notes 

6567 ----- 

6568 In the case of `MultiIndex` you must either specify `values` as a 

6569 list-like object containing tuples that are the same length as the 

6570 number of levels, or specify `level`. Otherwise it will raise a 

6571 ``ValueError``. 

6572 

6573 If `level` is specified: 

6574 

6575 - if it is the name of one *and only one* index level, use that level; 

6576 - otherwise it should be a number indicating level position. 

6577 

6578 Examples 

6579 -------- 

6580 >>> idx = pd.Index([1,2,3]) 

6581 >>> idx 

6582 Index([1, 2, 3], dtype='int64') 

6583 

6584 Check whether each index value in a list of values. 

6585 

6586 >>> idx.isin([1, 4]) 

6587 array([ True, False, False]) 

6588 

6589 >>> midx = pd.MultiIndex.from_arrays([[1,2,3], 

6590 ... ['red', 'blue', 'green']], 

6591 ... names=('number', 'color')) 

6592 >>> midx 

6593 MultiIndex([(1, 'red'), 

6594 (2, 'blue'), 

6595 (3, 'green')], 

6596 names=['number', 'color']) 

6597 

6598 Check whether the strings in the 'color' level of the MultiIndex 

6599 are in a list of colors. 

6600 

6601 >>> midx.isin(['red', 'orange', 'yellow'], level='color') 

6602 array([ True, False, False]) 

6603 

6604 To check across the levels of a MultiIndex, pass a list of tuples: 

6605 

6606 >>> midx.isin([(1, 'red'), (3, 'red')]) 

6607 array([ True, False, False]) 

6608 """ 

6609 if level is not None: 

6610 self._validate_index_level(level) 

6611 return algos.isin(self._values, values) 

6612 

6613 def _get_string_slice(self, key: str_t): 

6614 # this is for partial string indexing, 

6615 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex 

6616 raise NotImplementedError 

6617 

6618 def slice_indexer( 

6619 self, 

6620 start: Hashable | None = None, 

6621 end: Hashable | None = None, 

6622 step: int | None = None, 

6623 ) -> slice: 

6624 """ 

6625 Compute the slice indexer for input labels and step. 

6626 

6627 Index needs to be ordered and unique. 

6628 

6629 Parameters 

6630 ---------- 

6631 start : label, default None 

6632 If None, defaults to the beginning. 

6633 end : label, default None 

6634 If None, defaults to the end. 

6635 step : int, default None 

6636 

6637 Returns 

6638 ------- 

6639 slice 

6640 

6641 Raises 

6642 ------ 

6643 KeyError : If key does not exist, or key is not unique and index is 

6644 not ordered. 

6645 

6646 Notes 

6647 ----- 

6648 This function assumes that the data is sorted, so use at your own peril 

6649 

6650 Examples 

6651 -------- 

6652 This is a method on all index types. For example you can do: 

6653 

6654 >>> idx = pd.Index(list('abcd')) 

6655 >>> idx.slice_indexer(start='b', end='c') 

6656 slice(1, 3, None) 

6657 

6658 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')]) 

6659 >>> idx.slice_indexer(start='b', end=('c', 'g')) 

6660 slice(1, 3, None) 

6661 """ 

6662 start_slice, end_slice = self.slice_locs(start, end, step=step) 

6663 

6664 # return a slice 

6665 if not is_scalar(start_slice): 

6666 raise AssertionError("Start slice bound is non-scalar") 

6667 if not is_scalar(end_slice): 

6668 raise AssertionError("End slice bound is non-scalar") 

6669 

6670 return slice(start_slice, end_slice, step) 

6671 

6672 def _maybe_cast_indexer(self, key): 

6673 """ 

6674 If we have a float key and are not a floating index, then try to cast 

6675 to an int if equivalent. 

6676 """ 

6677 return key 

6678 

6679 def _maybe_cast_listlike_indexer(self, target) -> Index: 

6680 """ 

6681 Analogue to maybe_cast_indexer for get_indexer instead of get_loc. 

6682 """ 

6683 return ensure_index(target) 

6684 

6685 @final 

6686 def _validate_indexer( 

6687 self, 

6688 form: Literal["positional", "slice"], 

6689 key, 

6690 kind: Literal["getitem", "iloc"], 

6691 ) -> None: 

6692 """ 

6693 If we are positional indexer, validate that we have appropriate 

6694 typed bounds must be an integer. 

6695 """ 

6696 if not lib.is_int_or_none(key): 

6697 self._raise_invalid_indexer(form, key) 

6698 

6699 def _maybe_cast_slice_bound(self, label, side: str_t): 

6700 """ 

6701 This function should be overloaded in subclasses that allow non-trivial 

6702 casting on label-slice bounds, e.g. datetime-like indices allowing 

6703 strings containing formatted datetimes. 

6704 

6705 Parameters 

6706 ---------- 

6707 label : object 

6708 side : {'left', 'right'} 

6709 

6710 Returns 

6711 ------- 

6712 label : object 

6713 

6714 Notes 

6715 ----- 

6716 Value of `side` parameter should be validated in caller. 

6717 """ 

6718 

6719 # We are a plain index here (sub-class override this method if they 

6720 # wish to have special treatment for floats/ints, e.g. datetimelike Indexes 

6721 

6722 if is_numeric_dtype(self.dtype): 

6723 return self._maybe_cast_indexer(label) 

6724 

6725 # reject them, if index does not contain label 

6726 if (is_float(label) or is_integer(label)) and label not in self: 

6727 self._raise_invalid_indexer("slice", label) 

6728 

6729 return label 

6730 

6731 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): 

6732 if self.is_monotonic_increasing: 

6733 return self.searchsorted(label, side=side) 

6734 elif self.is_monotonic_decreasing: 

6735 # np.searchsorted expects ascending sort order, have to reverse 

6736 # everything for it to work (element ordering, search side and 

6737 # resulting value). 

6738 pos = self[::-1].searchsorted( 

6739 label, side="right" if side == "left" else "left" 

6740 ) 

6741 return len(self) - pos 

6742 

6743 raise ValueError("index must be monotonic increasing or decreasing") 

6744 

    def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.

        See Also
        --------
        Index.get_loc : Get integer location, slice or boolean mask for requested
            label.

        Examples
        --------
        >>> idx = pd.RangeIndex(5)
        >>> idx.get_slice_bound(3, 'left')
        3

        >>> idx.get_slice_bound(3, 'right')
        4

        If ``label`` is non-unique in the index, an error will be raised.

        >>> idx_duplicate = pd.Index(['a', 'b', 'a', 'c', 'd'])
        >>> idx_duplicate.get_slice_bound('a', 'left')
        Traceback (most recent call last):
        KeyError: Cannot get left slice bound for non-unique label: 'a'
        """

        if side not in ("left", "right"):
            raise ValueError(
                "Invalid value for side kwarg, must be either "
                f"'left' or 'right': {side}"
            )

        # Keep the pre-cast label for the non-unique error message below.
        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            # Label absent: fall back to its insertion point if the index is
            # monotonic; otherwise re-raise the original KeyError.
            try:
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array, which
            # is OK as long as they are representable by a slice.
            assert is_bool_dtype(slc.dtype)
            slc = lib.maybe_booleans_to_slice(slc.view("u1"))
            if isinstance(slc, np.ndarray):
                # Non-contiguous matches cannot be expressed as a slice bound.
                raise KeyError(
                    f"Cannot get {side} slice bound for non-unique "
                    f"label: {repr(original_label)}"
                )

        if isinstance(slc, slice):
            if side == "left":
                return slc.start
            else:
                return slc.stop
        else:
            # Scalar position: right bound is one past it.
            if side == "right":
                return slc + 1
            else:
                return slc

6827 

    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.

        Returns
        -------
        tuple[int, int]

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        # A None step is treated as a forward (increasing) slice.
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as timestamps: fall through to normal lookup
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #              s='A'                 e='B'
            # AFTER SWAP:    |                     |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                     |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice

6915 

6916 def delete(self, loc) -> Self: 

6917 """ 

6918 Make new Index with passed location(-s) deleted. 

6919 

6920 Parameters 

6921 ---------- 

6922 loc : int or list of int 

6923 Location of item(-s) which will be deleted. 

6924 Use a list of locations to delete more than one value at the same time. 

6925 

6926 Returns 

6927 ------- 

6928 Index 

6929 Will be same type as self, except for RangeIndex. 

6930 

6931 See Also 

6932 -------- 

6933 numpy.delete : Delete any rows and column from NumPy array (ndarray). 

6934 

6935 Examples 

6936 -------- 

6937 >>> idx = pd.Index(['a', 'b', 'c']) 

6938 >>> idx.delete(1) 

6939 Index(['a', 'c'], dtype='object') 

6940 

6941 >>> idx = pd.Index(['a', 'b', 'c']) 

6942 >>> idx.delete([0, 2]) 

6943 Index(['b'], dtype='object') 

6944 """ 

6945 values = self._values 

6946 res_values: ArrayLike 

6947 if isinstance(values, np.ndarray): 

6948 # TODO(__array_function__): special casing will be unnecessary 

6949 res_values = np.delete(values, loc) 

6950 else: 

6951 res_values = values.delete(loc) 

6952 

6953 # _constructor so RangeIndex-> Index with an int64 dtype 

6954 return self._constructor._simple_new(res_values, name=self.name) 

6955 

    def insert(self, loc: int, item) -> Index:
        """
        Make new Index inserting new item at location.

        Follows Python numpy.insert semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        Index

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.insert(1, 'x')
        Index(['a', 'x', 'b', 'c'], dtype='object')
        """
        item = lib.item_from_zerodim(item)
        if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
            # normalize any NA-like to this dtype's canonical NA value
            item = self._na_value

        arr = self._values

        try:
            if isinstance(arr, ExtensionArray):
                res_values = arr.insert(loc, item)
                return type(self)._simple_new(res_values, name=self.name)
            else:
                item = self._validate_fill_value(item)
        except (TypeError, ValueError, LossySetitemError):
            # e.g. trying to insert an integer into a DatetimeIndex
            # We cannot keep the same dtype, so cast to the (often object)
            # minimal shared dtype before doing the insert.
            dtype = self._find_common_type_compat(item)
            return self.astype(dtype).insert(loc, item)

        if arr.dtype != object or not isinstance(
            item, (tuple, np.datetime64, np.timedelta64)
        ):
            # with object-dtype we need to worry about numpy incorrectly casting
            # dt64/td64 to integer, also about treating tuples as sequences
            # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
            casted = arr.dtype.type(item)
            new_values = np.insert(arr, loc, casted)

        else:
            # error: No overload variant of "insert" matches argument types
            # "ndarray[Any, Any]", "int", "None"
            new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
            # adjust negative loc so assignment targets the inserted slot
            loc = loc if loc >= 0 else loc - 1
            new_values[loc] = item

        out = Index._with_infer(new_values, name=self.name)
        if (
            using_pyarrow_string_dtype()
            and is_string_dtype(out.dtype)
            and new_values.dtype == object
        ):
            # keep object dtype rather than switching to pyarrow strings
            out = out.astype(new_values.dtype)
        if self.dtype == object and out.dtype != object:
            # GH#51363
            warnings.warn(
                "The behavior of Index.insert with object-dtype is deprecated, "
                "in a future version this will return an object-dtype Index "
                "instead of inferring a non-object dtype. To retain the old "
                "behavior, do `idx.insert(loc, item).infer_objects(copy=False)`",
                FutureWarning,
                stacklevel=find_stack_level(),
            )
        return out

7030 

7031 def drop( 

7032 self, 

7033 labels: Index | np.ndarray | Iterable[Hashable], 

7034 errors: IgnoreRaise = "raise", 

7035 ) -> Index: 

7036 """ 

7037 Make new Index with passed list of labels deleted. 

7038 

7039 Parameters 

7040 ---------- 

7041 labels : array-like or scalar 

7042 errors : {'ignore', 'raise'}, default 'raise' 

7043 If 'ignore', suppress error and existing labels are dropped. 

7044 

7045 Returns 

7046 ------- 

7047 Index 

7048 Will be same type as self, except for RangeIndex. 

7049 

7050 Raises 

7051 ------ 

7052 KeyError 

7053 If not all of the labels are found in the selected axis 

7054 

7055 Examples 

7056 -------- 

7057 >>> idx = pd.Index(['a', 'b', 'c']) 

7058 >>> idx.drop(['a']) 

7059 Index(['b', 'c'], dtype='object') 

7060 """ 

7061 if not isinstance(labels, Index): 

7062 # avoid materializing e.g. RangeIndex 

7063 arr_dtype = "object" if self.dtype == "object" else None 

7064 labels = com.index_labels_to_array(labels, dtype=arr_dtype) 

7065 

7066 indexer = self.get_indexer_for(labels) 

7067 mask = indexer == -1 

7068 if mask.any(): 

7069 if errors != "ignore": 

7070 raise KeyError(f"{labels[mask].tolist()} not found in axis") 

7071 indexer = indexer[~mask] 

7072 return self.delete(indexer) 

7073 

    @final
    def infer_objects(self, copy: bool = True) -> Index:
        """
        If we have an object dtype, try to infer a non-object dtype.

        Parameters
        ----------
        copy : bool, default True
            Whether to make a copy in cases where no inference occurs.
        """
        if self._is_multi:
            raise NotImplementedError(
                "infer_objects is not implemented for MultiIndex. "
                "Use index.to_frame().infer_objects() instead."
            )
        if self.dtype != object:
            # nothing to infer for non-object dtypes
            return self.copy() if copy else self

        values = self._values
        values = cast("npt.NDArray[np.object_]", values)
        res_values = lib.maybe_convert_objects(
            values,
            convert_non_numeric=True,
        )
        if copy and res_values is values:
            # no inference occurred; honor the copy request
            return self.copy()
        result = Index(res_values, name=self.name)
        if not copy and res_values is values and self._references is not None:
            # Copy-on-write: the result shares our buffer, so register it with
            # the same reference tracker.
            result._references = self._references
            result._references.add_index_reference(result)
        return result

7105 

7106 @final 

7107 def diff(self, periods: int = 1) -> Index: 

7108 """ 

7109 Computes the difference between consecutive values in the Index object. 

7110 

7111 If periods is greater than 1, computes the difference between values that 

7112 are `periods` number of positions apart. 

7113 

7114 Parameters 

7115 ---------- 

7116 periods : int, optional 

7117 The number of positions between the current and previous 

7118 value to compute the difference with. Default is 1. 

7119 

7120 Returns 

7121 ------- 

7122 Index 

7123 A new Index object with the computed differences. 

7124 

7125 Examples 

7126 -------- 

7127 >>> import pandas as pd 

7128 >>> idx = pd.Index([10, 20, 30, 40, 50]) 

7129 >>> idx.diff() 

7130 Index([nan, 10.0, 10.0, 10.0, 10.0], dtype='float64') 

7131 

7132 """ 

7133 return Index(self.to_series().diff(periods)) 

7134 

7135 @final 

7136 def round(self, decimals: int = 0) -> Self: 

7137 """ 

7138 Round each value in the Index to the given number of decimals. 

7139 

7140 Parameters 

7141 ---------- 

7142 decimals : int, optional 

7143 Number of decimal places to round to. If decimals is negative, 

7144 it specifies the number of positions to the left of the decimal point. 

7145 

7146 Returns 

7147 ------- 

7148 Index 

7149 A new Index with the rounded values. 

7150 

7151 Examples 

7152 -------- 

7153 >>> import pandas as pd 

7154 >>> idx = pd.Index([10.1234, 20.5678, 30.9123, 40.4567, 50.7890]) 

7155 >>> idx.round(decimals=2) 

7156 Index([10.12, 20.57, 30.91, 40.46, 50.79], dtype='float64') 

7157 

7158 """ 

7159 return self._constructor(self.to_series().round(decimals)) 

7160 

7161 # -------------------------------------------------------------------- 

7162 # Generated Arithmetic, Comparison, and Unary Methods 

7163 

    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.
        """
        if self.is_(other):
            # fastpath: comparing an object to itself; NA positions compare
            # unequal, so they are patched below.
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            other = extract_array(other, extract_numpy=True)
        else:
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            # let the EA implement the comparison
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            result = ops.comparison_op(self._values, other, op)

        return result

7207 

7208 @final 

7209 def _logical_method(self, other, op): 

7210 res_name = ops.get_op_result_name(self, other) 

7211 

7212 lvalues = self._values 

7213 rvalues = extract_array(other, extract_numpy=True, extract_range=True) 

7214 

7215 res_values = ops.logical_op(lvalues, rvalues, op) 

7216 return self._construct_result(res_values, name=res_name) 

7217 

7218 @final 

7219 def _construct_result(self, result, name): 

7220 if isinstance(result, tuple): 

7221 return ( 

7222 Index(result[0], name=name, dtype=result[0].dtype), 

7223 Index(result[1], name=name, dtype=result[1].dtype), 

7224 ) 

7225 return Index(result, name=name, dtype=result.dtype) 

7226 

7227 def _arith_method(self, other, op): 

7228 if ( 

7229 isinstance(other, Index) 

7230 and is_object_dtype(other.dtype) 

7231 and type(other) is not Index 

7232 ): 

7233 # We return NotImplemented for object-dtype index *subclasses* so they have 

7234 # a chance to implement ops before we unwrap them. 

7235 # See https://github.com/pandas-dev/pandas/issues/31109 

7236 return NotImplemented 

7237 

7238 return super()._arith_method(other, op) 

7239 

7240 @final 

7241 def _unary_method(self, op): 

7242 result = op(self._values) 

7243 return Index(result, name=self.name) 

7244 

    def __abs__(self) -> Index:
        # Elementwise absolute value via the shared unary dispatch.
        return self._unary_method(operator.abs)

7247 

    def __neg__(self) -> Index:
        # Elementwise negation via the shared unary dispatch.
        return self._unary_method(operator.neg)

7250 

    def __pos__(self) -> Index:
        # Elementwise unary plus via the shared unary dispatch.
        return self._unary_method(operator.pos)

7253 

    def __invert__(self) -> Index:
        # GH#8875: elementwise bitwise/logical inversion via unary dispatch.
        return self._unary_method(operator.inv)

7257 

7258 # -------------------------------------------------------------------- 

7259 # Reductions 

7260 

7261 def any(self, *args, **kwargs): 

7262 """ 

7263 Return whether any element is Truthy. 

7264 

7265 Parameters 

7266 ---------- 

7267 *args 

7268 Required for compatibility with numpy. 

7269 **kwargs 

7270 Required for compatibility with numpy. 

7271 

7272 Returns 

7273 ------- 

7274 bool or array-like (if axis is specified) 

7275 A single element array-like may be converted to bool. 

7276 

7277 See Also 

7278 -------- 

7279 Index.all : Return whether all elements are True. 

7280 Series.all : Return whether all elements are True. 

7281 

7282 Notes 

7283 ----- 

7284 Not a Number (NaN), positive infinity and negative infinity 

7285 evaluate to True because these are not equal to zero. 

7286 

7287 Examples 

7288 -------- 

7289 >>> index = pd.Index([0, 1, 2]) 

7290 >>> index.any() 

7291 True 

7292 

7293 >>> index = pd.Index([0, 0, 0]) 

7294 >>> index.any() 

7295 False 

7296 """ 

7297 nv.validate_any(args, kwargs) 

7298 self._maybe_disable_logical_methods("any") 

7299 vals = self._values 

7300 if not isinstance(vals, np.ndarray): 

7301 # i.e. EA, call _reduce instead of "any" to get TypeError instead 

7302 # of AttributeError 

7303 return vals._reduce("any") 

7304 return np.any(vals) 

7305 

7306 def all(self, *args, **kwargs): 

7307 """ 

7308 Return whether all elements are Truthy. 

7309 

7310 Parameters 

7311 ---------- 

7312 *args 

7313 Required for compatibility with numpy. 

7314 **kwargs 

7315 Required for compatibility with numpy. 

7316 

7317 Returns 

7318 ------- 

7319 bool or array-like (if axis is specified) 

7320 A single element array-like may be converted to bool. 

7321 

7322 See Also 

7323 -------- 

7324 Index.any : Return whether any element in an Index is True. 

7325 Series.any : Return whether any element in a Series is True. 

7326 Series.all : Return whether all elements in a Series are True. 

7327 

7328 Notes 

7329 ----- 

7330 Not a Number (NaN), positive infinity and negative infinity 

7331 evaluate to True because these are not equal to zero. 

7332 

7333 Examples 

7334 -------- 

7335 True, because nonzero integers are considered True. 

7336 

7337 >>> pd.Index([1, 2, 3]).all() 

7338 True 

7339 

7340 False, because ``0`` is considered False. 

7341 

7342 >>> pd.Index([0, 1, 2]).all() 

7343 False 

7344 """ 

7345 nv.validate_all(args, kwargs) 

7346 self._maybe_disable_logical_methods("all") 

7347 vals = self._values 

7348 if not isinstance(vals, np.ndarray): 

7349 # i.e. EA, call _reduce instead of "all" to get TypeError instead 

7350 # of AttributeError 

7351 return vals._reduce("all") 

7352 return np.all(vals) 

7353 

7354 @final 

7355 def _maybe_disable_logical_methods(self, opname: str_t) -> None: 

7356 """ 

7357 raise if this Index subclass does not support any or all. 

7358 """ 

7359 if ( 

7360 isinstance(self, ABCMultiIndex) 

7361 # TODO(3.0): PeriodArray and DatetimeArray any/all will raise, 

7362 # so checking needs_i8_conversion will be unnecessary 

7363 or (needs_i8_conversion(self.dtype) and self.dtype.kind != "m") 

7364 ): 

7365 # This call will raise 

7366 make_invalid_op(opname)(self) 

7367 

    @Appender(IndexOpsMixin.argmin.__doc__)
    def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
        nv.validate_argmin(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                # Deprecated: returning -1 for skipna=False-with-NAs / all-NA.
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return -1
        return super().argmin(skipna=skipna)

7386 

    @Appender(IndexOpsMixin.argmax.__doc__)
    def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
        nv.validate_argmax(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                # Deprecated: returning -1 for skipna=False-with-NAs / all-NA.
                warnings.warn(
                    f"The behavior of {type(self).__name__}.argmax/argmin "
                    "with skipna=False and NAs, or with all-NAs is deprecated. "
                    "In a future version this will raise ValueError.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )
                return -1
        return super().argmax(skipna=skipna)

7405 

    def min(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the minimum value of the Index.

        Parameters
        ----------
        axis : {None}
            Dummy argument for consistency with Series.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check: for a sorted index the first non-NA element wins
            first = self[0]
            if not isna(first):
                return first

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            # ExtensionArray path: delegate to the array's own reduction
            return self._values._reduce(name="min", skipna=skipna)

        return nanops.nanmin(self._values, skipna=skipna)

7468 

    def max(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True
            Exclude NA/null values when showing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """

        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check: for a sorted index the last non-NA element wins
            last = self[-1]
            if not isna(last):
                return last

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            # ExtensionArray path: delegate to the array's own reduction
            return self._values._reduce(name="max", skipna=skipna)

        return nanops.nanmax(self._values, skipna=skipna)

7532 

7533 # -------------------------------------------------------------------- 

7534 

    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.shape
        (3,)
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        return (len(self),)

7551 

7552 

def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Single sequence: a flat Index, with the single name (if any) unwrapped.
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)

7591 

7592 

def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
            ('a', 'c')],
           )
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like

    if isinstance(index_like, ABCSeries):
        name = index_like.name
        return Index(index_like, name=name, copy=copy)

    if is_iterator(index_like):
        # iterators are single-pass; materialize before inspecting
        index_like = list(index_like)

    if isinstance(index_like, list):
        if type(index_like) is not list:  # noqa: E721
            # must check for exactly list here because of strict type
            # check in clean_index_list
            index_like = list(index_like)

        if len(index_like) and lib.is_all_arraylike(index_like):
            # a list of arrays becomes a MultiIndex, one level per array
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(index_like)
        else:
            return Index(index_like, copy=copy, tupleize_cols=False)
    else:
        return Index(index_like, copy=copy)

7650 

7651 

def ensure_has_len(seq):
    """
    Return ``seq`` unchanged if it is sized; otherwise exhaust it into a list.
    """
    # EAFP: a TypeError from len() means seq is an iterator/generator.
    try:
        len(seq)
    except TypeError:
        return list(seq)
    return seq

7662 

7663 

def trim_front(strings: list[str]) -> list[str]:
    """
    Strip leading spaces that are common to every string in the list.

    One leading-space column is removed at a time, so trimming stops as soon
    as any string becomes empty or any string's first character is not a
    space. (The previous docstring claimed this trimmed "zeros and decimal
    points", which the code never did.)

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings

7681 

7682 

7683def _validate_join_method(method: str) -> None: 

7684 if method not in ["left", "right", "inner", "outer"]: 

7685 raise ValueError(f"do not recognize join method {method}") 

7686 

7687 

def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Restrict to Index/Series: a generic "name" attribute check would
        # also pick up e.g. dtype.name.
        name = obj.name

    # GH#29069: names must be hashable
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name

7702 

7703 

def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the unanimous 'names' found.
    """
    name_tups = (tuple(idx.names) for idx in indexes)
    names: list[Hashable] = []
    for level_names in zip_longest(*name_tups):
        # a level is named only when every index agrees on a single name
        distinct = set(level_names)
        names.append(distinct.pop() if len(distinct) == 1 else None)
    return tuple(names)

7721 

7722 

def _unpack_nested_dtype(other: Index) -> DtypeObj:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    np.dtype or ExtensionDtype
    """
    dtype = other.dtype
    if isinstance(dtype, CategoricalDtype):
        # If there is ever a SparseIndex, this could get dispatched
        # here too.
        return dtype.categories.dtype
    elif isinstance(dtype, ArrowDtype):
        # GH 53617: unwrap pyarrow dictionary types to their value type
        import pyarrow as pa

        if pa.types.is_dictionary(dtype.pyarrow_dtype):
            other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
    return other.dtype

7748 

7749 

def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
    # Best-effort sort: with sort=None, incomparable objects fall back to
    # the unsorted result with a RuntimeWarning; with sort=True we re-raise.
    if sort is not False:
        try:
            # error: Incompatible types in assignment (expression has type
            # "Union[ExtensionArray, ndarray[Any, Any], Index, Series,
            # Tuple[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series],
            # ndarray[Any, Any]]]", variable has type "Union[Index,
            # Union[ExtensionArray, ndarray[Any, Any]]]")
            result = algos.safe_sort(result)  # type: ignore[assignment]
        except TypeError as err:
            if sort is True:
                raise
            warnings.warn(
                f"{err}, sort order is undefined for incomparable objects.",
                RuntimeWarning,
                stacklevel=find_stack_level(),
            )
    return result

7768 

7769 

def get_values_for_csv(
    values: ArrayLike,
    *,
    date_format,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
) -> npt.NDArray[np.object_]:
    """
    Convert to types which can be consumed by the standard library's
    csv.writer.writerows.
    """
    if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
        # GH#40754 Convert categorical datetimes to datetime array
        values = algos.take_nd(
            values.categories._values,
            ensure_platform_int(values._codes),
            fill_value=na_rep,
        )

    values = ensure_wrapped_if_datetimelike(values)

    if isinstance(values, (DatetimeArray, TimedeltaArray)):
        if values.ndim == 1:
            result = values._format_native_types(na_rep=na_rep, date_format=date_format)
            result = result.astype(object, copy=False)
            return result

        # GH#21734 Process every column separately, they might have different formats
        results_converted = []
        for i in range(len(values)):
            result = values[i, :]._format_native_types(
                na_rep=na_rep, date_format=date_format
            )
            results_converted.append(result.astype(object, copy=False))
        return np.vstack(results_converted)

    elif isinstance(values.dtype, PeriodDtype):
        # TODO: tests that get here in column path
        values = cast("PeriodArray", values)
        res = values._format_native_types(na_rep=na_rep, date_format=date_format)
        return res

    elif isinstance(values.dtype, IntervalDtype):
        # TODO: tests that get here in column path
        values = cast("IntervalArray", values)
        mask = values.isna()
        if not quoting:
            result = np.asarray(values).astype(str)
        else:
            result = np.array(values, dtype=object, copy=True)

        result[mask] = na_rep
        return result

    elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
        # see GH#13418: no special formatting is desired at the
        # output (important for appropriate 'quoting' behaviour),
        # so do not pass it through the FloatArrayFormatter
        if float_format is None and decimal == ".":
            mask = isna(values)

            if not quoting:
                values = values.astype(str)
            else:
                values = np.array(values, dtype="object")

            values[mask] = na_rep
            values = values.astype(object, copy=False)
            return values

        from pandas.io.formats.format import FloatArrayFormatter

        formatter = FloatArrayFormatter(
            values,
            na_rep=na_rep,
            float_format=float_format,
            decimal=decimal,
            quoting=quoting,
            fixed_width=False,
        )
        res = formatter.get_result_as_array()
        res = res.astype(object, copy=False)
        return res

    elif isinstance(values, ExtensionArray):
        # generic EA fallback: stringify via object cast, then patch NAs
        mask = isna(values)

        new_values = np.asarray(values.astype(object))
        new_values[mask] = na_rep
        return new_values

    else:
        mask = isna(values)
        itemsize = writers.word_len(na_rep)

        if values.dtype != _dtype_obj and not quoting and itemsize:
            values = values.astype(str)
            if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
                # enlarge for the na_rep
                values = values.astype(f"<U{itemsize}")
        else:
            values = np.array(values, dtype="object")

        values[mask] = na_rep
        values = values.astype(object, copy=False)
        return values