Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/base.py: 20%

Shortcuts on this page

r m x   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

2276 statements  

1from __future__ import annotations 

2 

3from datetime import datetime 

4import functools 

5from itertools import zip_longest 

6import operator 

7from typing import ( 

8 TYPE_CHECKING, 

9 Any, 

10 Callable, 

11 ClassVar, 

12 Hashable, 

13 Iterable, 

14 Literal, 

15 NoReturn, 

16 Sequence, 

17 TypeVar, 

18 cast, 

19 final, 

20 overload, 

21) 

22import warnings 

23 

24import numpy as np 

25 

26from pandas._config import get_option 

27 

28from pandas._libs import ( 

29 NaT, 

30 algos as libalgos, 

31 index as libindex, 

32 lib, 

33) 

34from pandas._libs.internals import BlockValuesRefs 

35import pandas._libs.join as libjoin 

36from pandas._libs.lib import ( 

37 is_datetime_array, 

38 no_default, 

39) 

40from pandas._libs.missing import is_float_nan 

41from pandas._libs.tslibs import ( 

42 IncompatibleFrequency, 

43 OutOfBoundsDatetime, 

44 Timestamp, 

45 tz_compare, 

46) 

47from pandas._typing import ( 

48 AnyAll, 

49 ArrayLike, 

50 Axes, 

51 Axis, 

52 DropKeep, 

53 DtypeObj, 

54 F, 

55 IgnoreRaise, 

56 IndexLabel, 

57 JoinHow, 

58 Level, 

59 Shape, 

60 npt, 

61) 

62from pandas.compat.numpy import function as nv 

63from pandas.errors import ( 

64 DuplicateLabelError, 

65 InvalidIndexError, 

66) 

67from pandas.util._decorators import ( 

68 Appender, 

69 cache_readonly, 

70 doc, 

71) 

72from pandas.util._exceptions import ( 

73 find_stack_level, 

74 rewrite_exception, 

75) 

76 

77from pandas.core.dtypes.astype import ( 

78 astype_array, 

79 astype_is_view, 

80) 

81from pandas.core.dtypes.cast import ( 

82 LossySetitemError, 

83 can_hold_element, 

84 common_dtype_categorical_compat, 

85 find_result_type, 

86 infer_dtype_from, 

87 maybe_cast_pointwise_result, 

88 np_can_hold_element, 

89) 

90from pandas.core.dtypes.common import ( 

91 ensure_int64, 

92 ensure_object, 

93 ensure_platform_int, 

94 is_any_real_numeric_dtype, 

95 is_bool_dtype, 

96 is_categorical_dtype, 

97 is_dtype_equal, 

98 is_ea_or_datetimelike_dtype, 

99 is_extension_array_dtype, 

100 is_float, 

101 is_float_dtype, 

102 is_hashable, 

103 is_integer, 

104 is_integer_dtype, 

105 is_interval_dtype, 

106 is_iterator, 

107 is_list_like, 

108 is_numeric_dtype, 

109 is_object_dtype, 

110 is_scalar, 

111 is_signed_integer_dtype, 

112 is_string_dtype, 

113 needs_i8_conversion, 

114 pandas_dtype, 

115 validate_all_hashable, 

116) 

117from pandas.core.dtypes.concat import concat_compat 

118from pandas.core.dtypes.dtypes import ( 

119 CategoricalDtype, 

120 DatetimeTZDtype, 

121 ExtensionDtype, 

122 IntervalDtype, 

123 PeriodDtype, 

124) 

125from pandas.core.dtypes.generic import ( 

126 ABCDataFrame, 

127 ABCDatetimeIndex, 

128 ABCMultiIndex, 

129 ABCPeriodIndex, 

130 ABCSeries, 

131 ABCTimedeltaIndex, 

132) 

133from pandas.core.dtypes.inference import is_dict_like 

134from pandas.core.dtypes.missing import ( 

135 array_equivalent, 

136 is_valid_na_for_dtype, 

137 isna, 

138) 

139 

140from pandas.core import ( 

141 arraylike, 

142 ops, 

143) 

144from pandas.core.accessor import CachedAccessor 

145import pandas.core.algorithms as algos 

146from pandas.core.array_algos.putmask import ( 

147 setitem_datetimelike_compat, 

148 validate_putmask, 

149) 

150from pandas.core.arrays import ( 

151 ArrowExtensionArray, 

152 BaseMaskedArray, 

153 Categorical, 

154 ExtensionArray, 

155) 

156from pandas.core.arrays.string_ import StringArray 

157from pandas.core.base import ( 

158 IndexOpsMixin, 

159 PandasObject, 

160) 

161import pandas.core.common as com 

162from pandas.core.construction import ( 

163 ensure_wrapped_if_datetimelike, 

164 extract_array, 

165 sanitize_array, 

166) 

167from pandas.core.indexers import disallow_ndim_indexing 

168from pandas.core.indexes.frozen import FrozenList 

169from pandas.core.missing import clean_reindex_fill_method 

170from pandas.core.ops import get_op_result_name 

171from pandas.core.ops.invalid import make_invalid_op 

172from pandas.core.sorting import ( 

173 ensure_key_mapped, 

174 get_group_index_sorter, 

175 nargsort, 

176) 

177from pandas.core.strings.accessor import StringMethods 

178 

179from pandas.io.formats.printing import ( 

180 PrettyDict, 

181 default_pprint, 

182 format_object_summary, 

183 pprint_thing, 

184) 

185 

186if TYPE_CHECKING: 

187 from pandas import ( 

188 CategoricalIndex, 

189 DataFrame, 

190 MultiIndex, 

191 Series, 

192 ) 

193 from pandas.core.arrays import PeriodArray 

194 

195 

__all__ = ["Index"]

# Inferred-dtype strings (as returned by lib.infer_dtype) for which a stable
# sort cannot be performed reliably.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Default substitutions applied when rendering the shared docstring
# templates below for the base Index class.
_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
# Registry of docstring templates shared across Index subclasses.
_index_shared_docs: dict[str, str] = {}
# Alias for the builtin ``str``: inside the Index class body the name ``str``
# is shadowed by the ``.str`` accessor attribute, so annotations use ``str_t``.
str_t = str


# Cached object-dtype singleton, compared against frequently below.
_dtype_obj = np.dtype("object")

# Maps masked / pyarrow dtype names to their specialized masked index engines.
_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}

242 

243 

def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.

    The wrapped join method always computes ``(join_index, lidx, ridx)``;
    this wrapper discards the indexers unless the caller asked for them.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
        if not return_indexers:
            return join_index

        # An indexer of None means "no reordering needed"; only coerce
        # actual arrays to the platform integer dtype.
        if lidx is not None:
            lidx = ensure_platform_int(lidx)
        if ridx is not None:
            ridx = ensure_platform_int(ridx)
        return join_index, lidx, ridx

    return cast(F, join)

270 

271 

def _new_Index(cls, d):
    """
    This is called upon unpickling, rather than the default which doesn't
    have arguments and breaks __new__.

    Parameters
    ----------
    cls : type
        The Index subclass to reconstruct.
    d : dict
        State dict captured at pickle time, passed as keyword arguments
        to the class constructor.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ GH #13277
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)

298 

299 

300_IndexT = TypeVar("_IndexT", bound="Index") 

301 

302 

303class Index(IndexOpsMixin, PandasObject): 

304 """ 

305 Immutable sequence used for indexing and alignment. 

306 

307 The basic object storing axis labels for all pandas objects. 

308 

309 .. versionchanged:: 2.0.0 

310 

311 Index can hold all numpy numeric dtypes (except float16). Previously only 

312 int64/uint64/float64 dtypes were accepted. 

313 

314 Parameters 

315 ---------- 

316 data : array-like (1-dimensional) 

317 dtype : NumPy dtype (default: object) 

318 If dtype is None, we find the dtype that best fits the data. 

319 If an actual dtype is provided, we coerce to that dtype if it's safe. 

320 Otherwise, an error will be raised. 

321 copy : bool 

322 Make a copy of input ndarray. 

323 name : object 

324 Name to be stored in the index. 

325 tupleize_cols : bool (default: True) 

326 When True, attempt to create a MultiIndex if possible. 

327 

328 See Also 

329 -------- 

330 RangeIndex : Index implementing a monotonic integer range. 

331 CategoricalIndex : Index of :class:`Categorical` s. 

332 MultiIndex : A multi-level, or hierarchical Index. 

333 IntervalIndex : An Index of :class:`Interval` s. 

334 DatetimeIndex : Index of datetime64 data. 

335 TimedeltaIndex : Index of timedelta64 data. 

336 PeriodIndex : Index of Period data. 

337 

338 Notes 

339 ----- 

340 An Index instance can **only** contain hashable objects. 

341 An Index instance *can not* hold numpy float16 dtype. 

342 

343 Examples 

344 -------- 

345 >>> pd.Index([1, 2, 3]) 

346 Index([1, 2, 3], dtype='int64') 

347 

348 >>> pd.Index(list('abc')) 

349 Index(['a', 'b', 'c'], dtype='object') 

350 

351 >>> pd.Index([1, 2, 3], dtype="uint8") 

352 Index([1, 2, 3], dtype='uint8') 

353 """ 

354 

    # To hand over control to subclasses; higher precedence wins when two
    # index types disagree about who should drive a join.
    _join_precedence = 1

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments

362 

    @final
    def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
        """Left-join indexer against a unique ``other``; returns positions only."""
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        # similar but not identical to ov.searchsorted(sv)
        return libjoin.left_join_indexer_unique(sv, ov)

373 

    @final
    def _left_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """Left join; returns (joined values, left indexer, right indexer)."""
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
        # Rewrap the raw ndarray in whatever array type this index uses.
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

387 

    @final
    def _inner_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """Inner join; returns (joined values, left indexer, right indexer)."""
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
        # Rewrap the raw ndarray in whatever array type this index uses.
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

401 

    @final
    def _outer_indexer(
        self: _IndexT, other: _IndexT
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """Outer join; returns (joined values, left indexer, right indexer)."""
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # can_use_libjoin assures sv and ov are ndarrays
        sv = cast(np.ndarray, sv)
        ov = cast(np.ndarray, ov)
        joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
        # Rewrap the raw ndarray in whatever array type this index uses.
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

415 

    # pandas type marker used by the ABC* generic-type checks.
    _typ: str = "index"
    # Backing store for the index's values.
    _data: ExtensionArray | np.ndarray
    # Allowed type(s) for _data; subclasses may narrow this to one type.
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    # Identity sentinel shared between views; see ``is_`` / ``_reset_identity``.
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    # Attributes compared by ``identical`` / copied through shallow copies.
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]

430 

    @cache_readonly
    def _can_hold_strings(self) -> bool:
        # Numeric indexes cannot store string labels; all others can.
        return not is_numeric_dtype(self)

434 

    # Maps numpy dtypes to the specialized hash-table engines used for
    # label lookups; dtypes not listed here fall back to ObjectEngine
    # (see ``_engine_type`` below).
    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }

449 

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
        # Fall back to ObjectEngine for dtypes with no specialized engine.
        return self._engine_types.get(self.dtype, libindex.ObjectEngine)

455 

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    # Names of accessor namespaces available on this class.
    _accessors = {"str"}

    # The ``.str`` string-methods accessor; note this shadows the builtin
    # ``str`` inside the class body (annotations use ``str_t`` instead).
    str = CachedAccessor("str", StringMethods)

    # BlockValuesRefs tracking which objects share this index's data;
    # populated in ``_simple_new``.
    _references = None

465 

466 # -------------------------------------------------------------------- 

467 # Constructors 

468 

    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Index:
        # See the class docstring for parameter descriptions. The branches
        # below dispatch on the *kind* of ``data`` and may return a
        # specialized subclass (RangeIndex, MultiIndex, DatetimeIndex, ...).
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        # When not copying from an existing pandas object, share its
        # reference tracker so views stay linked.
        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in ["i", "u", "f", "b", "c", "m", "M"]:
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            # Scalars are never valid index data.
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Array-like but not ndarray/Index/Series: convert and retry.
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            # Generic iterable (list, tuple, set, generator, ...).
            if tupleize_cols:
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(data, names=name)
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            # Translate sanitize_array's Series-oriented messages into
            # Index-appropriate errors.
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        # Pick the Index subclass matching the resolved dtype and build it.
        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        return klass._simple_new(arr, name, refs=refs)

564 

    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.

        Parameters
        ----------
        data : np.ndarray or ExtensionArray
        dtype : dtype of ``data``
        copy : bool
            If True, return a copy rather than the input array itself.

        Raises
        ------
        ValueError
            If ``data`` is not 1-dimensional.
        NotImplementedError
            If ``dtype`` is float16 (no indexing engine exists for it).
        """
        if data.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        elif dtype == np.float16:
            # float16 not supported (no indexing engine)
            raise NotImplementedError("float16 indexes are not supported")

        if copy:
            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            data = data.copy()
        return data

582 

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        """Return the Index subclass appropriate for holding ``dtype``."""
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            if isinstance(dtype, DatetimeTZDtype):
                from pandas import DatetimeIndex

                return DatetimeIndex
            elif isinstance(dtype, CategoricalDtype):
                from pandas import CategoricalIndex

                return CategoricalIndex
            elif isinstance(dtype, IntervalDtype):
                from pandas import IntervalIndex

                return IntervalIndex
            elif isinstance(dtype, PeriodDtype):
                from pandas import PeriodIndex

                return PeriodIndex

            # Any other extension dtype (masked, arrow, ...) uses the base.
            return Index

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif dtype.kind == "O":
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
            return Index

        raise NotImplementedError(dtype)

626 

627 # NOTE for new Index creation: 

628 

629 # - _simple_new: It returns new Index with the same type as the caller. 

630 # All metadata (such as name) must be provided by caller's responsibility. 

631 # Using _shallow_copy is recommended because it fills these metadata 

632 # otherwise specified. 

633 

634 # - _shallow_copy: It returns new Index with the same type (using 

635 # _simple_new), but fills caller's metadata otherwise specified. Passed 

636 # kwargs will overwrite corresponding metadata. 

637 

638 # See each method's docstring. 

639 

    @classmethod
    def _simple_new(
        cls: type[_IndexT], values: ArrayLike, name: Hashable = None, refs=None
    ) -> _IndexT:
        """
        We require that we have a dtype compat for the values. If we are passed
        a non-dtype compat, then coerce using the constructor.

        Must be careful not to recurse.

        Parameters
        ----------
        values : ArrayLike
            Already-validated backing array; must be an instance of
            ``cls._data_cls``.
        name : Hashable, default None
        refs : BlockValuesRefs, optional
            Existing reference tracker to share with this index; a fresh
            one is created when not provided.
        """
        assert isinstance(values, cls._data_cls), type(values)

        # Bypass __new__/__init__ validation entirely; values are trusted.
        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._reset_identity()
        if refs is not None:
            result._references = refs
        else:
            result._references = BlockValuesRefs()
        result._references.add_index_reference(result)

        return result

664 

    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in ["i", "u", "f", "b"]:
                # Inference found a numeric/bool dtype; rebuild with it.
                return Index(values, name=result.name)

        return result

682 

    @cache_readonly
    def _constructor(self: _IndexT) -> type[_IndexT]:
        # Concrete class used when building like-typed results.
        return type(self)

686 

    @final
    def _maybe_check_unique(self) -> None:
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            # Append a table of duplicated labels and their positions.
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)

707 

    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
              positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        # Group positions (0..n-1) by label and keep only duplicated labels.
        out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

739 

740 # -------------------------------------------------------------------- 

741 # Index Internals Methods 

742 

    def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
        """
        Create a new Index with the same class as the caller, don't copy the
        data, use the same object attributes with passed in attributes taking
        precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self._name if name is no_default else name

        # Propagate our reference tracker so the copy stays linked to the
        # same underlying data references.
        return self._simple_new(values, name=name, refs=self._references)

759 

    def _view(self: _IndexT) -> _IndexT:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        # Share the attribute cache so already-computed cached properties
        # carry over to the view.
        result._cache = self._cache
        return result

768 

    @final
    def _rename(self: _IndexT, name: Hashable) -> _IndexT:
        """
        fastpath for rename if new name is already validated.
        """
        # View shares data; only the name differs on the result.
        result = self._view()
        result._name = name
        return result

777 

778 @final 

779 def is_(self, other) -> bool: 

780 """ 

781 More flexible, faster check like ``is`` but that works through views. 

782 

783 Note: this is *not* the same as ``Index.identical()``, which checks 

784 that metadata is also the same. 

785 

786 Parameters 

787 ---------- 

788 other : object 

789 Other object to compare against. 

790 

791 Returns 

792 ------- 

793 bool 

794 True if both have same underlying data, False otherwise. 

795 

796 See Also 

797 -------- 

798 Index.identical : Works like ``Index.is_`` but also checks metadata. 

799 """ 

800 if self is other: 

801 return True 

802 elif not hasattr(other, "_id"): 

803 return False 

804 elif self._id is None or other._id is None: 

805 return False 

806 else: 

807 return self._id is other._id 

808 

    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Fresh sentinel; ``is_`` compares these sentinels by identity.
        self._id = object()

815 

    @final
    def _cleanup(self) -> None:
        # Drop the engine's cached hash-table mapping to free memory;
        # presumably it is rebuilt lazily on next lookup — TODO confirm.
        self._engine.clear_mapping()

819 

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        """Construct (and cache) the lookup engine for this index's dtype."""
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

856 

857 @final 

858 @cache_readonly 

859 def _dir_additions_for_owner(self) -> set[str_t]: 

860 """ 

861 Add the string-like labels to the owner dataframe/series dir output. 

862 

863 If this is a MultiIndex, it's first level values are used. 

864 """ 

865 return { 

866 c 

867 for c in self.unique(level=0)[: get_option("display.max_dir_items")] 

868 if isinstance(c, str) and c.isidentifier() 

869 } 

870 

871 # -------------------------------------------------------------------- 

872 # Array-Like Methods 

873 

874 # ndarray compat 

    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        return len(self._data)

880 

    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)

886 

    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        """Handle numpy ufuncs applied to this Index."""
        # Defer to Series/DataFrame so binary ops align/broadcast properly.
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # Prefer the corresponding dunder op (e.g. np.add -> __add__).
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Apply the ufunc to the underlying values, not the Index wrapper.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        if result.dtype == np.float16:
            # float16 has no indexing engine; upcast to float32.
            result = result.astype(np.float32)

        return self.__array_wrap__(result)

920 

    def __array_wrap__(self, result, context=None):
        """
        Gets called after a ufunc and other functions e.g. np.split.
        """
        result = lib.item_from_zerodim(result)
        # Scalars, boolean masks, and >1-dim arrays are returned as-is
        # rather than re-wrapped in an Index.
        if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
            return result

        return Index(result, name=self.name)

930 

    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        return self._data.dtype

937 

    @final
    def ravel(self, order: str_t = "C") -> Index:
        """
        Return a view on self.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        # Index is always 1-D, so "flattening" is just a full slice;
        # ``order`` is accepted for numpy compatibility but unused.
        return self[:]

952 

    def view(self, cls=None):
        """View this index, optionally reinterpreting it as another dtype/class."""
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is a dtype-like, not an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                # Reinterpret the underlying i8 values under the new dtype.
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name, refs=self._references)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            # Views share identity; see ``is_``.
            result._id = self._id
        return result

982 

    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            # Re-raise array-level errors with the Index class name.
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            # The cast produced a view on the original data; keep the
            # reference tracking linked so mutations are detected.
            result._references = self._references
            result._references.add_index_reference(result)
        return result

1039 

    # Shared docstring template for `take`, rendered per subclass via
    # `_index_shared_docs["take"] % _index_doc_kwargs` (see @Appender below).
    _index_shared_docs[
        "take"
    ] = """
    Return a new %(klass)s of the values selected by the indices.

    For internal compatibility with numpy arrays.

    Parameters
    ----------
    indices : array-like
        Indices to be taken.
    axis : int, optional
        The axis over which to select values, always 0.
    allow_fill : bool, default True
    fill_value : scalar, default None
        If allow_fill=True and fill_value is not None, indices specified by
        -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

    Returns
    -------
    Index
        An index formed of elements at the given indices. Will be the same
        type as self, except for RangeIndex.

    See Also
    --------
    numpy.ndarray.take: Return an array formed from the
        elements of a at the given indices.
    """

1069 

1070 @Appender(_index_shared_docs["take"] % _index_doc_kwargs) 

1071 def take( 

1072 self, 

1073 indices, 

1074 axis: Axis = 0, 

1075 allow_fill: bool = True, 

1076 fill_value=None, 

1077 **kwargs, 

1078 ): 

1079 if kwargs: 

1080 nv.validate_take((), kwargs) 

1081 if is_scalar(indices): 

1082 raise TypeError("Expected indices to be array-like") 

1083 indices = ensure_platform_int(indices) 

1084 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices) 

1085 

1086 # Note: we discard fill_value and use self._na_value, only relevant 

1087 # in the case where allow_fill is True and fill_value is not None 

1088 values = self._values 

1089 if isinstance(values, np.ndarray): 

1090 taken = algos.take( 

1091 values, indices, allow_fill=allow_fill, fill_value=self._na_value 

1092 ) 

1093 else: 

1094 # algos.take passes 'axis' keyword which not all EAs accept 

1095 taken = values.take( 

1096 indices, allow_fill=allow_fill, fill_value=self._na_value 

1097 ) 

1098 # _constructor so RangeIndex-> Index with an int64 dtype 

1099 return self._constructor._simple_new(taken, name=self.name) 

1100 

1101 @final 

1102 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool: 

1103 """ 

1104 We only use pandas-style take when allow_fill is True _and_ 

1105 fill_value is not None. 

1106 """ 

1107 if allow_fill and fill_value is not None: 

1108 # only fill if we are passing a non-None fill_value 

1109 if self._can_hold_na: 

1110 if (indices < -1).any(): 

1111 raise ValueError( 

1112 "When allow_fill=True and fill_value is not None, " 

1113 "all indices must be >= -1" 

1114 ) 

1115 else: 

1116 cls_name = type(self).__name__ 

1117 raise ValueError( 

1118 f"Unable to fill values because {cls_name} cannot contain NA" 

1119 ) 

1120 else: 

1121 allow_fill = False 

1122 return allow_fill 

1123 

    # Shared docstring template for `repeat`, rendered per subclass via
    # `_index_shared_docs["repeat"] % _index_doc_kwargs` (see @Appender below).
    _index_shared_docs[
        "repeat"
    ] = """
    Repeat elements of a %(klass)s.

    Returns a new %(klass)s where each element of the current %(klass)s
    is repeated consecutively a given number of times.

    Parameters
    ----------
    repeats : int or array of ints
        The number of repetitions for each element. This should be a
        non-negative integer. Repeating 0 times will return an empty
        %(klass)s.
    axis : None
        Must be ``None``. Has no effect but is accepted for compatibility
        with numpy.

    Returns
    -------
    %(klass)s
        Newly created %(klass)s with repeated elements.

    See Also
    --------
    Series.repeat : Equivalent function for Series.
    numpy.repeat : Similar method for :class:`numpy.ndarray`.

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx
    Index(['a', 'b', 'c'], dtype='object')
    >>> idx.repeat(2)
    Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
    >>> idx.repeat([1, 2, 3])
    Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
    """

1162 

1163 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs) 

1164 def repeat(self, repeats, axis=None): 

1165 repeats = ensure_platform_int(repeats) 

1166 nv.validate_repeat((), {"axis": axis}) 

1167 res_values = self._values.repeat(repeats) 

1168 

1169 # _constructor so RangeIndex-> Index with an int64 dtype 

1170 return self._constructor._simple_new(res_values, name=self.name) 

1171 

1172 # -------------------------------------------------------------------- 

1173 # Copying Methods 

1174 

1175 def copy( 

1176 self: _IndexT, 

1177 name: Hashable | None = None, 

1178 deep: bool = False, 

1179 ) -> _IndexT: 

1180 """ 

1181 Make a copy of this object. 

1182 

1183 Name is set on the new object. 

1184 

1185 Parameters 

1186 ---------- 

1187 name : Label, optional 

1188 Set name for new object. 

1189 deep : bool, default False 

1190 

1191 Returns 

1192 ------- 

1193 Index 

1194 Index refer to new object which is a copy of this object. 

1195 

1196 Notes 

1197 ----- 

1198 In most cases, there should be no functional difference from using 

1199 ``deep``, but if ``deep`` is passed it will attempt to deepcopy. 

1200 """ 

1201 

1202 name = self._validate_names(name=name, deep=deep)[0] 

1203 if deep: 

1204 new_data = self._data.copy() 

1205 new_index = type(self)._simple_new(new_data, name=name) 

1206 else: 

1207 new_index = self._rename(name=name) 

1208 return new_index 

1209 

1210 @final 

1211 def __copy__(self: _IndexT, **kwargs) -> _IndexT: 

1212 return self.copy(**kwargs) 

1213 

1214 @final 

1215 def __deepcopy__(self: _IndexT, memo=None) -> _IndexT: 

1216 """ 

1217 Parameters 

1218 ---------- 

1219 memo, default None 

1220 Standard signature. Unused 

1221 """ 

1222 return self.copy(deep=True) 

1223 

1224 # -------------------------------------------------------------------- 

1225 # Rendering Methods 

1226 

1227 @final 

1228 def __repr__(self) -> str_t: 

1229 """ 

1230 Return a string representation for this object. 

1231 """ 

1232 klass_name = type(self).__name__ 

1233 data = self._format_data() 

1234 attrs = self._format_attrs() 

1235 space = self._format_space() 

1236 attrs_str = [f"{k}={v}" for k, v in attrs] 

1237 prepr = f",{space}".join(attrs_str) 

1238 

1239 # no data provided, just attributes 

1240 if data is None: 

1241 data = "" 

1242 

1243 return f"{klass_name}({data}{prepr})" 

1244 

1245 def _format_space(self) -> str_t: 

1246 # using space here controls if the attributes 

1247 # are line separated or not (the default) 

1248 

1249 # max_seq_items = get_option('display.max_seq_items') 

1250 # if len(self) > max_seq_items: 

1251 # space = "\n%s" % (' ' * (len(klass) + 1)) 

1252 return " " 

1253 

    @property
    def _formatter_func(self):
        """
        Return the formatter function used when rendering elements.
        """
        # default_pprint is the generic pretty-printer; subclasses override
        # this property for dtype-specific formatting.
        return default_pprint

1260 

1261 def _format_data(self, name=None) -> str_t: 

1262 """ 

1263 Return the formatted data as a unicode string. 

1264 """ 

1265 # do we want to justify (only do so for non-objects) 

1266 is_justify = True 

1267 

1268 if self.inferred_type == "string": 

1269 is_justify = False 

1270 elif self.inferred_type == "categorical": 

1271 self = cast("CategoricalIndex", self) 

1272 if is_object_dtype(self.categories): 

1273 is_justify = False 

1274 

1275 return format_object_summary( 

1276 self, 

1277 self._formatter_func, 

1278 is_justify=is_justify, 

1279 name=name, 

1280 line_break_each_value=self._is_multi, 

1281 ) 

1282 

1283 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]: 

1284 """ 

1285 Return a list of tuples of the (attr,formatted_value). 

1286 """ 

1287 attrs: list[tuple[str_t, str_t | int | bool | None]] = [] 

1288 

1289 if not self._is_multi: 

1290 attrs.append(("dtype", f"'{self.dtype}'")) 

1291 

1292 if self.name is not None: 

1293 attrs.append(("name", default_pprint(self.name))) 

1294 elif self._is_multi and any(x is not None for x in self.names): 

1295 attrs.append(("names", default_pprint(self.names))) 

1296 

1297 max_seq_items = get_option("display.max_seq_items") or len(self) 

1298 if len(self) > max_seq_items: 

1299 attrs.append(("length", len(self))) 

1300 return attrs 

1301 

1302 @final 

1303 def _get_level_names(self) -> Hashable | Sequence[Hashable]: 

1304 """ 

1305 Return a name or list of names with None replaced by the level number. 

1306 """ 

1307 if self._is_multi: 

1308 return [ 

1309 level if name is None else name for level, name in enumerate(self.names) 

1310 ] 

1311 else: 

1312 return 0 if self.name is None else self.name 

1313 

    @final
    def _mpl_repr(self) -> np.ndarray:
        # How to represent ourselves to matplotlib: plain (non-datetime64)
        # numpy dtypes pass through unchanged; anything else is converted
        # to an object-dtype ndarray.
        if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
            return cast(np.ndarray, self.values)
        return self.astype(object, copy=False)._values

1320 

1321 def format( 

1322 self, 

1323 name: bool = False, 

1324 formatter: Callable | None = None, 

1325 na_rep: str_t = "NaN", 

1326 ) -> list[str_t]: 

1327 """ 

1328 Render a string representation of the Index. 

1329 """ 

1330 header = [] 

1331 if name: 

1332 header.append( 

1333 pprint_thing(self.name, escape_chars=("\t", "\r", "\n")) 

1334 if self.name is not None 

1335 else "" 

1336 ) 

1337 

1338 if formatter is not None: 

1339 return header + list(self.map(formatter)) 

1340 

1341 return self._format_with_header(header, na_rep=na_rep) 

1342 

    def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
        # Render every element to a string and append to the given header
        # lines; `na_rep` replaces float NaNs in object-dtype data.
        from pandas.io.formats.format import format_array

        values = self._values

        if is_object_dtype(values.dtype):
            values = cast(np.ndarray, values)
            # safe=True: infer better dtypes without destroying information
            values = lib.maybe_convert_objects(values, safe=True)

            result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]

            # could have nans
            mask = is_float_nan(values)
            if mask.any():
                # round-trip through ndarray for vectorized replacement
                result_arr = np.array(result)
                result_arr[mask] = na_rep
                result = result_arr.tolist()
        else:
            # non-object dtypes delegate to the shared array formatter
            result = trim_front(format_array(values, None, justify="left"))
        return header + result

1363 

    def _format_native_types(
        self,
        *,
        na_rep: str_t = "",
        decimal: str_t = ".",
        float_format=None,
        date_format=None,
        quoting=None,
    ) -> npt.NDArray[np.object_]:
        """
        Actually format specific types of the index.

        Returns an object-dtype ndarray of formatted strings with NA
        positions replaced by ``na_rep``.
        """
        from pandas.io.formats.format import FloatArrayFormatter

        # non-extension float dtypes get the dedicated float formatter so
        # float_format/decimal are honored
        if is_float_dtype(self.dtype) and not is_extension_array_dtype(self.dtype):
            formatter = FloatArrayFormatter(
                self._values,
                na_rep=na_rep,
                float_format=float_format,
                decimal=decimal,
                quoting=quoting,
                fixed_width=False,
            )
            return formatter.get_result_as_array()

        mask = isna(self)
        if not is_object_dtype(self) and not quoting:
            values = np.asarray(self).astype(str)
        else:
            # copy=True: we mutate `values` below and must not write back
            values = np.array(self, dtype=object, copy=True)

        values[mask] = na_rep
        return values

1397 

1398 def _summary(self, name=None) -> str_t: 

1399 """ 

1400 Return a summarized representation. 

1401 

1402 Parameters 

1403 ---------- 

1404 name : str 

1405 name to use in the summary representation 

1406 

1407 Returns 

1408 ------- 

1409 String with a summarized representation of the index 

1410 """ 

1411 if len(self) > 0: 

1412 head = self[0] 

1413 if hasattr(head, "format") and not isinstance(head, str): 

1414 head = head.format() 

1415 elif needs_i8_conversion(self.dtype): 

1416 # e.g. Timedelta, display as values, not quoted 

1417 head = self._formatter_func(head).replace("'", "") 

1418 tail = self[-1] 

1419 if hasattr(tail, "format") and not isinstance(tail, str): 

1420 tail = tail.format() 

1421 elif needs_i8_conversion(self.dtype): 

1422 # e.g. Timedelta, display as values, not quoted 

1423 tail = self._formatter_func(tail).replace("'", "") 

1424 

1425 index_summary = f", {head} to {tail}" 

1426 else: 

1427 index_summary = "" 

1428 

1429 if name is None: 

1430 name = type(self).__name__ 

1431 return f"{name}: {len(self)} entries{index_summary}" 

1432 

1433 # -------------------------------------------------------------------- 

1434 # Conversion Methods 

1435 

1436 def to_flat_index(self: _IndexT) -> _IndexT: 

1437 """ 

1438 Identity method. 

1439 

1440 This is implemented for compatibility with subclass implementations 

1441 when chaining. 

1442 

1443 Returns 

1444 ------- 

1445 pd.Index 

1446 Caller. 

1447 

1448 See Also 

1449 -------- 

1450 MultiIndex.to_flat_index : Subclass implementation. 

1451 """ 

1452 return self 

1453 

1454 @final 

1455 def to_series(self, index=None, name: Hashable = None) -> Series: 

1456 """ 

1457 Create a Series with both index and values equal to the index keys. 

1458 

1459 Useful with map for returning an indexer based on an index. 

1460 

1461 Parameters 

1462 ---------- 

1463 index : Index, optional 

1464 Index of resulting Series. If None, defaults to original index. 

1465 name : str, optional 

1466 Name of resulting Series. If None, defaults to name of original 

1467 index. 

1468 

1469 Returns 

1470 ------- 

1471 Series 

1472 The dtype will be based on the type of the Index values. 

1473 

1474 See Also 

1475 -------- 

1476 Index.to_frame : Convert an Index to a DataFrame. 

1477 Series.to_frame : Convert Series to DataFrame. 

1478 

1479 Examples 

1480 -------- 

1481 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') 

1482 

1483 By default, the original Index and original name is reused. 

1484 

1485 >>> idx.to_series() 

1486 animal 

1487 Ant Ant 

1488 Bear Bear 

1489 Cow Cow 

1490 Name: animal, dtype: object 

1491 

1492 To enforce a new Index, specify new labels to ``index``: 

1493 

1494 >>> idx.to_series(index=[0, 1, 2]) 

1495 0 Ant 

1496 1 Bear 

1497 2 Cow 

1498 Name: animal, dtype: object 

1499 

1500 To override the name of the resulting column, specify `name`: 

1501 

1502 >>> idx.to_series(name='zoo') 

1503 animal 

1504 Ant Ant 

1505 Bear Bear 

1506 Cow Cow 

1507 Name: zoo, dtype: object 

1508 """ 

1509 from pandas import Series 

1510 

1511 if index is None: 

1512 index = self._view() 

1513 if name is None: 

1514 name = self.name 

1515 

1516 return Series(self._values.copy(), index=index, name=name) 

1517 

1518 def to_frame( 

1519 self, index: bool = True, name: Hashable = lib.no_default 

1520 ) -> DataFrame: 

1521 """ 

1522 Create a DataFrame with a column containing the Index. 

1523 

1524 Parameters 

1525 ---------- 

1526 index : bool, default True 

1527 Set the index of the returned DataFrame as the original Index. 

1528 

1529 name : object, defaults to index.name 

1530 The passed name should substitute for the index name (if it has 

1531 one). 

1532 

1533 Returns 

1534 ------- 

1535 DataFrame 

1536 DataFrame containing the original Index data. 

1537 

1538 See Also 

1539 -------- 

1540 Index.to_series : Convert an Index to a Series. 

1541 Series.to_frame : Convert Series to DataFrame. 

1542 

1543 Examples 

1544 -------- 

1545 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal') 

1546 >>> idx.to_frame() 

1547 animal 

1548 animal 

1549 Ant Ant 

1550 Bear Bear 

1551 Cow Cow 

1552 

1553 By default, the original Index is reused. To enforce a new Index: 

1554 

1555 >>> idx.to_frame(index=False) 

1556 animal 

1557 0 Ant 

1558 1 Bear 

1559 2 Cow 

1560 

1561 To override the name of the resulting column, specify `name`: 

1562 

1563 >>> idx.to_frame(index=False, name='zoo') 

1564 zoo 

1565 0 Ant 

1566 1 Bear 

1567 2 Cow 

1568 """ 

1569 from pandas import DataFrame 

1570 

1571 if name is lib.no_default: 

1572 name = self._get_level_names() 

1573 result = DataFrame({name: self._values.copy()}) 

1574 

1575 if index: 

1576 result.index = self 

1577 return result 

1578 

1579 # -------------------------------------------------------------------- 

1580 # Name-Centric Methods 

1581 

    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        # validates that `value` is usable as a name (e.g. hashable)
        maybe_extract_name(value, None, type(self))
        self._name = value

1599 

1600 @final 

1601 def _validate_names( 

1602 self, name=None, names=None, deep: bool = False 

1603 ) -> list[Hashable]: 

1604 """ 

1605 Handles the quirks of having a singular 'name' parameter for general 

1606 Index and plural 'names' parameter for MultiIndex. 

1607 """ 

1608 from copy import deepcopy 

1609 

1610 if names is not None and name is not None: 

1611 raise TypeError("Can only provide one of `names` and `name`") 

1612 if names is None and name is None: 

1613 new_names = deepcopy(self.names) if deep else self.names 

1614 elif names is not None: 

1615 if not is_list_like(names): 

1616 raise TypeError("Must pass list-like as `names`.") 

1617 new_names = names 

1618 elif not is_list_like(name): 

1619 new_names = [name] 

1620 else: 

1621 new_names = name 

1622 

1623 if len(new_names) != len(self.names): 

1624 raise ValueError( 

1625 f"Length of new names must be {len(self.names)}, got {len(new_names)}" 

1626 ) 

1627 

1628 # All items in 'new_names' need to be hashable 

1629 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name") 

1630 

1631 return new_names 

1632 

    def _get_default_index_names(
        self, names: Hashable | Sequence[Hashable] | None = None, default=None
    ) -> list[Hashable]:
        """
        Get names of index.

        Parameters
        ----------
        names : int, str or 1-dimensional list, default None
            Index names to set.
        default : str
            Default name of index.

        Raises
        ------
        ValueError
            If `names` is not an int, str, or 1-dimensional list.
        """
        from pandas.core.indexes.multi import MultiIndex

        # normalize a scalar label to a one-element list
        if names is not None:
            if isinstance(names, (int, str)):
                names = [names]

        if not isinstance(names, list) and names is not None:
            raise ValueError("Index names must be str or 1-dimensional list")

        if not names:
            # fall back to the existing names, filling in `default` when unset
            if isinstance(self, MultiIndex):
                names = com.fill_missing_names(self.names)
            else:
                names = [default] if self.name is None else [self.name]

        return names

1667 

1668 def _get_names(self) -> FrozenList: 

1669 return FrozenList((self.name,)) 

1670 

1671 def _set_names(self, values, *, level=None) -> None: 

1672 """ 

1673 Set new names on index. Each name has to be a hashable type. 

1674 

1675 Parameters 

1676 ---------- 

1677 values : str or sequence 

1678 name(s) to set 

1679 level : int, level name, or sequence of int/level names (default None) 

1680 If the index is a MultiIndex (hierarchical), level(s) to set (None 

1681 for all levels). Otherwise level must be None 

1682 

1683 Raises 

1684 ------ 

1685 TypeError if each name is not hashable. 

1686 """ 

1687 if not is_list_like(values): 

1688 raise ValueError("Names must be a list-like") 

1689 if len(values) != 1: 

1690 raise ValueError(f"Length of new names must be 1, got {len(values)}") 

1691 

1692 # GH 20527 

1693 # All items in 'name' need to be hashable: 

1694 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name") 

1695 

1696 self._name = values[0] 

1697 

1698 names = property(fset=_set_names, fget=_get_names) 

1699 

    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: Literal[False] = ...
    ) -> _IndexT:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: bool = ...
    ) -> _IndexT | None:
        ...

    def set_names(
        self: _IndexT, names, *, level=None, inplace: bool = False
    ) -> _IndexT | None:
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------

        names : label or list of label or dict-like for MultiIndex
            Name(s) to set.

            .. versionchanged:: 1.3.0

        level : int, label or list of int or label, optional
            If the index is a MultiIndex and names is not dict-like, level(s) to set
            (None for all levels). Otherwise level must be None.

            .. versionchanged:: 1.3.0

        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx = idx.set_names(['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])

        When renaming levels with a dict, levels can not be passed.

        >>> idx.set_names({'kind': 'snake'})
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['snake', 'year'])
        """
        # --- argument validation: level/names combinations ---
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
            raise TypeError("Can only pass dict-like as `names` for MultiIndex.")

        if is_dict_like(names) and level is not None:
            raise TypeError("Can not pass level for dictlike `names`.")

        if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
            # Transform dict to list of new names and corresponding levels
            level, names_adjusted = [], []
            for i, name in enumerate(self.names):
                if name in names.keys():
                    level.append(i)
                    names_adjusted.append(names[name])
            names = names_adjusted

        # normalize scalars to single-element lists for _set_names
        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        # mutate self directly, or a shallow view of it
        if inplace:
            idx = self
        else:
            idx = self._view()

        idx._set_names(names, level=level)
        if not inplace:
            return idx
        return None

1822 

1823 def rename(self, name, inplace: bool = False): 

1824 """ 

1825 Alter Index or MultiIndex name. 

1826 

1827 Able to set new names without level. Defaults to returning new index. 

1828 Length of names must match number of levels in MultiIndex. 

1829 

1830 Parameters 

1831 ---------- 

1832 name : label or list of labels 

1833 Name(s) to set. 

1834 inplace : bool, default False 

1835 Modifies the object directly, instead of creating a new Index or 

1836 MultiIndex. 

1837 

1838 Returns 

1839 ------- 

1840 Index or None 

1841 The same type as the caller or None if ``inplace=True``. 

1842 

1843 See Also 

1844 -------- 

1845 Index.set_names : Able to set new names partially and by level. 

1846 

1847 Examples 

1848 -------- 

1849 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score') 

1850 >>> idx.rename('grade') 

1851 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade') 

1852 

1853 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'], 

1854 ... [2018, 2019]], 

1855 ... names=['kind', 'year']) 

1856 >>> idx 

1857 MultiIndex([('python', 2018), 

1858 ('python', 2019), 

1859 ( 'cobra', 2018), 

1860 ( 'cobra', 2019)], 

1861 names=['kind', 'year']) 

1862 >>> idx.rename(['species', 'year']) 

1863 MultiIndex([('python', 2018), 

1864 ('python', 2019), 

1865 ( 'cobra', 2018), 

1866 ( 'cobra', 2019)], 

1867 names=['species', 'year']) 

1868 >>> idx.rename('species') 

1869 Traceback (most recent call last): 

1870 TypeError: Must pass list-like as `names`. 

1871 """ 

1872 return self.set_names([name], inplace=inplace) 

1873 

1874 # -------------------------------------------------------------------- 

1875 # Level-Centric Methods 

1876 

    @property
    def nlevels(self) -> int:
        """
        Number of levels. Always 1 for a flat Index; MultiIndex overrides.
        """
        return 1

1883 

1884 def _sort_levels_monotonic(self: _IndexT) -> _IndexT: 

1885 """ 

1886 Compat with MultiIndex. 

1887 """ 

1888 return self 

1889 

1890 @final 

1891 def _validate_index_level(self, level) -> None: 

1892 """ 

1893 Validate index level. 

1894 

1895 For single-level Index getting level number is a no-op, but some 

1896 verification must be done like in MultiIndex. 

1897 

1898 """ 

1899 if isinstance(level, int): 

1900 if level < 0 and level != -1: 

1901 raise IndexError( 

1902 "Too many levels: Index has only 1 level, " 

1903 f"{level} is not a valid level number" 

1904 ) 

1905 if level > 0: 

1906 raise IndexError( 

1907 f"Too many levels: Index has only 1 level, not {level + 1}" 

1908 ) 

1909 elif level != self.name: 

1910 raise KeyError( 

1911 f"Requested level ({level}) does not match index name ({self.name})" 

1912 ) 

1913 

1914 def _get_level_number(self, level) -> int: 

1915 self._validate_index_level(level) 

1916 return 0 

1917 

1918 def sortlevel( 

1919 self, level=None, ascending: bool | list[bool] = True, sort_remaining=None 

1920 ): 

1921 """ 

1922 For internal compatibility with the Index API. 

1923 

1924 Sort the Index. This is for compat with MultiIndex 

1925 

1926 Parameters 

1927 ---------- 

1928 ascending : bool, default True 

1929 False to sort in descending order 

1930 

1931 level, sort_remaining are compat parameters 

1932 

1933 Returns 

1934 ------- 

1935 Index 

1936 """ 

1937 if not isinstance(ascending, (list, bool)): 

1938 raise TypeError( 

1939 "ascending must be a single bool value or" 

1940 "a list of bool values of length 1" 

1941 ) 

1942 

1943 if isinstance(ascending, list): 

1944 if len(ascending) != 1: 

1945 raise TypeError("ascending must be a list of bool values of length 1") 

1946 ascending = ascending[0] 

1947 

1948 if not isinstance(ascending, bool): 

1949 raise TypeError("ascending must be a bool value") 

1950 

1951 return self.sort_values(return_indexer=True, ascending=ascending) 

1952 

1953 def _get_level_values(self, level) -> Index: 

1954 """ 

1955 Return an Index of values for requested level. 

1956 

1957 This is primarily useful to get an individual level of values from a 

1958 MultiIndex, but is provided on Index as well for compatibility. 

1959 

1960 Parameters 

1961 ---------- 

1962 level : int or str 

1963 It is either the integer position or the name of the level. 

1964 

1965 Returns 

1966 ------- 

1967 Index 

1968 Calling object, as there is only one level in the Index. 

1969 

1970 See Also 

1971 -------- 

1972 MultiIndex.get_level_values : Get values for a level of a MultiIndex. 

1973 

1974 Notes 

1975 ----- 

1976 For Index, level should be 0, since there are no multiple levels. 

1977 

1978 Examples 

1979 -------- 

1980 >>> idx = pd.Index(list('abc')) 

1981 >>> idx 

1982 Index(['a', 'b', 'c'], dtype='object') 

1983 

1984 Get level values by supplying `level` as integer: 

1985 

1986 >>> idx.get_level_values(0) 

1987 Index(['a', 'b', 'c'], dtype='object') 

1988 """ 

1989 self._validate_index_level(level) 

1990 return self 

1991 

1992 get_level_values = _get_level_values 

1993 

1994 @final 

1995 def droplevel(self, level: IndexLabel = 0): 

1996 """ 

1997 Return index with requested level(s) removed. 

1998 

1999 If resulting index has only 1 level left, the result will be 

2000 of Index type, not MultiIndex. The original index is not modified inplace. 

2001 

2002 Parameters 

2003 ---------- 

2004 level : int, str, or list-like, default 0 

2005 If a string is given, must be the name of a level 

2006 If list-like, elements must be names or indexes of levels. 

2007 

2008 Returns 

2009 ------- 

2010 Index or MultiIndex 

2011 

2012 Examples 

2013 -------- 

2014 >>> mi = pd.MultiIndex.from_arrays( 

2015 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z']) 

2016 >>> mi 

2017 MultiIndex([(1, 3, 5), 

2018 (2, 4, 6)], 

2019 names=['x', 'y', 'z']) 

2020 

2021 >>> mi.droplevel() 

2022 MultiIndex([(3, 5), 

2023 (4, 6)], 

2024 names=['y', 'z']) 

2025 

2026 >>> mi.droplevel(2) 

2027 MultiIndex([(1, 3), 

2028 (2, 4)], 

2029 names=['x', 'y']) 

2030 

2031 >>> mi.droplevel('z') 

2032 MultiIndex([(1, 3), 

2033 (2, 4)], 

2034 names=['x', 'y']) 

2035 

2036 >>> mi.droplevel(['x', 'y']) 

2037 Index([5, 6], dtype='int64', name='z') 

2038 """ 

2039 if not isinstance(level, (tuple, list)): 

2040 level = [level] 

2041 

2042 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1] 

2043 

2044 return self._drop_level_numbers(levnums) 

2045 

    @final
    def _drop_level_numbers(self, levnums: list[int]):
        """
        Drop MultiIndex levels by level _number_, not name.

        Expects ``levnums`` sorted in descending order so that popping one
        level does not shift the positions of those still to be popped.
        """

        if not levnums and not isinstance(self, ABCMultiIndex):
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:
            # single level left: collapse to a flat Index
            lev = new_levels[0]

            if len(lev) == 0:
                # If lev is empty, lev.take will fail GH#42055
                if len(new_codes[0]) == 0:
                    # GH#45230 preserve RangeIndex here
                    # see test_reset_index_empty_rangeindex
                    result = lev[:0]
                else:
                    res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                    # _constructor instead of type(lev) for RangeIndex compat GH#35230
                    result = lev._constructor._simple_new(res_values, name=new_names[0])
            else:
                # set nan if needed
                mask = new_codes[0] == -1
                result = new_levels[0].take(new_codes[0])
                if mask.any():
                    result = result.putmask(mask, np.nan)

                result._name = new_names[0]

            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )

2103 

2104 # -------------------------------------------------------------------- 

2105 # Introspection Methods 

2106 

2107 @cache_readonly 

2108 @final 

2109 def _can_hold_na(self) -> bool: 

2110 if isinstance(self.dtype, ExtensionDtype): 

2111 if isinstance(self.dtype, IntervalDtype): 

2112 # FIXME(GH#45720): this is inaccurate for integer-backed 

2113 # IntervalArray, but without it other.categories.take raises 

2114 # in IntervalArray._cmp_method 

2115 return True 

2116 return self.dtype._can_hold_na 

2117 if self.dtype.kind in ["i", "u", "b"]: 

2118 return False 

2119 return True 

2120 

2121 @property 

2122 def is_monotonic_increasing(self) -> bool: 

2123 """ 

2124 Return a boolean if the values are equal or increasing. 

2125 

2126 Returns 

2127 ------- 

2128 bool 

2129 

2130 See Also 

2131 -------- 

2132 Index.is_monotonic_decreasing : Check if the values are equal or decreasing. 

2133 

2134 Examples 

2135 -------- 

2136 >>> pd.Index([1, 2, 3]).is_monotonic_increasing 

2137 True 

2138 >>> pd.Index([1, 2, 2]).is_monotonic_increasing 

2139 True 

2140 >>> pd.Index([1, 3, 2]).is_monotonic_increasing 

2141 False 

2142 """ 

2143 return self._engine.is_monotonic_increasing 

2144 

2145 @property 

2146 def is_monotonic_decreasing(self) -> bool: 

2147 """ 

2148 Return a boolean if the values are equal or decreasing. 

2149 

2150 Returns 

2151 ------- 

2152 bool 

2153 

2154 See Also 

2155 -------- 

2156 Index.is_monotonic_increasing : Check if the values are equal or increasing. 

2157 

2158 Examples 

2159 -------- 

2160 >>> pd.Index([3, 2, 1]).is_monotonic_decreasing 

2161 True 

2162 >>> pd.Index([3, 2, 2]).is_monotonic_decreasing 

2163 True 

2164 >>> pd.Index([3, 1, 2]).is_monotonic_decreasing 

2165 False 

2166 """ 

2167 return self._engine.is_monotonic_decreasing 

2168 

2169 @final 

2170 @property 

2171 def _is_strictly_monotonic_increasing(self) -> bool: 

2172 """ 

2173 Return if the index is strictly monotonic increasing 

2174 (only increasing) values. 

2175 

2176 Examples 

2177 -------- 

2178 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing 

2179 True 

2180 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing 

2181 False 

2182 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing 

2183 False 

2184 """ 

2185 return self.is_unique and self.is_monotonic_increasing 

2186 

2187 @final 

2188 @property 

2189 def _is_strictly_monotonic_decreasing(self) -> bool: 

2190 """ 

2191 Return if the index is strictly monotonic decreasing 

2192 (only decreasing) values. 

2193 

2194 Examples 

2195 -------- 

2196 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing 

2197 True 

2198 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing 

2199 False 

2200 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing 

2201 False 

2202 """ 

2203 return self.is_unique and self.is_monotonic_decreasing 

2204 

2205 @cache_readonly 

2206 def is_unique(self) -> bool: 

2207 """ 

2208 Return if the index has unique values. 

2209 

2210 Returns 

2211 ------- 

2212 bool 

2213 

2214 See Also 

2215 -------- 

2216 Index.has_duplicates : Inverse method that checks if it has duplicate values. 

2217 

2218 Examples 

2219 -------- 

2220 >>> idx = pd.Index([1, 5, 7, 7]) 

2221 >>> idx.is_unique 

2222 False 

2223 

2224 >>> idx = pd.Index([1, 5, 7]) 

2225 >>> idx.is_unique 

2226 True 

2227 

2228 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2229 ... "Watermelon"]).astype("category") 

2230 >>> idx.is_unique 

2231 False 

2232 

2233 >>> idx = pd.Index(["Orange", "Apple", 

2234 ... "Watermelon"]).astype("category") 

2235 >>> idx.is_unique 

2236 True 

2237 """ 

2238 return self._engine.is_unique 

2239 

2240 @final 

2241 @property 

2242 def has_duplicates(self) -> bool: 

2243 """ 

2244 Check if the Index has duplicate values. 

2245 

2246 Returns 

2247 ------- 

2248 bool 

2249 Whether or not the Index has duplicate values. 

2250 

2251 See Also 

2252 -------- 

2253 Index.is_unique : Inverse method that checks if it has unique values. 

2254 

2255 Examples 

2256 -------- 

2257 >>> idx = pd.Index([1, 5, 7, 7]) 

2258 >>> idx.has_duplicates 

2259 True 

2260 

2261 >>> idx = pd.Index([1, 5, 7]) 

2262 >>> idx.has_duplicates 

2263 False 

2264 

2265 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2266 ... "Watermelon"]).astype("category") 

2267 >>> idx.has_duplicates 

2268 True 

2269 

2270 >>> idx = pd.Index(["Orange", "Apple", 

2271 ... "Watermelon"]).astype("category") 

2272 >>> idx.has_duplicates 

2273 False 

2274 """ 

2275 return not self.is_unique 

2276 

2277 @final 

2278 def is_boolean(self) -> bool: 

2279 """ 

2280 Check if the Index only consists of booleans. 

2281 

2282 .. deprecated:: 2.0.0 

2283 Use `pandas.api.types.is_bool_dtype` instead. 

2284 

2285 Returns 

2286 ------- 

2287 bool 

2288 Whether or not the Index only consists of booleans. 

2289 

2290 See Also 

2291 -------- 

2292 is_integer : Check if the Index only consists of integers (deprecated). 

2293 is_floating : Check if the Index is a floating type (deprecated). 

2294 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2295 is_object : Check if the Index is of the object dtype (deprecated). 

2296 is_categorical : Check if the Index holds categorical data. 

2297 is_interval : Check if the Index holds Interval objects (deprecated). 

2298 

2299 Examples 

2300 -------- 

2301 >>> idx = pd.Index([True, False, True]) 

2302 >>> idx.is_boolean() # doctest: +SKIP 

2303 True 

2304 

2305 >>> idx = pd.Index(["True", "False", "True"]) 

2306 >>> idx.is_boolean() # doctest: +SKIP 

2307 False 

2308 

2309 >>> idx = pd.Index([True, False, "True"]) 

2310 >>> idx.is_boolean() # doctest: +SKIP 

2311 False 

2312 """ 

2313 warnings.warn( 

2314 f"{type(self).__name__}.is_boolean is deprecated. " 

2315 "Use pandas.api.types.is_bool_type instead.", 

2316 FutureWarning, 

2317 stacklevel=find_stack_level(), 

2318 ) 

2319 return self.inferred_type in ["boolean"] 

2320 

2321 @final 

2322 def is_integer(self) -> bool: 

2323 """ 

2324 Check if the Index only consists of integers. 

2325 

2326 .. deprecated:: 2.0.0 

2327 Use `pandas.api.types.is_integer_dtype` instead. 

2328 

2329 Returns 

2330 ------- 

2331 bool 

2332 Whether or not the Index only consists of integers. 

2333 

2334 See Also 

2335 -------- 

2336 is_boolean : Check if the Index only consists of booleans (deprecated). 

2337 is_floating : Check if the Index is a floating type (deprecated). 

2338 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2339 is_object : Check if the Index is of the object dtype. (deprecated). 

2340 is_categorical : Check if the Index holds categorical data (deprecated). 

2341 is_interval : Check if the Index holds Interval objects (deprecated). 

2342 

2343 Examples 

2344 -------- 

2345 >>> idx = pd.Index([1, 2, 3, 4]) 

2346 >>> idx.is_integer() # doctest: +SKIP 

2347 True 

2348 

2349 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2350 >>> idx.is_integer() # doctest: +SKIP 

2351 False 

2352 

2353 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) 

2354 >>> idx.is_integer() # doctest: +SKIP 

2355 False 

2356 """ 

2357 warnings.warn( 

2358 f"{type(self).__name__}.is_integer is deprecated. " 

2359 "Use pandas.api.types.is_integer_dtype instead.", 

2360 FutureWarning, 

2361 stacklevel=find_stack_level(), 

2362 ) 

2363 return self.inferred_type in ["integer"] 

2364 

2365 @final 

2366 def is_floating(self) -> bool: 

2367 """ 

2368 Check if the Index is a floating type. 

2369 

2370 .. deprecated:: 2.0.0 

2371 Use `pandas.api.types.is_float_dtype` instead 

2372 

2373 The Index may consist of only floats, NaNs, or a mix of floats, 

2374 integers, or NaNs. 

2375 

2376 Returns 

2377 ------- 

2378 bool 

2379 Whether or not the Index only consists of only consists of floats, NaNs, or 

2380 a mix of floats, integers, or NaNs. 

2381 

2382 See Also 

2383 -------- 

2384 is_boolean : Check if the Index only consists of booleans (deprecated). 

2385 is_integer : Check if the Index only consists of integers (deprecated). 

2386 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2387 is_object : Check if the Index is of the object dtype. (deprecated). 

2388 is_categorical : Check if the Index holds categorical data (deprecated). 

2389 is_interval : Check if the Index holds Interval objects (deprecated). 

2390 

2391 Examples 

2392 -------- 

2393 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2394 >>> idx.is_floating() # doctest: +SKIP 

2395 True 

2396 

2397 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0]) 

2398 >>> idx.is_floating() # doctest: +SKIP 

2399 True 

2400 

2401 >>> idx = pd.Index([1, 2, 3, 4, np.nan]) 

2402 >>> idx.is_floating() # doctest: +SKIP 

2403 True 

2404 

2405 >>> idx = pd.Index([1, 2, 3, 4]) 

2406 >>> idx.is_floating() # doctest: +SKIP 

2407 False 

2408 """ 

2409 warnings.warn( 

2410 f"{type(self).__name__}.is_floating is deprecated. " 

2411 "Use pandas.api.types.is_float_dtype instead.", 

2412 FutureWarning, 

2413 stacklevel=find_stack_level(), 

2414 ) 

2415 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"] 

2416 

2417 @final 

2418 def is_numeric(self) -> bool: 

2419 """ 

2420 Check if the Index only consists of numeric data. 

2421 

2422 .. deprecated:: 2.0.0 

2423 Use `pandas.api.types.is_numeric_dtype` instead. 

2424 

2425 Returns 

2426 ------- 

2427 bool 

2428 Whether or not the Index only consists of numeric data. 

2429 

2430 See Also 

2431 -------- 

2432 is_boolean : Check if the Index only consists of booleans (deprecated). 

2433 is_integer : Check if the Index only consists of integers (deprecated). 

2434 is_floating : Check if the Index is a floating type (deprecated). 

2435 is_object : Check if the Index is of the object dtype. (deprecated). 

2436 is_categorical : Check if the Index holds categorical data (deprecated). 

2437 is_interval : Check if the Index holds Interval objects (deprecated). 

2438 

2439 Examples 

2440 -------- 

2441 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2442 >>> idx.is_numeric() # doctest: +SKIP 

2443 True 

2444 

2445 >>> idx = pd.Index([1, 2, 3, 4.0]) 

2446 >>> idx.is_numeric() # doctest: +SKIP 

2447 True 

2448 

2449 >>> idx = pd.Index([1, 2, 3, 4]) 

2450 >>> idx.is_numeric() # doctest: +SKIP 

2451 True 

2452 

2453 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan]) 

2454 >>> idx.is_numeric() # doctest: +SKIP 

2455 True 

2456 

2457 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"]) 

2458 >>> idx.is_numeric() # doctest: +SKIP 

2459 False 

2460 """ 

2461 warnings.warn( 

2462 f"{type(self).__name__}.is_numeric is deprecated. " 

2463 "Use pandas.api.types.is_any_real_numeric_dtype instead", 

2464 FutureWarning, 

2465 stacklevel=find_stack_level(), 

2466 ) 

2467 return self.inferred_type in ["integer", "floating"] 

2468 

2469 @final 

2470 def is_object(self) -> bool: 

2471 """ 

2472 Check if the Index is of the object dtype. 

2473 

2474 .. deprecated:: 2.0.0 

2475 Use `pandas.api.types.is_object_dtype` instead. 

2476 

2477 Returns 

2478 ------- 

2479 bool 

2480 Whether or not the Index is of the object dtype. 

2481 

2482 See Also 

2483 -------- 

2484 is_boolean : Check if the Index only consists of booleans (deprecated). 

2485 is_integer : Check if the Index only consists of integers (deprecated). 

2486 is_floating : Check if the Index is a floating type (deprecated). 

2487 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2488 is_categorical : Check if the Index holds categorical data (deprecated). 

2489 is_interval : Check if the Index holds Interval objects (deprecated). 

2490 

2491 Examples 

2492 -------- 

2493 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"]) 

2494 >>> idx.is_object() # doctest: +SKIP 

2495 True 

2496 

2497 >>> idx = pd.Index(["Apple", "Mango", 2.0]) 

2498 >>> idx.is_object() # doctest: +SKIP 

2499 True 

2500 

2501 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2502 ... "Watermelon"]).astype("category") 

2503 >>> idx.is_object() # doctest: +SKIP 

2504 False 

2505 

2506 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0]) 

2507 >>> idx.is_object() # doctest: +SKIP 

2508 False 

2509 """ 

2510 warnings.warn( 

2511 f"{type(self).__name__}.is_object is deprecated." 

2512 "Use pandas.api.types.is_object_dtype instead", 

2513 FutureWarning, 

2514 stacklevel=find_stack_level(), 

2515 ) 

2516 return is_object_dtype(self.dtype) 

2517 

2518 @final 

2519 def is_categorical(self) -> bool: 

2520 """ 

2521 Check if the Index holds categorical data. 

2522 

2523 .. deprecated:: 2.0.0 

2524 Use `isinstance(index.dtype, pd.CategoricalDtype)` instead. 

2525 

2526 Returns 

2527 ------- 

2528 bool 

2529 True if the Index is categorical. 

2530 

2531 See Also 

2532 -------- 

2533 CategoricalIndex : Index for categorical data. 

2534 is_boolean : Check if the Index only consists of booleans (deprecated). 

2535 is_integer : Check if the Index only consists of integers (deprecated). 

2536 is_floating : Check if the Index is a floating type (deprecated). 

2537 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2538 is_object : Check if the Index is of the object dtype. (deprecated). 

2539 is_interval : Check if the Index holds Interval objects (deprecated). 

2540 

2541 Examples 

2542 -------- 

2543 >>> idx = pd.Index(["Watermelon", "Orange", "Apple", 

2544 ... "Watermelon"]).astype("category") 

2545 >>> idx.is_categorical() # doctest: +SKIP 

2546 True 

2547 

2548 >>> idx = pd.Index([1, 3, 5, 7]) 

2549 >>> idx.is_categorical() # doctest: +SKIP 

2550 False 

2551 

2552 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"]) 

2553 >>> s 

2554 0 Peter 

2555 1 Victor 

2556 2 Elisabeth 

2557 3 Mar 

2558 dtype: object 

2559 >>> s.index.is_categorical() # doctest: +SKIP 

2560 False 

2561 """ 

2562 warnings.warn( 

2563 f"{type(self).__name__}.is_categorical is deprecated." 

2564 "Use pandas.api.types.is_categorical_dtype instead", 

2565 FutureWarning, 

2566 stacklevel=find_stack_level(), 

2567 ) 

2568 

2569 return self.inferred_type in ["categorical"] 

2570 

2571 @final 

2572 def is_interval(self) -> bool: 

2573 """ 

2574 Check if the Index holds Interval objects. 

2575 

2576 .. deprecated:: 2.0.0 

2577 Use `isinstance(index.dtype, pd.IntervalDtype)` instead. 

2578 

2579 Returns 

2580 ------- 

2581 bool 

2582 Whether or not the Index holds Interval objects. 

2583 

2584 See Also 

2585 -------- 

2586 IntervalIndex : Index for Interval objects. 

2587 is_boolean : Check if the Index only consists of booleans (deprecated). 

2588 is_integer : Check if the Index only consists of integers (deprecated). 

2589 is_floating : Check if the Index is a floating type (deprecated). 

2590 is_numeric : Check if the Index only consists of numeric data (deprecated). 

2591 is_object : Check if the Index is of the object dtype. (deprecated). 

2592 is_categorical : Check if the Index holds categorical data (deprecated). 

2593 

2594 Examples 

2595 -------- 

2596 >>> idx = pd.Index([pd.Interval(left=0, right=5), 

2597 ... pd.Interval(left=5, right=10)]) 

2598 >>> idx.is_interval() # doctest: +SKIP 

2599 True 

2600 

2601 >>> idx = pd.Index([1, 3, 5, 7]) 

2602 >>> idx.is_interval() # doctest: +SKIP 

2603 False 

2604 """ 

2605 warnings.warn( 

2606 f"{type(self).__name__}.is_interval is deprecated." 

2607 "Use pandas.api.types.is_interval_dtype instead", 

2608 FutureWarning, 

2609 stacklevel=find_stack_level(), 

2610 ) 

2611 return self.inferred_type in ["interval"] 

2612 

2613 @final 

2614 def _holds_integer(self) -> bool: 

2615 """ 

2616 Whether the type is an integer type. 

2617 """ 

2618 return self.inferred_type in ["integer", "mixed-integer"] 

2619 

2620 @final 

2621 def holds_integer(self) -> bool: 

2622 """ 

2623 Whether the type is an integer type. 

2624 

2625 .. deprecated:: 2.0.0 

2626 Use `pandas.api.types.infer_dtype` instead 

2627 """ 

2628 warnings.warn( 

2629 f"{type(self).__name__}.holds_integer is deprecated. " 

2630 "Use pandas.api.types.infer_dtype instead.", 

2631 FutureWarning, 

2632 stacklevel=find_stack_level(), 

2633 ) 

2634 return self._holds_integer() 

2635 

2636 @cache_readonly 

2637 def inferred_type(self) -> str_t: 

2638 """ 

2639 Return a string of the type inferred from the values. 

2640 """ 

2641 return lib.infer_dtype(self._values, skipna=False) 

2642 

2643 @cache_readonly 

2644 @final 

2645 def _is_all_dates(self) -> bool: 

2646 """ 

2647 Whether or not the index values only consist of dates. 

2648 """ 

2649 if needs_i8_conversion(self.dtype): 

2650 return True 

2651 elif self.dtype != _dtype_obj: 

2652 # TODO(ExtensionIndex): 3rd party EA might override? 

2653 # Note: this includes IntervalIndex, even when the left/right 

2654 # contain datetime-like objects. 

2655 return False 

2656 elif self._is_multi: 

2657 return False 

2658 return is_datetime_array(ensure_object(self._values)) 

2659 

2660 @final 

2661 @cache_readonly 

2662 def _is_multi(self) -> bool: 

2663 """ 

2664 Cached check equivalent to isinstance(self, MultiIndex) 

2665 """ 

2666 return isinstance(self, ABCMultiIndex) 

2667 

2668 # -------------------------------------------------------------------- 

2669 # Pickle Methods 

2670 

2671 def __reduce__(self): 

2672 d = {"data": self._data, "name": self.name} 

2673 return _new_Index, (type(self), d), None 

2674 

2675 # -------------------------------------------------------------------- 

2676 # Null Handling Methods 

2677 

2678 @cache_readonly 

2679 def _na_value(self): 

2680 """The expected NA value to use with this index.""" 

2681 dtype = self.dtype 

2682 if isinstance(dtype, np.dtype): 

2683 if dtype.kind in ["m", "M"]: 

2684 return NaT 

2685 return np.nan 

2686 return dtype.na_value 

2687 

2688 @cache_readonly 

2689 def _isnan(self) -> npt.NDArray[np.bool_]: 

2690 """ 

2691 Return if each value is NaN. 

2692 """ 

2693 if self._can_hold_na: 

2694 return isna(self) 

2695 else: 

2696 # shouldn't reach to this condition by checking hasnans beforehand 

2697 values = np.empty(len(self), dtype=np.bool_) 

2698 values.fill(False) 

2699 return values 

2700 

2701 @cache_readonly 

2702 def hasnans(self) -> bool: 

2703 """ 

2704 Return True if there are any NaNs. 

2705 

2706 Enables various performance speedups. 

2707 

2708 Returns 

2709 ------- 

2710 bool 

2711 """ 

2712 if self._can_hold_na: 

2713 return bool(self._isnan.any()) 

2714 else: 

2715 return False 

2716 

2717 @final 

2718 def isna(self) -> npt.NDArray[np.bool_]: 

2719 """ 

2720 Detect missing values. 

2721 

2722 Return a boolean same-sized object indicating if the values are NA. 

2723 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get 

2724 mapped to ``True`` values. 

2725 Everything else get mapped to ``False`` values. Characters such as 

2726 empty strings `''` or :attr:`numpy.inf` are not considered NA values 

2727 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

2728 

2729 Returns 

2730 ------- 

2731 numpy.ndarray[bool] 

2732 A boolean array of whether my values are NA. 

2733 

2734 See Also 

2735 -------- 

2736 Index.notna : Boolean inverse of isna. 

2737 Index.dropna : Omit entries with missing values. 

2738 isna : Top-level isna. 

2739 Series.isna : Detect missing values in Series object. 

2740 

2741 Examples 

2742 -------- 

2743 Show which entries in a pandas.Index are NA. The result is an 

2744 array. 

2745 

2746 >>> idx = pd.Index([5.2, 6.0, np.NaN]) 

2747 >>> idx 

2748 Index([5.2, 6.0, nan], dtype='float64') 

2749 >>> idx.isna() 

2750 array([False, False, True]) 

2751 

2752 Empty strings are not considered NA values. None is considered an NA 

2753 value. 

2754 

2755 >>> idx = pd.Index(['black', '', 'red', None]) 

2756 >>> idx 

2757 Index(['black', '', 'red', None], dtype='object') 

2758 >>> idx.isna() 

2759 array([False, False, False, True]) 

2760 

2761 For datetimes, `NaT` (Not a Time) is considered as an NA value. 

2762 

2763 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'), 

2764 ... pd.Timestamp(''), None, pd.NaT]) 

2765 >>> idx 

2766 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'], 

2767 dtype='datetime64[ns]', freq=None) 

2768 >>> idx.isna() 

2769 array([False, True, True, True]) 

2770 """ 

2771 return self._isnan 

2772 

2773 isnull = isna 

2774 

2775 @final 

2776 def notna(self) -> npt.NDArray[np.bool_]: 

2777 """ 

2778 Detect existing (non-missing) values. 

2779 

2780 Return a boolean same-sized object indicating if the values are not NA. 

2781 Non-missing values get mapped to ``True``. Characters such as empty 

2782 strings ``''`` or :attr:`numpy.inf` are not considered NA values 

2783 (unless you set ``pandas.options.mode.use_inf_as_na = True``). 

2784 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False`` 

2785 values. 

2786 

2787 Returns 

2788 ------- 

2789 numpy.ndarray[bool] 

2790 Boolean array to indicate which entries are not NA. 

2791 

2792 See Also 

2793 -------- 

2794 Index.notnull : Alias of notna. 

2795 Index.isna: Inverse of notna. 

2796 notna : Top-level notna. 

2797 

2798 Examples 

2799 -------- 

2800 Show which entries in an Index are not NA. The result is an 

2801 array. 

2802 

2803 >>> idx = pd.Index([5.2, 6.0, np.NaN]) 

2804 >>> idx 

2805 Index([5.2, 6.0, nan], dtype='float64') 

2806 >>> idx.notna() 

2807 array([ True, True, False]) 

2808 

2809 Empty strings are not considered NA values. None is considered a NA 

2810 value. 

2811 

2812 >>> idx = pd.Index(['black', '', 'red', None]) 

2813 >>> idx 

2814 Index(['black', '', 'red', None], dtype='object') 

2815 >>> idx.notna() 

2816 array([ True, True, True, False]) 

2817 """ 

2818 return ~self.isna() 

2819 

2820 notnull = notna 

2821 

2822 def fillna(self, value=None, downcast=None): 

2823 """ 

2824 Fill NA/NaN values with the specified value. 

2825 

2826 Parameters 

2827 ---------- 

2828 value : scalar 

2829 Scalar value to use to fill holes (e.g. 0). 

2830 This value cannot be a list-likes. 

2831 downcast : dict, default is None 

2832 A dict of item->dtype of what to downcast if possible, 

2833 or the string 'infer' which will try to downcast to an appropriate 

2834 equal type (e.g. float64 to int64 if possible). 

2835 

2836 Returns 

2837 ------- 

2838 Index 

2839 

2840 See Also 

2841 -------- 

2842 DataFrame.fillna : Fill NaN values of a DataFrame. 

2843 Series.fillna : Fill NaN Values of a Series. 

2844 """ 

2845 

2846 value = self._require_scalar(value) 

2847 if self.hasnans: 

2848 result = self.putmask(self._isnan, value) 

2849 if downcast is None: 

2850 # no need to care metadata other than name 

2851 # because it can't have freq if it has NaTs 

2852 # _with_infer needed for test_fillna_categorical 

2853 return Index._with_infer(result, name=self.name) 

2854 raise NotImplementedError( 

2855 f"{type(self).__name__}.fillna does not support 'downcast' " 

2856 "argument values other than 'None'." 

2857 ) 

2858 return self._view() 

2859 

2860 def dropna(self: _IndexT, how: AnyAll = "any") -> _IndexT: 

2861 """ 

2862 Return Index without NA/NaN values. 

2863 

2864 Parameters 

2865 ---------- 

2866 how : {'any', 'all'}, default 'any' 

2867 If the Index is a MultiIndex, drop the value when any or all levels 

2868 are NaN. 

2869 

2870 Returns 

2871 ------- 

2872 Index 

2873 """ 

2874 if how not in ("any", "all"): 

2875 raise ValueError(f"invalid how option: {how}") 

2876 

2877 if self.hasnans: 

2878 res_values = self._values[~self._isnan] 

2879 return type(self)._simple_new(res_values, name=self.name) 

2880 return self._view() 

2881 

2882 # -------------------------------------------------------------------- 

2883 # Uniqueness Methods 

2884 

2885 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT: 

2886 """ 

2887 Return unique values in the index. 

2888 

2889 Unique values are returned in order of appearance, this does NOT sort. 

2890 

2891 Parameters 

2892 ---------- 

2893 level : int or hashable, optional 

2894 Only return values from specified level (for MultiIndex). 

2895 If int, gets the level by integer position, else by level name. 

2896 

2897 Returns 

2898 ------- 

2899 Index 

2900 

2901 See Also 

2902 -------- 

2903 unique : Numpy array of unique values in that column. 

2904 Series.unique : Return unique values of Series object. 

2905 """ 

2906 if level is not None: 

2907 self._validate_index_level(level) 

2908 

2909 if self.is_unique: 

2910 return self._view() 

2911 

2912 result = super().unique() 

2913 return self._shallow_copy(result) 

2914 

2915 def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT: 

2916 """ 

2917 Return Index with duplicate values removed. 

2918 

2919 Parameters 

2920 ---------- 

2921 keep : {'first', 'last', ``False``}, default 'first' 

2922 - 'first' : Drop duplicates except for the first occurrence. 

2923 - 'last' : Drop duplicates except for the last occurrence. 

2924 - ``False`` : Drop all duplicates. 

2925 

2926 Returns 

2927 ------- 

2928 Index 

2929 

2930 See Also 

2931 -------- 

2932 Series.drop_duplicates : Equivalent method on Series. 

2933 DataFrame.drop_duplicates : Equivalent method on DataFrame. 

2934 Index.duplicated : Related method on Index, indicating duplicate 

2935 Index values. 

2936 

2937 Examples 

2938 -------- 

2939 Generate an pandas.Index with duplicate values. 

2940 

2941 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo']) 

2942 

2943 The `keep` parameter controls which duplicate values are removed. 

2944 The value 'first' keeps the first occurrence for each 

2945 set of duplicated entries. The default value of keep is 'first'. 

2946 

2947 >>> idx.drop_duplicates(keep='first') 

2948 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object') 

2949 

2950 The value 'last' keeps the last occurrence for each set of duplicated 

2951 entries. 

2952 

2953 >>> idx.drop_duplicates(keep='last') 

2954 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object') 

2955 

2956 The value ``False`` discards all sets of duplicated entries. 

2957 

2958 >>> idx.drop_duplicates(keep=False) 

2959 Index(['cow', 'beetle', 'hippo'], dtype='object') 

2960 """ 

2961 if self.is_unique: 

2962 return self._view() 

2963 

2964 return super().drop_duplicates(keep=keep) 

2965 

2966 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]: 

2967 """ 

2968 Indicate duplicate index values. 

2969 

2970 Duplicated values are indicated as ``True`` values in the resulting 

2971 array. Either all duplicates, all except the first, or all except the 

2972 last occurrence of duplicates can be indicated. 

2973 

2974 Parameters 

2975 ---------- 

2976 keep : {'first', 'last', False}, default 'first' 

2977 The value or values in a set of duplicates to mark as missing. 

2978 

2979 - 'first' : Mark duplicates as ``True`` except for the first 

2980 occurrence. 

2981 - 'last' : Mark duplicates as ``True`` except for the last 

2982 occurrence. 

2983 - ``False`` : Mark all duplicates as ``True``. 

2984 

2985 Returns 

2986 ------- 

2987 np.ndarray[bool] 

2988 

2989 See Also 

2990 -------- 

2991 Series.duplicated : Equivalent method on pandas.Series. 

2992 DataFrame.duplicated : Equivalent method on pandas.DataFrame. 

2993 Index.drop_duplicates : Remove duplicate values from Index. 

2994 

2995 Examples 

2996 -------- 

2997 By default, for each set of duplicated values, the first occurrence is 

2998 set to False and all others to True: 

2999 

3000 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama']) 

3001 >>> idx.duplicated() 

3002 array([False, False, True, False, True]) 

3003 

3004 which is equivalent to 

3005 

3006 >>> idx.duplicated(keep='first') 

3007 array([False, False, True, False, True]) 

3008 

3009 By using 'last', the last occurrence of each set of duplicated values 

3010 is set on False and all others on True: 

3011 

3012 >>> idx.duplicated(keep='last') 

3013 array([ True, False, True, False, False]) 

3014 

3015 By setting keep on ``False``, all duplicates are True: 

3016 

3017 >>> idx.duplicated(keep=False) 

3018 array([ True, False, True, False, True]) 

3019 """ 

3020 if self.is_unique: 

3021 # fastpath available bc we are immutable 

3022 return np.zeros(len(self), dtype=bool) 

3023 return self._duplicated(keep=keep) 

3024 

3025 # -------------------------------------------------------------------- 

3026 # Arithmetic & Logical Methods 

3027 

3028 def __iadd__(self, other): 

3029 # alias for __add__ 

3030 return self + other 

3031 

3032 @final 

3033 def __nonzero__(self) -> NoReturn: 

3034 raise ValueError( 

3035 f"The truth value of a {type(self).__name__} is ambiguous. " 

3036 "Use a.empty, a.bool(), a.item(), a.any() or a.all()." 

3037 ) 

3038 

3039 __bool__ = __nonzero__ 

3040 

3041 # -------------------------------------------------------------------- 

3042 # Set Operation Methods 

3043 

3044 def _get_reconciled_name_object(self, other): 

3045 """ 

3046 If the result of a set operation will be self, 

3047 return self, unless the name changes, in which 

3048 case make a shallow copy of self. 

3049 """ 

3050 name = get_op_result_name(self, other) 

3051 if self.name is not name: 

3052 return self.rename(name) 

3053 return self 

3054 

3055 @final 

3056 def _validate_sort_keyword(self, sort): 

3057 if sort not in [None, False, True]: 

3058 raise ValueError( 

3059 "The 'sort' keyword only takes the values of " 

3060 f"None, True, or False; {sort} was passed." 

3061 ) 

3062 

    @final
    def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
        """
        With mismatched timezones, cast both to UTC.

        ``setop`` names the calling set operation; it is not used here but
        kept so subclasses/overrides can dispatch on it.
        """
        # Caller is responsible for checking
        # `not is_dtype_equal(self.dtype, other.dtype)`
        if (
            isinstance(self, ABCDatetimeIndex)
            and isinstance(other, ABCDatetimeIndex)
            and self.tz is not None
            and other.tz is not None
        ):
            # Both sides are tz-aware with (per the caller's check) different
            # zones: converting both to UTC makes them comparable.
            # GH#39328, GH#45357
            left = self.tz_convert("UTC")
            right = other.tz_convert("UTC")
            return left, right
        # Any other dtype mismatch is left for the caller to resolve.
        return self, other

3081 

    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

        MultiIndex case

        >>> idx1 = pd.MultiIndex.from_arrays(
        ...     [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
        ... )
        >>> idx1
        MultiIndex([(1,  'Red'),
                    (1, 'Blue'),
                    (2,  'Red'),
                    (2, 'Blue')],
                   )
        >>> idx2 = pd.MultiIndex.from_arrays(
        ...     [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
        ... )
        >>> idx2
        MultiIndex([(3,   'Red'),
                    (3, 'Green'),
                    (2,   'Red'),
                    (2, 'Green')],
                   )
        >>> idx1.union(idx2)
        MultiIndex([(1,  'Blue'),
                    (1,   'Red'),
                    (2,  'Blue'),
                    (2, 'Green'),
                    (2,   'Red'),
                    (3, 'Green'),
                    (3,   'Red')],
                   )
        >>> idx1.union(idx2, sort=False)
        MultiIndex([(1,   'Red'),
                    (1,  'Blue'),
                    (2,   'Red'),
                    (2,  'Blue'),
                    (3,   'Red'),
                    (3, 'Green'),
                    (2, 'Green')],
                   )
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mismatched dtypes: reject non-tuple unions with MultiIndex,
            # align tz-aware datetimes, then recurse on a common dtype.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(_unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            self, other = self._dti_setop_align_tzs(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            # or after the is_dtype_equal check above affects the returned dtype
            result = self._get_reconciled_name_object(other)
            if sort is True:
                return result.sort_values()
            return result

        elif not len(self):
            # empty self: result is just `other` (renamed), optionally sorted
            result = other._get_reconciled_name_object(self)
            if sort is True:
                return result.sort_values()
            return result

        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)

3203 

    def _union(self, other: Index, sort: bool | None):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * True : sort the result
            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        lvals = self._values
        rvals = other._values

        if (
            sort in (None, True)
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            # (actually don't need either unique, but without this restriction
            # test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates
            result_dups = algos.union_with_duplicates(self, other)
            return _maybe_try_sort(result_dups, sort)

        # The rest of this method is analogous to Index._intersection_via_get_indexer

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            missing = (indexer == -1).nonzero()[0]
        else:
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        result: Index | MultiIndex | ArrayLike
        if self._is_multi:
            # Preserve MultiIndex to avoid losing dtypes
            result = self.append(other.take(missing))

        else:
            if len(missing) > 0:
                # append only the values of `other` not already present
                other_diff = rvals.take(missing)
                result = concat_compat((lvals, other_diff))
            else:
                result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result

3281 

3282 @final 

3283 def _wrap_setop_result(self, other: Index, result) -> Index: 

3284 name = get_op_result_name(self, other) 

3285 if isinstance(result, Index): 

3286 if result.name != name: 

3287 result = result.rename(name) 

3288 else: 

3289 result = self._shallow_copy(result, name=name) 

3290 return result 

3291 

    @final
    def intersection(self, other, sort: bool = False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : True, False or None, default False
            Whether to sort the resulting index.

            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.
            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # tz-aware datetimes with different zones are made comparable via UTC
            self, other = self._dti_setop_align_tzs(other, "intersection")

        if self.equals(other):
            if self.has_duplicates:
                result = self.unique()._get_reconciled_name_object(other)
            else:
                result = self._get_reconciled_name_object(other)
            if sort is True:
                result = result.sort_values()
            return result

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if is_dtype_equal(self.dtype, dtype):
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                #  1) which index's `freq` we get in DTI/TDI cases
                #     This may be a historical artifact, i.e. no documented
                #     reason for this choice.
                #  2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # recurse after casting both sides to the common dtype
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)

3375 

    def _intersection(self, other: Index, sort: bool = False):
        """
        intersection specialized to the case with matching dtypes.

        Tries a fast libjoin inner-join path for monotonic, comparable
        operands; otherwise falls back to get_indexer-based matching.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and not isinstance(self, ABCMultiIndex)
        ):
            try:
                res_indexer, indexer, _ = self._inner_indexer(other)
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                if is_numeric_dtype(self):
                    # This is faster, because Index.unique() checks for uniqueness
                    # before calculating the unique values.
                    res = algos.unique1d(res_indexer)
                else:
                    result = self.take(indexer)
                    res = result.drop_duplicates()
                return ensure_wrapped_if_datetimelike(res)

        # slow path: match positions via get_indexer, then optionally sort
        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values

3405 

    def _wrap_intersection_result(self, other, result):
        """
        Wrap the raw intersection result in an Index with a reconciled name.
        """
        # We will override for MultiIndex to handle empty results
        return self._wrap_setop_result(other, result)

3409 

    @final
    def _intersection_via_get_indexer(
        self, other: Index | MultiIndex, sort
    ) -> ArrayLike | MultiIndex:
        """
        Find the intersection of two Indexes using get_indexer.

        Returns
        -------
        np.ndarray or ExtensionArray
            The returned array will be unique.
        """
        left_unique = self.unique()
        right_unique = other.unique()

        # even though we are unique, we need get_indexer_for for IntervalIndex
        indexer = left_unique.get_indexer_for(right_unique)

        # -1 marks elements of right_unique absent from left_unique
        mask = indexer != -1

        taker = indexer.take(mask.nonzero()[0])
        if sort is False:
            # sort bc we want the elements in the same order they are in self
            # unnecessary in the case with sort=None bc we will sort later
            taker = np.sort(taker)

        if isinstance(left_unique, ABCMultiIndex):
            # keep as MultiIndex to preserve per-level dtypes
            result = left_unique.take(taker)
        else:
            result = left_unique.take(taker)._values
        return result

3441 

    @final
    def difference(self, other, sort=None):
        """
        Return a new Index with elements of index not in `other`.

        This is the set difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        >>> idx1 = pd.Index([2, 1, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.difference(idx2)
        Index([1, 2], dtype='int64')
        >>> idx1.difference(idx2, sort=False)
        Index([2, 1], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        # Note: we do NOT call _dti_setop_align_tzs here, as there
        # is no requirement that .difference be commutative, so it does
        # not cast to object.

        if self.equals(other):
            # Note: we do not (yet) sort even if sort=None GH#24959
            return self[:0].rename(result_name)

        if len(other) == 0:
            # Note: we do not (yet) sort even if sort=None GH#24959
            result = self.rename(result_name)
            if sort is True:
                return result.sort_values()
            return result

        if not self._should_compare(other):
            # Nothing matches -> difference is everything
            result = self.rename(result_name)
            if sort is True:
                return result.sort_values()
            return result

        result = self._difference(other, sort=sort)
        return self._wrap_difference_result(other, result)

3503 

3504 def _difference(self, other, sort): 

3505 # overridden by RangeIndex 

3506 

3507 this = self.unique() 

3508 

3509 indexer = this.get_indexer_for(other) 

3510 indexer = indexer.take((indexer != -1).nonzero()[0]) 

3511 

3512 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True) 

3513 

3514 the_diff: MultiIndex | ArrayLike 

3515 if isinstance(this, ABCMultiIndex): 

3516 the_diff = this.take(label_diff) 

3517 else: 

3518 the_diff = this._values.take(label_diff) 

3519 the_diff = _maybe_try_sort(the_diff, sort) 

3520 

3521 return the_diff 

3522 

    def _wrap_difference_result(self, other, result):
        """
        Wrap the raw difference result in an Index with a reconciled name.
        """
        # We will override for MultiIndex to handle empty results
        return self._wrap_setop_result(other, result)

3526 

    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : bool or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        if result_name is None:
            result_name = result_name_update

        if not is_dtype_equal(self.dtype, other.dtype):
            # tz-aware datetimes with different zones are made comparable via UTC
            self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

        if not self._should_compare(other):
            # nothing in common -> symmetric difference is the full union
            return self.union(other, sort=sort).rename(result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # recurse after casting both sides to the common dtype
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other.take(right_indexer)

        res_values = left_diff.append(right_diff)
        result = _maybe_try_sort(res_values, sort)

        if not self._is_multi:
            return Index(result, name=result_name, dtype=res_values.dtype)
        else:
            left_diff = cast("MultiIndex", left_diff)
            if len(result) == 0:
                # result might be an Index, if other was an Index
                return left_diff.remove_unused_levels().set_names(result_name)
            return result.set_names(result_name)

3607 

3608 @final 

3609 def _assert_can_do_setop(self, other) -> bool: 

3610 if not is_list_like(other): 

3611 raise TypeError("Input must be Index or array-like") 

3612 return True 

3613 

3614 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]: 

3615 if not isinstance(other, Index): 

3616 other = Index(other, name=self.name) 

3617 result_name = self.name 

3618 else: 

3619 result_name = get_op_result_name(self, other) 

3620 return other, result_name 

3621 

3622 # -------------------------------------------------------------------- 

3623 # Indexing Methods 

3624 

    def get_loc(self, key):
        """
        Get integer location, slice or boolean mask for requested label.

        Parameters
        ----------
        key : label

        Returns
        -------
        int if unique index, slice if monotonic index, else mask

        Raises
        ------
        KeyError
            If ``key`` is not present in the index.

        Examples
        --------
        >>> unique_index = pd.Index(list('abc'))
        >>> unique_index.get_loc('b')
        1

        >>> monotonic_index = pd.Index(list('abbc'))
        >>> monotonic_index.get_loc('b')
        slice(1, 3, None)

        >>> non_monotonic_index = pd.Index(list('abcb'))
        >>> non_monotonic_index.get_loc('b')
        array([False,  True, False,  True])
        """
        casted_key = self._maybe_cast_indexer(key)
        try:
            return self._engine.get_loc(casted_key)
        except KeyError as err:
            # re-raise with the original (uncast) key for a clearer message
            raise KeyError(key) from err
        except TypeError:
            # If we have a listlike key, _check_indexing_error will raise
            # InvalidIndexError. Otherwise we fall through and re-raise
            # the TypeError.
            self._check_indexing_error(key)
            raise

3662 

    # Shared docstring template for get_indexer; rendered below via the
    # Appender decorator with per-class substitutions such as
    # %(target_klass)s and %(raises_section)s.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """

3715 

    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    @final
    def get_indexer(
        self,
        target,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        # See the shared docstring (applied by Appender above) for the
        # contract; this body handles validation, dtype alignment and
        # categorical fastpaths before delegating to _get_indexer.
        method = clean_reindex_fill_method(method)
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            # matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if is_categorical_dtype(self.dtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            #  (could improve perf by doing _should_compare check earlier?)
            assert is_dtype_equal(self.dtype, target.dtype)

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if is_categorical_dtype(target.dtype):
            # potential fastpath
            # get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            # e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            # promotion changed one of the operands; redo with the promoted pair
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if not is_dtype_equal(
            self.dtype, target.dtype
        ) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            #  that can be matched to Interval scalars.
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)

3803 

    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Core get_indexer logic after validation/dtype alignment.

        Dispatches to fill-based (pad/backfill), nearest, or exact engine
        matching depending on ``method``.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes(  # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            # exact matching via the hash-table engine
            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)

3832 

3833 @final 

3834 def _should_partial_index(self, target: Index) -> bool: 

3835 """ 

3836 Should we attempt partial-matching indexing? 

3837 """ 

3838 if is_interval_dtype(self.dtype): 

3839 if is_interval_dtype(target.dtype): 

3840 return False 

3841 # See https://github.com/pandas-dev/pandas/issues/47772 the commented 

3842 # out code can be restored (instead of hardcoding `return True`) 

3843 # once that issue is fixed 

3844 # "Index" has no attribute "left" 

3845 # return self.left._should_compare(target) # type: ignore[attr-defined] 

3846 return True 

3847 return False 

3848 

3849 @final 

3850 def _check_indexing_method( 

3851 self, 

3852 method: str_t | None, 

3853 limit: int | None = None, 

3854 tolerance=None, 

3855 ) -> None: 

3856 """ 

3857 Raise if we have a get_indexer `method` that is not supported or valid. 

3858 """ 

3859 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]: 

3860 # in practice the clean_reindex_fill_method call would raise 

3861 # before we get here 

3862 raise ValueError("Invalid fill method") # pragma: no cover 

3863 

3864 if self._is_multi: 

3865 if method == "nearest": 

3866 raise NotImplementedError( 

3867 "method='nearest' not implemented yet " 

3868 "for MultiIndex; see GitHub issue 9365" 

3869 ) 

3870 if method in ("pad", "backfill"): 

3871 if tolerance is not None: 

3872 raise NotImplementedError( 

3873 "tolerance not implemented yet for MultiIndex" 

3874 ) 

3875 

3876 if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype): 

3877 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex 

3878 if method is not None: 

3879 raise NotImplementedError( 

3880 f"method {method} not yet implemented for {type(self).__name__}" 

3881 ) 

3882 

3883 if method is None: 

3884 if tolerance is not None: 

3885 raise ValueError( 

3886 "tolerance argument only valid if doing pad, " 

3887 "backfill or nearest reindexing" 

3888 ) 

3889 if limit is not None: 

3890 raise ValueError( 

3891 "limit argument only valid if doing pad, " 

3892 "backfill or nearest reindexing" 

3893 ) 

3894 

3895 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray: 

3896 # override this method on subclasses 

3897 tolerance = np.asarray(tolerance) 

3898 if target.size != tolerance.size and tolerance.size > 1: 

3899 raise ValueError("list-like tolerance size must match target index size") 

3900 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number): 

3901 if tolerance.ndim > 0: 

3902 raise ValueError( 

3903 f"tolerance argument for {type(self).__name__} with dtype " 

3904 f"{self.dtype} must contain numeric elements if it is list type" 

3905 ) 

3906 

3907 raise ValueError( 

3908 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} " 

3909 f"must be numeric if it is a scalar: {repr(tolerance)}" 

3910 ) 

3911 return tolerance 

3912 

    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        Compute a pad/backfill indexer for ``target``, optionally filtering
        matches that exceed ``tolerance``.
        """
        if self._is_multi:
            # TODO: get_indexer_with_fill docstring says values must be _sorted_
            # but that doesn't appear to be enforced
            # error: "IndexEngine" has no attribute "get_indexer_with_fill"
            engine = self._engine
            with warnings.catch_warnings():
                # TODO: We need to fix this. Casting to int64 in cython
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                return engine.get_indexer_with_fill(  # type: ignore[union-attr]
                    target=target._values,
                    values=self._values,
                    method=method,
                    limit=limit,
                )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # fast libalgos path; requires plain ndarrays on both sides
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer

3950 

    @final
    def _get_fill_indexer_searchsorted(
        self, target: Index, method: str_t, limit: int | None = None
    ) -> npt.NDArray[np.intp]:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            # limit semantics need monotonic operands; reject otherwise
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side: Literal["left", "right"] = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer

3984 

    @final
    def _get_nearest_indexer(
        self, target: Index, limit: int | None, tolerance
    ) -> npt.NDArray[np.intp]:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).

        Parameters
        ----------
        target : Index
        limit : int or None
            Passed through to the pad/backfill candidate lookups.
        tolerance : optional
            If given, candidate matches farther than this distance are
            replaced by -1.

        Returns
        -------
        np.ndarray[np.intp]
        """
        if not len(self):
            # Empty index: delegate to the pad fill path.
            return self._get_fill_indexer(target, "pad")

        # Candidate matches from each direction ...
        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        # ... and each candidate's absolute distance to the target label.
        left_distances = self._difference_compat(target, left_indexer)
        right_distances = self._difference_compat(target, right_indexer)

        # Tie-breaking: strict `lt` makes equal distances fall through to the
        # backfill side; `le` (non-monotonic-increasing case) prefers the pad
        # side on ties instead.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
            # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
            # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
            op(left_distances, right_distances)  # type: ignore[arg-type]
            | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer

4016 

4017 @final 

4018 def _filter_indexer_tolerance( 

4019 self, 

4020 target: Index, 

4021 indexer: npt.NDArray[np.intp], 

4022 tolerance, 

4023 ) -> npt.NDArray[np.intp]: 

4024 distance = self._difference_compat(target, indexer) 

4025 

4026 return np.where(distance <= tolerance, indexer, -1) 

4027 

    @final
    def _difference_compat(
        self, target: Index, indexer: npt.NDArray[np.intp]
    ) -> ArrayLike:
        """
        Absolute elementwise distance between ``self[indexer]`` and ``target``.
        """
        # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
        # of DateOffset objects, which do not support __abs__ (and would be slow
        # if they did)

        if isinstance(self.dtype, PeriodDtype):
            # Note: we only get here with matching dtypes
            # Subtract on the underlying ndarrays directly instead of going
            # through PeriodArray.__sub__.
            own_values = cast("PeriodArray", self._data)._ndarray
            target_values = cast("PeriodArray", target._data)._ndarray
            diff = own_values[indexer] - target_values
        else:
            # error: Unsupported left operand type for - ("ExtensionArray")
            diff = self._values[indexer] - target._values  # type: ignore[operator]
        return abs(diff)

4045 

4046 # -------------------------------------------------------------------- 

4047 # Indexer Conversion Methods 

4048 

4049 @final 

4050 def _validate_positional_slice(self, key: slice) -> None: 

4051 """ 

4052 For positional indexing, a slice must have either int or None 

4053 for each of start, stop, and step. 

4054 """ 

4055 self._validate_indexer("positional", key.start, "iloc") 

4056 self._validate_indexer("positional", key.stop, "iloc") 

4057 self._validate_indexer("positional", key.step, "iloc") 

4058 

    def _convert_slice_indexer(self, key: slice, kind: str_t):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}

        Returns
        -------
        Indexer to apply positionally: either ``key`` itself (when the slice
        is treated as positional) or the result of ``self.slice_indexer``.
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
        # to simplify this.
        if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
            # We always treat __getitem__ slicing as label-based
            # translate to locations
            return self.slice_indexer(start, stop, step)

        # figure out if this is a positional indexer
        def is_int(v):
            # None is acceptable for any slice bound; otherwise must be an int.
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)

        # special case for interval_dtype bc we do not do partial-indexing
        # on integer Intervals when slicing
        # TODO: write this in terms of e.g. should_partial_index?
        ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
            self.dtype
        )
        is_positional = is_index_slice and ints_are_positional

        if kind == "getitem":
            # called from the getitem slicers, validate that we are in fact integers
            if is_integer_dtype(self.dtype) or is_index_slice:
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                # Both bounds exist as labels -> treat the slice as label-based.
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                raise TypeError(
                    "Slicing a positional slice with .loc is not allowed, "
                    "Use .loc with labels or .iloc with positions instead.",
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer

4135 

4136 @final 

4137 def _raise_invalid_indexer( 

4138 self, 

4139 form: str_t, 

4140 key, 

4141 reraise: lib.NoDefault | None | Exception = lib.no_default, 

4142 ) -> None: 

4143 """ 

4144 Raise consistent invalid indexer message. 

4145 """ 

4146 msg = ( 

4147 f"cannot do {form} indexing on {type(self).__name__} with these " 

4148 f"indexers [{key}] of type {type(key).__name__}" 

4149 ) 

4150 if reraise is not lib.no_default: 

4151 raise TypeError(msg) from reraise 

4152 raise TypeError(msg) 

4153 

4154 # -------------------------------------------------------------------- 

4155 # Reindex Methods 

4156 

4157 @final 

4158 def _validate_can_reindex(self, indexer: np.ndarray) -> None: 

4159 """ 

4160 Check if we are allowing reindexing with this particular indexer. 

4161 

4162 Parameters 

4163 ---------- 

4164 indexer : an integer ndarray 

4165 

4166 Raises 

4167 ------ 

4168 ValueError if its a duplicate axis 

4169 """ 

4170 # trying to reindex on an axis with duplicates 

4171 if not self._index_as_unique and len(indexer): 

4172 raise ValueError("cannot reindex on an axis with duplicate labels") 

4173 

    def reindex(
        self, target, method=None, level=None, limit=None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Build an empty target that keeps self's (or the level's) dtype.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                # Identical labels: no reindexing needed.
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                elif not self.is_unique:
                    # GH#42568
                    raise ValueError("cannot reindex on an axis with duplicate labels")
                else:
                    indexer, _ = self.get_indexer_non_unique(target)

        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer

4281 

4282 def _wrap_reindex_result(self, target, indexer, preserve_names: bool): 

4283 target = self._maybe_preserve_names(target, preserve_names) 

4284 return target 

4285 

4286 def _maybe_preserve_names(self, target: Index, preserve_names: bool): 

4287 if preserve_names and target.nlevels == 1 and target.name != self.name: 

4288 target = target.copy(deep=False) 

4289 target.name = self.name 

4290 return target 

4291 

    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None

        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        # `check` marks the indexer entries that found a match in self.
        check = indexer != -1
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]

            # Index constructor below will do inference
            # Interleave matched labels with the missing target labels,
            # keeping each in its original target position.
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):
                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:
                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:
                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if not isinstance(self, ABCMultiIndex):
            new_index = Index(new_labels, name=self.name)
        else:
            new_index = type(self).from_tuples(new_labels, names=self.names)
        return new_index, indexer, new_indexer

4363 

4364 # -------------------------------------------------------------------- 

4365 # Join Methods 

4366 

    # Typing overloads for `join`: the return type depends on the literal
    # value of `return_indexers` — the joined Index alone, or a
    # (join_index, left_indexer, right_indexer) triple.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

4402 

4403 @final 

4404 @_maybe_return_indexers 

4405 def join( 

4406 self, 

4407 other: Index, 

4408 *, 

4409 how: JoinHow = "left", 

4410 level: Level = None, 

4411 return_indexers: bool = False, 

4412 sort: bool = False, 

4413 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4414 """ 

4415 Compute join_index and indexers to conform data structures to the new index. 

4416 

4417 Parameters 

4418 ---------- 

4419 other : Index 

4420 how : {'left', 'right', 'inner', 'outer'} 

4421 level : int or level name, default None 

4422 return_indexers : bool, default False 

4423 sort : bool, default False 

4424 Sort the join keys lexicographically in the result Index. If False, 

4425 the order of the join keys depends on the join type (how keyword). 

4426 

4427 Returns 

4428 ------- 

4429 join_index, (left_indexer, right_indexer) 

4430 """ 

4431 other = ensure_index(other) 

4432 

4433 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex): 

4434 if (self.tz is None) ^ (other.tz is None): 

4435 # Raise instead of casting to object below. 

4436 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex") 

4437 

4438 if not self._is_multi and not other._is_multi: 

4439 # We have specific handling for MultiIndex below 

4440 pself, pother = self._maybe_promote(other) 

4441 if pself is not self or pother is not other: 

4442 return pself.join( 

4443 pother, how=how, level=level, return_indexers=True, sort=sort 

4444 ) 

4445 

4446 lindexer: np.ndarray | None 

4447 rindexer: np.ndarray | None 

4448 

4449 # try to figure out the join level 

4450 # GH3662 

4451 if level is None and (self._is_multi or other._is_multi): 

4452 # have the same levels/names so a simple join 

4453 if self.names == other.names: 

4454 pass 

4455 else: 

4456 return self._join_multi(other, how=how) 

4457 

4458 # join on the level 

4459 if level is not None and (self._is_multi or other._is_multi): 

4460 return self._join_level(other, level, how=how) 

4461 

4462 if len(other) == 0: 

4463 if how in ("left", "outer"): 

4464 join_index = self._view() 

4465 rindexer = np.broadcast_to(np.intp(-1), len(join_index)) 

4466 return join_index, None, rindexer 

4467 elif how in ("right", "inner", "cross"): 

4468 join_index = other._view() 

4469 lindexer = np.array([]) 

4470 return join_index, lindexer, None 

4471 

4472 if len(self) == 0: 

4473 if how in ("right", "outer"): 

4474 join_index = other._view() 

4475 lindexer = np.broadcast_to(np.intp(-1), len(join_index)) 

4476 return join_index, lindexer, None 

4477 elif how in ("left", "inner", "cross"): 

4478 join_index = self._view() 

4479 rindexer = np.array([]) 

4480 return join_index, None, rindexer 

4481 

4482 if self._join_precedence < other._join_precedence: 

4483 flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"} 

4484 how = flip.get(how, how) 

4485 join_index, lidx, ridx = other.join( 

4486 self, how=how, level=level, return_indexers=True 

4487 ) 

4488 lidx, ridx = ridx, lidx 

4489 return join_index, lidx, ridx 

4490 

4491 if not is_dtype_equal(self.dtype, other.dtype): 

4492 dtype = self._find_common_type_compat(other) 

4493 this = self.astype(dtype, copy=False) 

4494 other = other.astype(dtype, copy=False) 

4495 return this.join(other, how=how, return_indexers=True) 

4496 

4497 _validate_join_method(how) 

4498 

4499 if not self.is_unique and not other.is_unique: 

4500 return self._join_non_unique(other, how=how) 

4501 elif not self.is_unique or not other.is_unique: 

4502 if self.is_monotonic_increasing and other.is_monotonic_increasing: 

4503 if not is_interval_dtype(self.dtype): 

4504 # otherwise we will fall through to _join_via_get_indexer 

4505 # GH#39133 

4506 # go through object dtype for ea till engine is supported properly 

4507 return self._join_monotonic(other, how=how) 

4508 else: 

4509 return self._join_non_unique(other, how=how) 

4510 elif ( 

4511 # GH48504: exclude MultiIndex to avoid going through MultiIndex._values 

4512 self.is_monotonic_increasing 

4513 and other.is_monotonic_increasing 

4514 and self._can_use_libjoin 

4515 and not isinstance(self, ABCMultiIndex) 

4516 and not is_categorical_dtype(self.dtype) 

4517 ): 

4518 # Categorical is monotonic if data are ordered as categories, but join can 

4519 # not handle this in case of not lexicographically monotonic GH#38502 

4520 try: 

4521 return self._join_monotonic(other, how=how) 

4522 except TypeError: 

4523 # object dtype; non-comparable objects 

4524 pass 

4525 

4526 return self._join_via_get_indexer(other, how, sort) 

4527 

4528 @final 

4529 def _join_via_get_indexer( 

4530 self, other: Index, how: JoinHow, sort: bool 

4531 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]: 

4532 # Fallback if we do not have any fastpaths available based on 

4533 # uniqueness/monotonicity 

4534 

4535 # Note: at this point we have checked matching dtypes 

4536 

4537 if how == "left": 

4538 join_index = self 

4539 elif how == "right": 

4540 join_index = other 

4541 elif how == "inner": 

4542 # TODO: sort=False here for backwards compat. It may 

4543 # be better to use the sort parameter passed into join 

4544 join_index = self.intersection(other, sort=False) 

4545 elif how == "outer": 

4546 # TODO: sort=True here for backwards compat. It may 

4547 # be better to use the sort parameter passed into join 

4548 join_index = self.union(other) 

4549 

4550 if sort: 

4551 join_index = join_index.sort_values() 

4552 

4553 if join_index is self: 

4554 lindexer = None 

4555 else: 

4556 lindexer = self.get_indexer_for(join_index) 

4557 if join_index is other: 

4558 rindexer = None 

4559 else: 

4560 rindexer = other.get_indexer_for(join_index) 

4561 return join_index, lindexer, rindexer 

4562 

    @final
    def _join_multi(self, other: Index, how: JoinHow):
        """
        Join when at least one side is a MultiIndex, matching on the
        overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            # Drop the non-matching levels from left and right respectively
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            # Undo the operand swap above by swapping the returned indexers.
            return result[0], result[2], result[1]
        return result

4644 

    @final
    def _join_non_unique(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        Join two indexes when at least one side has duplicate labels.

        Returns the joined index plus positional indexers into ``self`` and
        ``other`` (-1 where a side has no match).
        """
        from pandas.core.reshape.merge import get_join_indexers

        # We only get here if dtypes match
        assert self.dtype == other.dtype

        left_idx, right_idx = get_join_indexers(
            [self._values], [other._values], how=how, sort=True
        )
        mask = left_idx == -1

        # Where self has no match (-1), fill the joined labels from `other`.
        join_idx = self.take(left_idx)
        right = other.take(right_idx)
        join_index = join_idx.putmask(mask, right)
        return join_index, left_idx, right_idx

4663 

    @final
    def _join_level(
        self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so that `left` is always the MultiIndex; flip the join
        # direction (and, at the end, the indexers) when we had to swap.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        # Join the chosen level's labels against `right`.
        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # The level itself is unchanged by the join.
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            # Map old level positions to their positions in the joined level.
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer

4812 

    @final
    def _join_monotonic(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Join fastpath for two monotonic-increasing indexes of matching dtype.
        """
        # We only get here with matching dtypes and both monotonic increasing
        assert other.dtype == self.dtype

        if self.equals(other):
            # This is a convenient place for this check, but its correctness
            # does not depend on monotonicity, so it could go earlier
            # in the calling method.
            ret_index = other if how == "right" else self
            return ret_index, None, None

        ridx: npt.NDArray[np.intp] | None
        lidx: npt.NDArray[np.intp] | None

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(other)
            elif how == "right":
                join_index = other
                lidx = other._left_indexer_unique(self)
                ridx = None
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
        else:
            if how == "left":
                join_array, lidx, ridx = self._left_indexer(other)
            elif how == "right":
                # Reuse the left-indexer machinery with the operands swapped.
                join_array, ridx, lidx = other._left_indexer(self)
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)

            assert lidx is not None
            assert ridx is not None

            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)

        lidx = None if lidx is None else ensure_platform_int(lidx)
        ridx = None if ridx is None else ensure_platform_int(ridx)
        return join_index, lidx, ridx

4864 

    def _wrap_joined_index(
        self: _IndexT,
        joined: ArrayLike,
        other: _IndexT,
        lidx: npt.NDArray[np.intp],
        ridx: npt.NDArray[np.intp],
    ) -> _IndexT:
        """
        Wrap the array produced by a join back into an Index of this type.
        """
        assert other.dtype == self.dtype

        if isinstance(self, ABCMultiIndex):
            # Keep the level names only when both sides agree on them.
            name = self.names if self.names == other.names else None
            # error: Incompatible return value type (got "MultiIndex",
            # expected "_IndexT")
            # Where self has no match (-1), fill the joined labels from `other`.
            mask = lidx == -1
            join_idx = self.take(lidx)
            right = other.take(ridx)
            join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
            return join_index.set_names(name)  # type: ignore[return-value]
        else:
            name = get_op_result_name(self, other)
            return self._constructor._with_infer(joined, name=name, dtype=self.dtype)

4886 

@cache_readonly
def _can_use_libjoin(self) -> bool:
    """
    Whether we can use the fastpaths implemented in _libs.join.
    """
    if type(self) is Index:
        # excludes EAs, but include masks, we get here with monotonic
        # values only, meaning no NA
        return (
            isinstance(self.dtype, np.dtype)
            or isinstance(self.values, BaseMaskedArray)
            or isinstance(self._values, ArrowExtensionArray)
        )
    # Subclasses: everything except interval-dtype supports the fastpath.
    return not is_interval_dtype(self.dtype)

4901 

4902 # -------------------------------------------------------------------- 

4903 # Uncategorized Methods 

4904 

@property
def values(self) -> ArrayLike:
    """
    The data backing this Index, as an ndarray or ExtensionArray.

    .. warning::

        Prefer :attr:`Index.array` (a reference to the underlying data)
        or :meth:`Index.to_numpy` (a NumPy array of the data), so that
        the return type is predictable.

    Returns
    -------
    numpy.ndarray or ExtensionArray

    See Also
    --------
    Index.array : Reference to the underlying data.
    Index.to_numpy : A NumPy array representing the underlying data.
    """
    return self._data

4926 

@cache_readonly
@doc(IndexOpsMixin.array)
def array(self) -> ExtensionArray:
    # Expose the underlying data through the ExtensionArray interface;
    # plain ndarrays get wrapped in PandasArray so callers always see
    # an EA, never a raw numpy array.
    data = self._data
    if not isinstance(data, np.ndarray):
        return data
    from pandas.core.arrays.numpy_ import PandasArray

    return PandasArray(data)

4936 

@property
def _values(self) -> ExtensionArray | np.ndarray:
    """
    The best internal array representation: ndarray or ExtensionArray.

    Consistent between ``Series`` and ``Index``, but may differ from the
    public ``.values``; e.g. a DatetimeIndex exposes an ``M8[ns]``
    ndarray through ``.values`` while ``_values`` is the DatetimeArray,
    and a PeriodIndex exposes an object ndarray vs the PeriodArray here.

    See Also
    --------
    values : Public counterpart.
    """
    return self._data

4962 

def _get_engine_target(self) -> ArrayLike:
    """
    Get the ndarray or ExtensionArray that we can pass to the IndexEngine
    constructor.
    """
    vals = self._values
    if isinstance(vals, StringArray):
        # GH#45652 much more performant than ExtensionEngine
        return vals._ndarray
    if (
        type(self) is Index
        and isinstance(self._values, ExtensionArray)
        and not isinstance(self._values, BaseMaskedArray)
        and not (
            isinstance(self._values, ArrowExtensionArray)
            and is_numeric_dtype(self.dtype)
            # Exclude decimal
            and self.dtype.kind != "O"
        )
    ):
        # Generic EA backing the base Index class: fall back to an
        # object-dtype ndarray for the engine.  Masked arrays and
        # numeric Arrow arrays are excluded because they have dedicated
        # engine support.
        # TODO(ExtensionIndex): remove special-case, just use self._values
        return self._values.astype(object)
    return vals

4986 

def _get_join_target(self) -> ArrayLike:
    """
    Return the ndarray or ExtensionArray handed to the join functions.

    Only reached when the index is monotonic, so masked / Arrow-backed
    values are guaranteed NA-free and can be unwrapped to ndarrays.
    """
    vals = self._values
    if isinstance(vals, BaseMaskedArray):
        # no NAs possible here, so the raw data buffer is safe to hand out
        return vals._data
    if isinstance(vals, ArrowExtensionArray):
        # likewise: no missing values, a plain numpy conversion suffices
        return vals.to_numpy()
    return self._get_engine_target()

5000 

def _from_join_target(self, result: np.ndarray) -> ArrayLike:
    """
    Cast the ndarray returned from one of the libjoin.foo_indexer
    functions back to type(self)._data.
    """
    vals = self.values
    if isinstance(vals, BaseMaskedArray):
        # re-wrap with an all-False mask (join inputs were NA-free)
        return type(vals)(result, np.zeros(result.shape, dtype=np.bool_))
    if isinstance(vals, ArrowExtensionArray):
        return type(vals)._from_sequence(result)
    return result

5011 

@doc(IndexOpsMixin._memory_usage)
def memory_usage(self, deep: bool = False) -> int:
    # Memory of the values themselves, plus the engine's hashtable
    # (built lazily, but counted once it exists).
    total = self._memory_usage(deep=deep)
    total += self._engine.sizeof(deep=deep)
    return total

5019 

@final
def where(self, cond, other=None) -> Index:
    """
    Replace values where the condition is False.

    Parameters
    ----------
    cond : bool array-like with the same length as self
        Keep the original value where True; replace where False.
    other : scalar, or array-like, default None
        Replacement to use at the False positions.

    Returns
    -------
    pandas.Index
        A copy of self with the False positions replaced from `other`.

    See Also
    --------
    Series.where : Same method for Series.
    DataFrame.where : Same method for DataFrame.

    Examples
    --------
    >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
    >>> idx.where(idx.isin(['car', 'train']), 'other')
    Index(['car', 'other', 'train', 'other'], dtype='object')
    """
    if isinstance(self, ABCMultiIndex):
        raise NotImplementedError(
            ".where is not supported for MultiIndex operations"
        )
    # Delegate to putmask, which replaces where its mask is True.
    replace_mask = ~np.asarray(cond, dtype=bool)
    return self.putmask(replace_mask, other)

5059 

# construction helpers
@final
@classmethod
def _raise_scalar_data_error(cls, data):
    """
    Raise a TypeError complaining that scalar ``data`` was passed to an
    Index constructor that requires a collection.
    """
    # This always raises; some call sites invoke it in a return/raise
    # position purely so mypy can see that the branch terminates.
    raise TypeError(
        f"{cls.__name__}(...) must be called with a collection of some "
        f"kind, {repr(data)} was passed"
    )

5070 

def _validate_fill_value(self, value):
    """
    Check if the value can be inserted into our array without casting,
    and convert it to an appropriate native type if necessary.

    Parameters
    ----------
    value : object
        Candidate fill value.

    Returns
    -------
    object
        The (possibly converted) value.

    Raises
    ------
    TypeError
        If the value cannot be inserted into an array of this dtype.
    """
    dtype = self.dtype
    if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
        # plain numpy dtype (non-datetimelike): let the cast machinery
        # convert, translating lossy casts into TypeError
        try:
            return np_can_hold_element(dtype, value)
        except LossySetitemError as err:
            # re-raise as TypeError for consistency
            raise TypeError from err
    elif not can_hold_element(self._values, value):
        raise TypeError
    return value

5092 

@final
def _require_scalar(self, value):
    """
    Validate that ``value`` is a scalar and return it unchanged.

    Raises
    ------
    TypeError
        If ``value`` is list-like or otherwise non-scalar.
    """
    if is_scalar(value):
        return value
    raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")

5102 

def _is_memory_usage_qualified(self) -> bool:
    """
    Return whether the memory usage shown by ``.info`` needs a '+'
    qualifier (object dtype can hold arbitrary Python objects, so a
    shallow measurement is only a lower bound).
    """
    dtype = self.dtype
    return is_object_dtype(dtype)

5108 

def __contains__(self, key: Any) -> bool:
    """
    Return a boolean indicating whether the provided key is in the index.

    Parameters
    ----------
    key : label
        The key to look up.

    Returns
    -------
    bool

    Raises
    ------
    TypeError
        If the key is not hashable.

    See Also
    --------
    Index.isin : Vectorized membership test for list-like keys.

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3, 4])
    >>> 2 in idx
    True
    >>> 6 in idx
    False
    """
    # Raise TypeError for unhashable keys, matching dict semantics.
    hash(key)
    try:
        return key in self._engine
    except (OverflowError, TypeError, ValueError):
        # keys the engine cannot represent are simply not present
        return False

5149 

# https://github.com/python/typeshed/issues/2148#issuecomment-520783318
# Incompatible types in assignment (expression has type "None", base class
# "object" defined the type as "Callable[[object], int]")
# NOTE: this is an annotation only (no runtime assignment); it tells type
# checkers that ``__hash__`` is None, i.e. instances are unhashable.
__hash__: ClassVar[None]  # type: ignore[assignment]

5154 

@final
def __setitem__(self, key, value):
    """
    Disabled: an Index is immutable, so item assignment always fails.

    Raises
    ------
    TypeError
        Always.
    """
    raise TypeError("Index does not support mutable operations")

5158 

def __getitem__(self, key):
    """
    Override numpy.ndarray's __getitem__ method to work as desired.

    This function adds lists and Series as valid boolean indexers
    (ndarrays only supports ndarray with dtype=bool).

    If resulting ndim != 1, plain ndarray is returned instead of
    corresponding `Index` subclass.

    """
    # Bind once: avoids repeated attribute lookup on the hot path.
    getitem = self._data.__getitem__

    if is_integer(key) or is_float(key):
        # GH#44051 exclude bool, which would return a 2d ndarray
        key = com.cast_scalar_indexer(key)
        return getitem(key)

    if isinstance(key, slice):
        # This case is separated from the conditional above to avoid
        # pessimization com.is_bool_indexer and ndim checks.
        result = getitem(key)
        # Going through simple_new for performance.
        return type(self)._simple_new(
            result, name=self._name, refs=self._references
        )

    if com.is_bool_indexer(key):
        # if we have list[bools, length=1e5] then doing this check+convert
        # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
        # time below from 3.8 ms to 496 µs
        # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
        if is_extension_array_dtype(getattr(key, "dtype", None)):
            # EA-backed bool mask: treat NA as False
            key = key.to_numpy(dtype=bool, na_value=False)
        else:
            key = np.asarray(key, dtype=bool)

    result = getitem(key)
    # Because we ruled out integer above, we always get an arraylike here
    if result.ndim > 1:
        disallow_ndim_indexing(result)

    # NB: Using _constructor._simple_new would break if MultiIndex
    # didn't override __getitem__
    return self._constructor._simple_new(result, name=self._name)

5204 

def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
    """
    Fastpath used by __getitem__ when the key is already known to be a
    slice: skips the scalar / bool-indexer checks and goes straight
    through _simple_new, propagating the name and data references.
    """
    sliced = self._data[slobj]
    return type(self)._simple_new(sliced, name=self._name, refs=self._references)

5211 

@final
def _can_hold_identifiers_and_holds_name(self, name) -> bool:
    """
    Faster stand-in for ``name in self`` used by NDFrame.__getattr__ to
    support attribute-style (``.``) column access.  Dtypes that cannot
    store a Python identifier (anything but object / string /
    categorical) are rejected without performing a lookup.

    https://github.com/pandas-dev/pandas/issues/19764
    """
    dtype = self.dtype
    can_hold = (
        is_object_dtype(dtype)
        or is_string_dtype(dtype)
        or is_categorical_dtype(dtype)
    )
    return name in self if can_hold else False

5229 

def append(self, other: Index | Sequence[Index]) -> Index:
    """
    Append a collection of Index options together.

    Parameters
    ----------
    other : Index or list/tuple of indices

    Returns
    -------
    Index
        The combined index; the name is preserved only when all inputs
        share it.

    Raises
    ------
    TypeError
        If any object to append is not an Index.
    """
    others = list(other) if isinstance(other, (list, tuple)) else [other]
    to_concat = [self, *others]

    if any(not isinstance(obj, Index) for obj in to_concat):
        raise TypeError("all inputs must be Index")

    # Keep our name only if every piece agrees on it.
    distinct_names = {obj.name for obj in to_concat}
    result_name = self.name if len(distinct_names) <= 1 else None

    return self._concat(to_concat, result_name)

5259 

def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
    """
    Concatenate multiple Index objects into one flat Index.

    The underlying arrays are combined with ``concat_compat`` (which
    finds a common dtype) and the result is rebuilt through
    ``Index._with_infer`` under the given name.
    """
    arrays = [idx._values for idx in to_concat]
    combined = concat_compat(arrays)
    return Index._with_infer(combined, name=name)

5269 

def putmask(self, mask, value) -> Index:
    """
    Return a new Index of the values set with the mask.

    Parameters
    ----------
    mask : array-like of bool
        Positions to replace.
    value : scalar or array-like
        Replacement value(s); coerced to our dtype, or the whole index
        is upcast to a common dtype when they do not fit.

    Returns
    -------
    Index

    See Also
    --------
    numpy.ndarray.putmask : Changes elements of an array
        based on conditional and input values.
    """
    mask, noop = validate_putmask(self._values, mask)
    if noop:
        # nothing to replace; still return a new object per the contract
        return self.copy()

    if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
        # e.g. None -> np.nan, see also Block._standardize_fill_value
        value = self._na_value

    try:
        converted = self._validate_fill_value(value)
    except (LossySetitemError, ValueError, TypeError) as err:
        if is_object_dtype(self):  # pragma: no cover
            raise err

        # value does not fit our dtype: upcast to a common dtype and retry
        # See also: Block.coerce_to_target_dtype
        dtype = self._find_common_type_compat(value)
        return self.astype(dtype).putmask(mask, value)

    values = self._values.copy()

    if isinstance(values, np.ndarray):
        converted = setitem_datetimelike_compat(values, mask.sum(), converted)
        np.putmask(values, mask, converted)

    else:
        # Note: we use the original value here, not converted, as
        # _validate_fill_value is not idempotent
        values._putmask(mask, value)

    return self._shallow_copy(values)

5313 

def equals(self, other: Any) -> bool:
    """
    Determine if two Index objects are equal.

    The things that are being compared are:

    * The elements inside the Index object.
    * The order of the elements inside the Index object.

    Parameters
    ----------
    other : Any
        The other object to compare against.

    Returns
    -------
    bool
        True if "other" is an Index and it has the same elements and order
        as the calling index; False otherwise.

    Examples
    --------
    >>> idx1 = pd.Index([1, 2, 3])
    >>> idx1
    Index([1, 2, 3], dtype='int64')
    >>> idx1.equals(pd.Index([1, 2, 3]))
    True

    The elements inside are compared

    >>> idx2 = pd.Index(["1", "2", "3"])
    >>> idx2
    Index(['1', '2', '3'], dtype='object')

    >>> idx1.equals(idx2)
    False

    The order is compared

    >>> ascending_idx = pd.Index([1, 2, 3])
    >>> ascending_idx
    Index([1, 2, 3], dtype='int64')
    >>> descending_idx = pd.Index([3, 2, 1])
    >>> descending_idx
    Index([3, 2, 1], dtype='int64')
    >>> ascending_idx.equals(descending_idx)
    False

    The dtype is *not* compared

    >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
    >>> int64_idx
    Index([1, 2, 3], dtype='int64')
    >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
    >>> uint64_idx
    Index([1, 2, 3], dtype='uint64')
    >>> int64_idx.equals(uint64_idx)
    True
    """
    if self.is_(other):
        # same underlying object -> trivially equal
        return True

    if not isinstance(other, Index):
        return False

    if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
        # if other is not object, use other's logic for coercion
        return other.equals(self)

    if isinstance(other, ABCMultiIndex):
        # d-level MultiIndex can equal d-tuple Index
        return other.equals(self)

    if isinstance(self._values, ExtensionArray):
        # Dispatch to the ExtensionArray's .equals method.
        if not isinstance(other, type(self)):
            return False

        earr = cast(ExtensionArray, self._data)
        return earr.equals(other._data)

    if is_extension_array_dtype(other.dtype):
        # All EA-backed Index subclasses override equals
        return other.equals(self)

    # both plain-ndarray-backed: elementwise comparison (NaNs compare equal)
    return array_equivalent(self._values, other._values)

5400 

@final
def identical(self, other) -> bool:
    """
    Like ``equals``, but additionally requires a matching type, dtype
    and the object attributes listed in ``_comparables`` (e.g. name).

    Returns
    -------
    bool
        True when elements, order, type, dtype and attributes all match.
    """
    if not self.equals(other):
        return False
    if type(self) is not type(other) or self.dtype != other.dtype:
        return False
    return all(
        getattr(self, attr, None) == getattr(other, attr, None)
        for attr in self._comparables
    )

5421 

@final
def asof(self, label):
    """
    Return the label from the index, or, if not present, the previous one.

    Assuming that the index is sorted, return the passed index label if it
    is in the index, or return the previous index label if the passed one
    is not in the index.

    Parameters
    ----------
    label : object
        The label up to which the method returns the latest index label.

    Returns
    -------
    object
        The passed label if it is in the index. The previous label if the
        passed label is not in the sorted index or `NaN` if there is no
        such label.

    See Also
    --------
    Series.asof : Return the latest value in a Series up to the
        passed index.
    merge_asof : Perform an asof merge (similar to left join but it
        matches on nearest key rather than equal key).
    Index.get_loc : An `asof` is a thin wrapper around `get_loc`
        with method='pad'.

    Examples
    --------
    `Index.asof` returns the latest index label up to the passed label.

    >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
    >>> idx.asof('2014-01-01')
    '2013-12-31'

    If the label is in the index, the method returns the passed label.

    >>> idx.asof('2014-01-02')
    '2014-01-02'

    If all of the labels in the index are later than the passed label,
    NaN is returned.

    >>> idx.asof('1999-01-02')
    nan

    If the index is not sorted, an error is raised.

    >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
    ...                            '2014-01-03'])
    >>> idx_not_sorted.asof('2013-12-31')
    Traceback (most recent call last):
    ValueError: index must be monotonic increasing or decreasing
    """
    self._searchsorted_monotonic(label)  # validate sortedness
    try:
        loc = self.get_loc(label)
    except (KeyError, TypeError):
        # KeyError -> No exact match, try for padded
        # TypeError -> passed e.g. non-hashable, fall through to get
        # the tested exception message
        indexer = self.get_indexer([label], method="pad")
        if indexer.ndim > 1 or indexer.size > 1:
            raise TypeError("asof requires scalar valued input")
        loc = indexer.item()
        if loc == -1:
            # every label in the index is later than ``label``
            return self._na_value
    else:
        # exact match found; a slice means repeated labels -> take the last
        if isinstance(loc, slice):
            loc = loc.indices(len(self))[-1]

    return self[loc]

5497 

def asof_locs(
    self, where: Index, mask: npt.NDArray[np.bool_]
) -> npt.NDArray[np.intp]:
    """
    Return the locations (indices) of labels in the index.

    As in the `asof` function, if the label (a particular entry in
    `where`) is not in the index, the latest index label up to the
    passed label is chosen and its index returned.

    If all of the labels in the index are later than a label in `where`,
    -1 is returned.

    `mask` is used to ignore NA values in the index during calculation.

    Parameters
    ----------
    where : Index
        An Index consisting of an array of timestamps.
    mask : np.ndarray[bool]
        Array of booleans denoting where values in the original
        data are not NA.

    Returns
    -------
    np.ndarray[np.intp]
        An array of locations (indices) of the labels from the Index
        which correspond to the return values of the `asof` function
        for every element in `where`.
    """
    # error: No overload variant of "searchsorted" of "ndarray" matches argument
    # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
    # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
    locs = self._values[mask].searchsorted(
        where._values, side="right"  # type: ignore[call-overload]
    )
    # step back one position: the value at locs-1 is the last one <= where
    locs = np.where(locs > 0, locs - 1, 0)

    # translate positions within the masked view back to positions in self
    result = np.arange(len(self), dtype=np.intp)[mask].take(locs)

    # entries earlier than the first non-NA value have no "asof" -> -1
    first_value = self._values[mask.argmax()]
    result[(locs == 0) & (where._values < first_value)] = -1

    return result

5542 

def sort_values(
    self,
    return_indexer: bool = False,
    ascending: bool = True,
    na_position: str_t = "last",
    key: Callable | None = None,
):
    """
    Return a sorted copy of the index.

    Return a sorted copy of the index, and optionally return the indices
    that sorted the index itself.

    Parameters
    ----------
    return_indexer : bool, default False
        Should the indices that would sort the index be returned.
    ascending : bool, default True
        Should the index values be sorted in an ascending order.
    na_position : {'first' or 'last'}, default 'last'
        Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
        the end.

        .. versionadded:: 1.2.0

    key : callable, optional
        If not None, apply the key function to the index values
        before sorting. This is similar to the `key` argument in the
        builtin :meth:`sorted` function, with the notable difference that
        this `key` function should be *vectorized*. It should expect an
        ``Index`` and return an ``Index`` of the same shape.

        .. versionadded:: 1.1.0

    Returns
    -------
    sorted_index : pandas.Index
        Sorted copy of the index.
    indexer : numpy.ndarray, optional
        The indices that the index itself was sorted by.

    See Also
    --------
    Series.sort_values : Sort values of a Series.
    DataFrame.sort_values : Sort values in a DataFrame.

    Examples
    --------
    >>> idx = pd.Index([10, 100, 1, 1000])
    >>> idx
    Index([10, 100, 1, 1000], dtype='int64')

    Sort values in ascending order (default behavior).

    >>> idx.sort_values()
    Index([1, 10, 100, 1000], dtype='int64')

    Sort values in descending order, and also get the indices `idx` was
    sorted by.

    >>> idx.sort_values(ascending=False, return_indexer=True)
    (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
    """
    idx = ensure_key_mapped(self, key)

    # GH 35584. Sort missing values according to na_position kwarg
    # ignore na_position for MultiIndex
    if not isinstance(self, ABCMultiIndex):
        # NOTE(review): ``idx`` already has ``key`` applied via
        # ensure_key_mapped above, yet ``key`` is passed to nargsort
        # again, which may re-apply it -- harmless for idempotent keys,
        # but worth confirming against nargsort's contract.
        _as = nargsort(
            items=idx, ascending=ascending, na_position=na_position, key=key
        )
    else:
        _as = idx.argsort()
        if not ascending:
            _as = _as[::-1]

    # take from ``self`` (not ``idx``) so the result holds original values
    sorted_index = self.take(_as)

    if return_indexer:
        return sorted_index, _as
    else:
        return sorted_index

5625 

@final
def sort(self, *args, **kwargs):
    """
    Not supported: raising stand-in that points callers at
    ``sort_values`` (an Index is immutable, so it cannot be sorted
    in place).

    Raises
    ------
    TypeError
        Always.
    """
    raise TypeError("cannot sort an Index object in-place, use sort_values instead")

5632 

def shift(self, periods: int = 1, freq=None):
    """
    Shift index by desired number of time frequency increments.

    This method is for shifting the values of datetime-like indexes
    by a specified time increment a given number of times.

    Parameters
    ----------
    periods : int, default 1
        Number of periods (or increments) to shift by,
        can be positive or negative.
    freq : pandas.DateOffset, pandas.Timedelta or str, optional
        Frequency increment to shift by.
        If None, the index is shifted by its own `freq` attribute.
        Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.

    Returns
    -------
    pandas.Index
        Shifted index.

    See Also
    --------
    Series.shift : Shift values of Series.

    Notes
    -----
    This method is only implemented for datetime-like index classes,
    i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.

    Examples
    --------
    Put the first 5 month starts of 2011 into an index.

    >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
    >>> month_starts
    DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
                   '2011-05-01'],
                  dtype='datetime64[ns]', freq='MS')

    Shift the index by 10 days.

    >>> month_starts.shift(10, freq='D')
    DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
                   '2011-05-11'],
                  dtype='datetime64[ns]', freq=None)

    The default value of `freq` is the `freq` attribute of the index,
    which is 'MS' (month start) in this example.

    >>> month_starts.shift(10)
    DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
                   '2012-03-01'],
                  dtype='datetime64[ns]', freq='MS')
    """
    # Base class has no notion of a time increment; datetime-like
    # subclasses override this.  (First literal had a spurious f-prefix
    # with no placeholders -- removed.)
    raise NotImplementedError(
        "This method is only implemented for DatetimeIndex, PeriodIndex and "
        f"TimedeltaIndex; Got type {type(self).__name__}"
    )

5693 

def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
    """
    Return the integer indices that would sort the index.

    Parameters
    ----------
    *args, **kwargs
        Forwarded to the underlying array's ``argsort``.

    Returns
    -------
    np.ndarray[np.intp]
        Integer indices that would sort the index if used as an indexer.

    See Also
    --------
    numpy.argsort : Similar method for NumPy arrays.
    Index.sort_values : Return sorted copy of Index.

    Examples
    --------
    >>> idx = pd.Index(['b', 'a', 'd', 'c'])
    >>> order = idx.argsort()
    >>> order
    array([1, 0, 3, 2])
    >>> idx[order]
    Index(['a', 'b', 'c', 'd'], dtype='object')
    """
    # Works for ndarray and ExtensionArray alike; RangeIndex and
    # MultiIndex override this method.
    return self._data.argsort(*args, **kwargs)

5732 

def _check_indexing_error(self, key):
    """
    Raise InvalidIndexError for non-scalar keys.

    Non-scalar keys would fail later anyway (after a costly conversion
    to numpy arrays); raising up front gives a clearer error (GH#29926).
    """
    if is_scalar(key):
        return
    raise InvalidIndexError(key)

5738 

@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """
    Should an integer key be treated as positional?

    True unless the index itself holds numbers -- in that case an
    integer key is a label lookup, not a position.
    """
    numeric_like = {"integer", "mixed-integer", "floating", "complex"}
    return self.inferred_type not in numeric_like

5750 

# Shared docstring template; %-formatted per class and attached to
# get_indexer_non_unique below via @Appender.
_index_shared_docs[
    "get_indexer_non_unique"
] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.

        Examples
        --------
        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['b', 'b'])
        (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))

        In the example below there are no matched values.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['q', 'r', 't'])
        (array([-1, -1, -1]), array([0, 1, 2]))

        For this reason, the returned ``indexer`` contains only integers equal to -1.
        It demonstrates that there's no match between the index and the ``target``
        values at these positions. The mask [0, 1, 2] in the return value shows that
        the first, second, and third elements are missing.

        Notice that the return value is a tuple contains two items. In the example
        below the first item is an array of locations in ``index``. The second
        item is a mask shows that the first and third elements are missing.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['f', 'b', 's'])
        (array([-1, 1, 3, 4, -1]), array([0, 2]))
        """

5798 

@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
    self, target
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    # (docstring supplied by the @Appender decorator above)
    target = ensure_index(target)
    target = self._maybe_cast_listlike_indexer(target)

    if not self._should_compare(target) and not self._should_partial_index(target):
        # _should_partial_index e.g. IntervalIndex with numeric scalars
        # that can be matched to Interval scalars.
        return self._get_indexer_non_comparable(target, method=None, unique=False)

    pself, ptarget = self._maybe_promote(target)
    if pself is not self or ptarget is not target:
        # one side was promoted (e.g. to object); restart with the pair
        return pself.get_indexer_non_unique(ptarget)

    if not is_dtype_equal(self.dtype, target.dtype):
        # TODO: if object, could use infer_dtype to preempt costly
        # conversion if still non-comparable?
        dtype = self._find_common_type_compat(target)

        this = self.astype(dtype, copy=False)
        that = target.astype(dtype, copy=False)
        return this.get_indexer_non_unique(that)

    # TODO: get_indexer has fastpaths for both Categorical-self and
    # Categorical-target. Can we do something similar here?

    # Note: _maybe_promote ensures we never get here with MultiIndex
    # self and non-Multi target
    tgt_values = target._get_engine_target()
    if self._is_multi and target._is_multi:
        engine = self._engine
        # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
        # no attribute "_extract_level_codes"
        tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]

    indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
    return ensure_platform_int(indexer), ensure_platform_int(missing)

5838 

@final
def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
    """
    Guaranteed return of an indexer even when non-unique.

    Dispatches to get_indexer when the index may be treated as unique,
    otherwise to get_indexer_non_unique (discarding the "missing" part).

    Returns
    -------
    np.ndarray[np.intp]
        List of indices.

    Examples
    --------
    >>> idx = pd.Index([np.nan, 'var1', np.nan])
    >>> idx.get_indexer_for([np.nan])
    array([0, 2])
    """
    if not self._index_as_unique:
        indexer, _ = self.get_indexer_non_unique(target)
        return indexer
    return self.get_indexer(target)

def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
    """
    Analogue to get_indexer that raises if any elements are missing.
    """
    keyarr = key if isinstance(key, Index) else com.asarray_tuplesafe(key)

    if self._index_as_unique:
        indexer = self.get_indexer_for(keyarr)
        keyarr = self.reindex(keyarr)[0]
    else:
        keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

    self._raise_if_missing(keyarr, indexer, axis_name)

    keyarr = self.take(indexer)
    if isinstance(key, Index):
        # GH 42790 - Preserve name from an Index
        keyarr.name = key.name

    is_np_datetimelike = (
        isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
    )
    if is_np_datetimelike or isinstance(keyarr.dtype, DatetimeTZDtype):
        # DTI/TDI.take can infer a freq in some cases when we dont want one
        should_drop_freq = isinstance(key, list) or (
            isinstance(key, type(self))
            # "Index" has no attribute "freq"
            and key.freq is None  # type: ignore[attr-defined]
        )
        if should_drop_freq:
            keyarr = keyarr._with_freq(None)

    return keyarr, indexer

def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
    """
    Check that indexer can be used to return a result.

    e.g. at least one element was found,
    unless the list of keys was actually empty.

    Parameters
    ----------
    key : list-like
        Targeted labels (only used to show correct error message).
    indexer: array-like of booleans
        Indices corresponding to the key,
        (with -1 indicating not found).
    axis_name : str

    Raises
    ------
    KeyError
        If at least one key was requested but none was found.
    """
    if len(key) == 0:
        # Empty request: nothing can be missing.
        return

    missing_mask = indexer < 0
    nmissing = missing_mask.sum()
    if not nmissing:
        return

    # TODO: remove special-case; this is just to keep exception
    # message tests from raising while debugging
    use_interval_msg = is_interval_dtype(self.dtype) or (
        is_categorical_dtype(self.dtype)
        # "Index" has no attribute "categories" [attr-defined]
        and is_interval_dtype(
            self.categories.dtype  # type: ignore[attr-defined]
        )
    )

    if nmissing == len(indexer):
        if use_interval_msg:
            key = list(key)
        raise KeyError(f"None of [{key}] are in the [{axis_name}]")

    not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
    raise KeyError(f"{not_found} not in index")

# typing overloads only (no runtime behavior):
#  * unique=True  -> a single indexer array (get_indexer path)
#  * unique=False -> (indexer, missing) pair (get_indexer_non_unique path)
@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: Literal[True] = ...
) -> npt.NDArray[np.intp]:
    ...

@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: Literal[False]
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    ...

@overload
def _get_indexer_non_comparable(
    self, target: Index, method, unique: bool = True
) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    ...

@final
def _get_indexer_non_comparable(
    self, target: Index, method, unique: bool = True
) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    """
    Called from get_indexer or get_indexer_non_unique when the target
    is of a non-comparable dtype.

    For get_indexer lookups with method=None, get_indexer is an _equality_
    check, so non-comparable dtypes mean we will always have no matches.

    For get_indexer lookups with a method, get_indexer is an _inequality_
    check, so non-comparable dtypes mean we will always raise TypeError.

    Parameters
    ----------
    target : Index
    method : str or None
    unique : bool, default True
        * True if called from get_indexer.
        * False if called from get_indexer_non_unique.

    Raises
    ------
    TypeError
        If doing an inequality check, i.e. method is not None.
    """
    if method is not None:
        # Inequality comparison between non-comparable dtypes is an error.
        other = _unpack_nested_dtype(target)
        raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")

    no_matches = -1 * np.ones(target.shape, dtype=np.intp)
    if not unique:
        # get_indexer_non_unique path: every position is also "missing".
        missing = np.arange(len(target), dtype=np.intp)
        return no_matches, missing
    # get_indexer path
    return no_matches

@property
def _index_as_unique(self) -> bool:
    """
    Whether we should treat this as unique for the sake of
    get_indexer vs get_indexer_non_unique.

    For IntervalIndex compat.
    """
    return self.is_unique

_requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"

@final
def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
    """
    When dealing with an object-dtype Index and a non-object Index, see
    if we can upcast the object-dtype one to improve performance.

    Returns a (self, other) pair; either may be a recast version of the
    input. NOTE: branch order matters — exactly one elif arm runs, and the
    final object-dtype check may recurse with the operands swapped.
    """
    if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
        if (
            self.tz is not None
            and other.tz is not None
            and not tz_compare(self.tz, other.tz)
        ):
            # standardize on UTC
            return self.tz_convert("UTC"), other.tz_convert("UTC")

    elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
        try:
            return type(other)(self), other
        except OutOfBoundsDatetime:
            # dates outside the representable Timestamp range: leave as-is
            return self, other
    elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
        # TODO: we dont have tests that get here
        return type(other)(self), other

    elif self.dtype.kind == "u" and other.dtype.kind == "i":
        # GH#41873
        if other.min() >= 0:
            # lookup min as it may be cached
            # TODO: may need itemsize check if we have non-64-bit Indexes
            return self, other.astype(self.dtype)

    elif self._is_multi and not other._is_multi:
        try:
            # "Type[Index]" has no attribute "from_tuples"
            other = type(self).from_tuples(other)  # type: ignore[attr-defined]
        except (TypeError, ValueError):
            # let's instead try with a straight Index
            self = Index(self._values)

    if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
        # Reverse op so we dont need to re-implement on the subclasses
        other, self = other._maybe_promote(self)

    return self, other

@final
def _find_common_type_compat(self, target) -> DtypeObj:
    """
    Implementation of find_common_type that adjusts for Index-specific
    special cases.
    """
    target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

    # special case: if one dtype is uint64 and the other a signed int, return object
    # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
    # Now it's:
    # * float | [u]int -> float
    # * uint64 | signed int -> object
    # We may change union(float | [u]int) to go to object.
    if self.dtype == "uint64" or target_dtype == "uint64":
        has_signed = is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
            target_dtype
        )
        if has_signed:
            return _dtype_obj

    result = find_result_type(self._values, target)
    return common_dtype_categorical_compat([self, target], result)

@final
def _should_compare(self, other: Index) -> bool:
    """
    Check if `self == other` can ever have non-False entries.
    """
    bool_vs_numeric = (
        is_bool_dtype(other) and is_any_real_numeric_dtype(self)
    ) or (is_bool_dtype(self) and is_any_real_numeric_dtype(other))
    if bool_vs_numeric:
        # GH#16877 Treat boolean labels passed to a numeric index as not
        # found. Without this fix False and True would be treated as 0 and 1
        # respectively.
        return False

    dtype = _unpack_nested_dtype(other).dtype
    return is_object_dtype(dtype) or self._is_comparable_dtype(dtype)

def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
    """
    Can we compare values of the given dtype to our own?
    """
    if self.dtype.kind == "b":
        # bool compares only with bool
        return dtype.kind == "b"
    if is_numeric_dtype(self.dtype):
        # numeric compares with any numeric
        return is_numeric_dtype(dtype)
    # TODO: this was written assuming we only get here with object-dtype,
    # which is no longer correct. Can we specialize for EA?
    return True

@final
def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
    """
    Group the index labels by a given array of values.

    Parameters
    ----------
    values : array
        Values used to determine the groups.

    Returns
    -------
    dict
        {group name -> group labels}
    """
    # TODO: if we are a MultiIndex, we can do better
    # that converting to tuples
    if isinstance(values, ABCMultiIndex):
        values = values._values
    cat = Categorical(values)
    positions_by_group = cat._reverse_indexer()

    # map each group's integer positions back to index labels
    return PrettyDict(
        {group: self.take(locs) for group, locs in positions_by_group.items()}
    )

def map(self, mapper, na_action=None):
    """
    Map values using an input mapping or function.

    Parameters
    ----------
    mapper : function, dict, or Series
        Mapping correspondence.
    na_action : {None, 'ignore'}
        If 'ignore', propagate NA values, without passing them to the
        mapping correspondence.

    Returns
    -------
    Union[Index, MultiIndex]
        The output of the mapping function applied to the index.
        If the function returns a tuple with more than one element
        a MultiIndex will be returned.
    """
    from pandas.core.indexes.multi import MultiIndex

    new_values = self._map_values(mapper, na_action=na_action)

    if new_values.size and isinstance(new_values[0], tuple):
        # Tuple results produce a MultiIndex.
        if isinstance(self, MultiIndex):
            names = self.names
        elif self.name:
            names = [self.name] * len(new_values[0])
        else:
            names = None
        return MultiIndex.from_tuples(new_values, names=names)

    # An empty result keeps the original dtype.
    dtype = self.dtype if not new_values.size else None

    # e.g. if we are floating and new_values is all ints, then we
    # don't want to cast back to floating. But if we are UInt64
    # and new_values is all ints, we want to try.
    same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
    if same_dtype:
        new_values = maybe_cast_pointwise_result(
            new_values, self.dtype, same_dtype=same_dtype
        )

    return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)

# TODO: De-duplicate with map, xref GH#32349
@final
def _transform_index(self, func, *, level=None) -> Index:
    """
    Apply function to all values found in index.

    This includes transforming multiindex entries separately.
    Only apply function to one level of the MultiIndex if level is specified.
    """
    if not isinstance(self, ABCMultiIndex):
        # flat index: map elementwise, keep the name, never tupleize
        return Index([func(x) for x in self], name=self.name, tupleize_cols=False)

    transformed_levels = []
    for i in range(self.nlevels):
        level_values = self.get_level_values(i)
        if level is None or i == level:
            level_values = level_values.map(func)
        transformed_levels.append(level_values)
    return type(self).from_arrays(transformed_levels)

def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
    """
    Return a boolean array where the index values are in `values`.

    Compute boolean array of whether each index value is found in the
    passed set of values. The length of the returned boolean array matches
    the length of the index.

    Parameters
    ----------
    values : set or list-like
        Sought values.
    level : str or int, optional
        Name or position of the index level to use (if the index is a
        `MultiIndex`).

    Returns
    -------
    np.ndarray[bool]
        NumPy array of boolean values.

    See Also
    --------
    Series.isin : Same for Series.
    DataFrame.isin : Same method for DataFrames.

    Notes
    -----
    In the case of `MultiIndex` you must either specify `values` as a
    list-like object containing tuples that are the same length as the
    number of levels, or specify `level`. Otherwise it will raise a
    ``ValueError``.

    If `level` is specified:

    - if it is the name of one *and only one* index level, use that level;
    - otherwise it should be a number indicating level position.

    Examples
    --------
    >>> idx = pd.Index([1, 2, 3])
    >>> idx.isin([1, 4])
    array([ True, False, False])

    >>> midx = pd.MultiIndex.from_arrays(
    ...     [[1, 2, 3], ['red', 'blue', 'green']],
    ...     names=('number', 'color'))
    >>> midx.isin(['red', 'orange', 'yellow'], level='color')
    array([ True, False, False])

    To check across the levels of a MultiIndex, pass a list of tuples:

    >>> midx.isin([(1, 'red'), (3, 'red')])
    array([ True, False, False])

    For a DatetimeIndex, string values in `values` are converted to
    Timestamps.

    >>> dti = pd.to_datetime(['2000-03-11', '2000-03-12', '2000-03-13'])
    >>> dti.isin(['2000-03-11'])
    array([ True, False, False])
    """
    if level is not None:
        # Validates (and for MultiIndex selects) the level to compare against.
        self._validate_index_level(level)
    return algos.isin(self._values, values)

def _get_string_slice(self, key: str_t):
    # Hook for partial string indexing; concrete implementations live in
    # DatetimeIndex, TimedeltaIndex and PeriodIndex.
    raise NotImplementedError

def slice_indexer(
    self,
    start: Hashable | None = None,
    end: Hashable | None = None,
    step: int | None = None,
) -> slice:
    """
    Compute the slice indexer for input labels and step.

    Index needs to be ordered and unique.

    Parameters
    ----------
    start : label, default None
        If None, defaults to the beginning.
    end : label, default None
        If None, defaults to the end.
    step : int, default None

    Returns
    -------
    slice

    Raises
    ------
    KeyError : If key does not exist, or key is not unique and index is
        not ordered.

    Notes
    -----
    This function assumes that the data is sorted, so use at your own peril

    Examples
    --------
    This is a method on all index types. For example you can do:

    >>> idx = pd.Index(list('abcd'))
    >>> idx.slice_indexer(start='b', end='c')
    slice(1, 3, None)

    >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
    >>> idx.slice_indexer(start='b', end=('c', 'g'))
    slice(1, 3, None)
    """
    lower, upper = self.slice_locs(start, end, step=step)

    # Both bounds must resolve to scalars before we can build a slice.
    if not is_scalar(lower):
        raise AssertionError("Start slice bound is non-scalar")
    if not is_scalar(upper):
        raise AssertionError("End slice bound is non-scalar")

    return slice(lower, upper, step)

def _maybe_cast_indexer(self, key):
    """
    If we have a float key and are not a floating index, then try to cast
    to an int if equivalent.
    """
    # Base Index performs no casting; numeric subclasses override.
    return key

def _maybe_cast_listlike_indexer(self, target) -> Index:
    """
    Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
    """
    # Base Index only coerces to an Index; subclasses may cast dtypes.
    return ensure_index(target)

@final
def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
    """
    If we are positional indexer, validate that we have appropriate
    typed bounds must be an integer.
    """
    assert kind in ["getitem", "iloc"]

    # None (open slice bound) is always allowed; everything else must be int.
    if key is None or is_integer(key):
        return
    self._raise_invalid_indexer(form, key)

def _maybe_cast_slice_bound(self, label, side: str_t):
    """
    This function should be overloaded in subclasses that allow non-trivial
    casting on label-slice bounds, e.g. datetime-like indices allowing
    strings containing formatted datetimes.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}

    Returns
    -------
    label : object

    Notes
    -----
    Value of `side` parameter should be validated in caller.
    """
    # We are a plain index here (sub-class override this method if they
    # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
    if is_numeric_dtype(self.dtype):
        return self._maybe_cast_indexer(label)

    # reject them, if index does not contain label
    is_numeric_label = is_float(label) or is_integer(label)
    if is_numeric_label and label not in self:
        self._raise_invalid_indexer("slice", label)

    return label

def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
    # searchsorted needs sorted data; handle both sort directions.
    if self.is_monotonic_increasing:
        return self.searchsorted(label, side=side)
    if self.is_monotonic_decreasing:
        # np.searchsorted expects ascending sort order, have to reverse
        # everything for it to work (element ordering, search side and
        # resulting value).
        flipped_side = "right" if side == "left" else "left"
        pos = self[::-1].searchsorted(label, side=flipped_side)
        return len(self) - pos

    raise ValueError("index must be monotonic increasing or decreasing")

def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
    """
    Calculate slice bound that corresponds to given label.

    Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
    of given label.

    Parameters
    ----------
    label : object
    side : {'left', 'right'}

    Returns
    -------
    int
        Index of label.
    """
    if side not in ("left", "right"):
        raise ValueError(
            "Invalid value for side kwarg, must be either "
            f"'left' or 'right': {side}"
        )

    original_label = label

    # For datetime indices label may be a string that has to be converted
    # to datetime boundary according to its resolution.
    label = self._maybe_cast_slice_bound(label, side)

    # we need to look up the label
    try:
        slc = self.get_loc(label)
    except KeyError as err:
        try:
            return self._searchsorted_monotonic(label, side)
        except ValueError:
            # raise the original KeyError
            raise err

    if isinstance(slc, np.ndarray):
        # get_loc may return a boolean array, which
        # is OK as long as they are representable by a slice.
        assert is_bool_dtype(slc.dtype)
        slc = lib.maybe_booleans_to_slice(slc.view("u1"))
        if isinstance(slc, np.ndarray):
            raise KeyError(
                f"Cannot get {side} slice bound for non-unique "
                f"label: {repr(original_label)}"
            )

    if isinstance(slc, slice):
        return slc.start if side == "left" else slc.stop
    # scalar position
    return slc + 1 if side == "right" else slc

def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
    """
    Compute slice locations for input labels.

    Parameters
    ----------
    start : label, default None
        If None, defaults to the beginning.
    end : label, default None
        If None, defaults to the end.
    step : int, defaults None
        If None, defaults to 1.

    Returns
    -------
    tuple[int, int]

    See Also
    --------
    Index.get_loc : Get location for a single label.

    Notes
    -----
    This method only works if the index is monotonic or unique.

    Examples
    --------
    >>> idx = pd.Index(list('abcd'))
    >>> idx.slice_locs(start='b', end='c')
    (1, 3)
    """
    inc = step is None or step >= 0

    if not inc:
        # If it's a reverse slice, temporarily swap bounds.
        start, end = end, start

    # GH 16785: If start and end happen to be date strings with UTC offsets
    # attempt to parse and check that the offsets are the same
    if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
        try:
            ts_start = Timestamp(start)
            ts_end = Timestamp(end)
        except (ValueError, TypeError):
            pass
        else:
            if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                raise ValueError("Both dates must have the same UTC offset")

    start_slice = None
    if start is not None:
        start_slice = self.get_slice_bound(start, "left")
    if start_slice is None:
        start_slice = 0

    end_slice = None
    if end is not None:
        end_slice = self.get_slice_bound(end, "right")
    if end_slice is None:
        end_slice = len(self)

    if not inc:
        # Bounds were swapped above; swap them back and shift each by one so
        # the half-open interval faces the other way, e.g. for
        # slice_locs('B', 'A', step=-1) the left bound becomes end-1 and the
        # right bound becomes start-1.
        end_slice, start_slice = start_slice - 1, end_slice - 1

        # i == -1 triggers ``len(self) + i`` selection that points to the
        # last element, not before-the-first one, subtracting len(self)
        # compensates that.
        if end_slice == -1:
            end_slice -= len(self)
        if start_slice == -1:
            start_slice -= len(self)

    return start_slice, end_slice

def delete(self: _IndexT, loc) -> _IndexT:
    """
    Make new Index with passed location(-s) deleted.

    Parameters
    ----------
    loc : int or list of int
        Location of item(-s) which will be deleted.
        Use a list of locations to delete more than one value at the same time.

    Returns
    -------
    Index
        Will be same type as self, except for RangeIndex.

    See Also
    --------
    numpy.delete : Delete any rows and column from NumPy array (ndarray).

    Examples
    --------
    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.delete(1)
    Index(['a', 'c'], dtype='object')

    >>> idx = pd.Index(['a', 'b', 'c'])
    >>> idx.delete([0, 2])
    Index(['b'], dtype='object')
    """
    values = self._values
    res_values: ArrayLike
    if isinstance(values, np.ndarray):
        # TODO(__array_function__): special casing will be unnecessary
        res_values = np.delete(values, loc)
    else:
        # ExtensionArrays implement delete themselves
        res_values = values.delete(loc)

    # _constructor so RangeIndex-> Index with an int64 dtype
    return self._constructor._simple_new(res_values, name=self.name)

def insert(self, loc: int, item) -> Index:
    """
    Make new Index inserting new item at location.

    Follows Python numpy.insert semantics for negative values.

    Parameters
    ----------
    loc : int
    item : object

    Returns
    -------
    Index
    """
    item = lib.item_from_zerodim(item)
    # Normalize NA-like scalars to this dtype's canonical NA value.
    if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
        item = self._na_value

    arr = self._values

    try:
        if isinstance(arr, ExtensionArray):
            res_values = arr.insert(loc, item)
            return type(self)._simple_new(res_values, name=self.name)
        else:
            item = self._validate_fill_value(item)
    except (TypeError, ValueError, LossySetitemError):
        # e.g. trying to insert an integer into a DatetimeIndex
        # We cannot keep the same dtype, so cast to the (often object)
        # minimal shared dtype before doing the insert.
        dtype = self._find_common_type_compat(item)
        return self.astype(dtype).insert(loc, item)

    if arr.dtype != object or not isinstance(
        item, (tuple, np.datetime64, np.timedelta64)
    ):
        # with object-dtype we need to worry about numpy incorrectly casting
        # dt64/td64 to integer, also about treating tuples as sequences
        # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
        casted = arr.dtype.type(item)
        new_values = np.insert(arr, loc, casted)

    else:
        # Insert a placeholder first, then assign the item directly so numpy
        # does not coerce it.
        # error: No overload variant of "insert" matches argument types
        # "ndarray[Any, Any]", "int", "None"
        new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
        # account for numpy.insert's negative-loc semantics
        loc = loc if loc >= 0 else loc - 1
        new_values[loc] = item

    return Index._with_infer(new_values, name=self.name)

def drop(
    self,
    labels: Index | np.ndarray | Iterable[Hashable],
    errors: IgnoreRaise = "raise",
) -> Index:
    """
    Make new Index with passed list of labels deleted.

    Parameters
    ----------
    labels : array-like or scalar
    errors : {'ignore', 'raise'}, default 'raise'
        If 'ignore', suppress error and existing labels are dropped.

    Returns
    -------
    Index
        Will be same type as self, except for RangeIndex.

    Raises
    ------
    KeyError
        If not all of the labels are found in the selected axis
    """
    if not isinstance(labels, Index):
        # avoid materializing e.g. RangeIndex
        arr_dtype = "object" if self.dtype == "object" else None
        labels = com.index_labels_to_array(labels, dtype=arr_dtype)

    indexer = self.get_indexer_for(labels)
    not_found = indexer == -1
    if not_found.any():
        if errors != "ignore":
            raise KeyError(f"{list(labels[not_found])} not found in axis")
        # drop only the labels that were actually located
        indexer = indexer[~not_found]
    return self.delete(indexer)

def infer_objects(self, copy: bool = True) -> Index:
    """
    If we have an object dtype, try to infer a non-object dtype.

    Parameters
    ----------
    copy : bool, default True
        Whether to make a copy in cases where no inference occurs.
    """
    if self._is_multi:
        raise NotImplementedError(
            "infer_objects is not implemented for MultiIndex. "
            "Use index.to_frame().infer_objects() instead."
        )
    if self.dtype != object:
        # Nothing to infer; just honor the copy flag.
        return self.copy() if copy else self

    values = cast("npt.NDArray[np.object_]", self._values)
    res_values = lib.maybe_convert_objects(
        values,
        convert_datetime=True,
        convert_timedelta=True,
        convert_period=True,
        convert_interval=True,
    )
    if copy and res_values is values:
        # inference found nothing; return an explicit copy
        return self.copy()
    result = Index(res_values, name=self.name)
    if not copy and res_values is values and self._references is not None:
        # no new array was created: propagate Copy-on-Write references
        result._references = self._references
        result._references.add_index_reference(result)
    return result

6737 # -------------------------------------------------------------------- 

6738 # Generated Arithmetic, Comparison, and Unary Methods 

6739 

def _cmp_method(self, other, op):
    """
    Wrapper used to dispatch comparison operations.
    """
    if self.is_(other):
        # fastpath: comparing an index with itself, answer known up front
        if op in {operator.eq, operator.le, operator.ge}:
            arr = np.ones(len(self), dtype=bool)
            if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                # NA never compares equal, even to itself
                # TODO: should set MultiIndex._can_hold_na = False?
                arr[self.isna()] = False
            return arr
        elif op is operator.ne:
            arr = np.zeros(len(self), dtype=bool)
            if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                arr[self.isna()] = True
            return arr

    if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
        self
    ) != len(other):
        raise ValueError("Lengths must match to compare")

    if not isinstance(other, ABCMultiIndex):
        other = extract_array(other, extract_numpy=True)
    else:
        other = np.asarray(other)

    if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
        # e.g. PeriodArray, Categorical
        with np.errstate(all="ignore"):
            result = op(self._values, other)

    elif isinstance(self._values, ExtensionArray):
        # EA handles the comparison itself
        result = op(self._values, other)

    elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
        # don't pass MultiIndex
        with np.errstate(all="ignore"):
            result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

    else:
        with np.errstate(all="ignore"):
            result = ops.comparison_op(self._values, other, op)

    return result

@final
def _logical_method(self, other, op):
    # Dispatch &, |, ^ through pandas' logical_op machinery.
    result_name = ops.get_op_result_name(self, other)

    left = self._values
    right = extract_array(other, extract_numpy=True, extract_range=True)

    return self._construct_result(ops.logical_op(left, right, op), name=result_name)

@final
def _construct_result(self, result, name):
    # divmod-style ops return a pair; wrap each half in its own Index.
    if isinstance(result, tuple):
        left, right = result
        return (
            Index(left, name=name, dtype=left.dtype),
            Index(right, name=name, dtype=right.dtype),
        )
    return Index(result, name=name, dtype=result.dtype)

6806 def _arith_method(self, other, op): 

6807 if ( 

6808 isinstance(other, Index) 

6809 and is_object_dtype(other.dtype) 

6810 and type(other) is not Index 

6811 ): 

6812 # We return NotImplemented for object-dtype index *subclasses* so they have 

6813 # a chance to implement ops before we unwrap them. 

6814 # See https://github.com/pandas-dev/pandas/issues/31109 

6815 return NotImplemented 

6816 

6817 return super()._arith_method(other, op) 

6818 

6819 @final 

6820 def _unary_method(self, op): 

6821 result = op(self._values) 

6822 return Index(result, name=self.name) 

6823 

6824 def __abs__(self) -> Index: 

6825 return self._unary_method(operator.abs) 

6826 

6827 def __neg__(self) -> Index: 

6828 return self._unary_method(operator.neg) 

6829 

6830 def __pos__(self) -> Index: 

6831 return self._unary_method(operator.pos) 

6832 

6833 def __invert__(self) -> Index: 

6834 # GH#8875 

6835 return self._unary_method(operator.inv) 

6836 

6837 # -------------------------------------------------------------------- 

6838 # Reductions 

6839 

6840 def any(self, *args, **kwargs): 

6841 """ 

6842 Return whether any element is Truthy. 

6843 

6844 Parameters 

6845 ---------- 

6846 *args 

6847 Required for compatibility with numpy. 

6848 **kwargs 

6849 Required for compatibility with numpy. 

6850 

6851 Returns 

6852 ------- 

6853 bool or array-like (if axis is specified) 

6854 A single element array-like may be converted to bool. 

6855 

6856 See Also 

6857 -------- 

6858 Index.all : Return whether all elements are True. 

6859 Series.all : Return whether all elements are True. 

6860 

6861 Notes 

6862 ----- 

6863 Not a Number (NaN), positive infinity and negative infinity 

6864 evaluate to True because these are not equal to zero. 

6865 

6866 Examples 

6867 -------- 

6868 >>> index = pd.Index([0, 1, 2]) 

6869 >>> index.any() 

6870 True 

6871 

6872 >>> index = pd.Index([0, 0, 0]) 

6873 >>> index.any() 

6874 False 

6875 """ 

6876 nv.validate_any(args, kwargs) 

6877 self._maybe_disable_logical_methods("any") 

6878 # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected 

6879 # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, 

6880 # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], 

6881 # _SupportsArray]" 

6882 return np.any(self.values) # type: ignore[arg-type] 

6883 

6884 def all(self, *args, **kwargs): 

6885 """ 

6886 Return whether all elements are Truthy. 

6887 

6888 Parameters 

6889 ---------- 

6890 *args 

6891 Required for compatibility with numpy. 

6892 **kwargs 

6893 Required for compatibility with numpy. 

6894 

6895 Returns 

6896 ------- 

6897 bool or array-like (if axis is specified) 

6898 A single element array-like may be converted to bool. 

6899 

6900 See Also 

6901 -------- 

6902 Index.any : Return whether any element in an Index is True. 

6903 Series.any : Return whether any element in a Series is True. 

6904 Series.all : Return whether all elements in a Series are True. 

6905 

6906 Notes 

6907 ----- 

6908 Not a Number (NaN), positive infinity and negative infinity 

6909 evaluate to True because these are not equal to zero. 

6910 

6911 Examples 

6912 -------- 

6913 True, because nonzero integers are considered True. 

6914 

6915 >>> pd.Index([1, 2, 3]).all() 

6916 True 

6917 

6918 False, because ``0`` is considered False. 

6919 

6920 >>> pd.Index([0, 1, 2]).all() 

6921 False 

6922 """ 

6923 nv.validate_all(args, kwargs) 

6924 self._maybe_disable_logical_methods("all") 

6925 # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected 

6926 # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int, 

6927 # float, complex, str, bytes, generic]], Sequence[Sequence[Any]], 

6928 # _SupportsArray]" 

6929 return np.all(self.values) # type: ignore[arg-type] 

6930 

6931 @final 

6932 def _maybe_disable_logical_methods(self, opname: str_t) -> None: 

6933 """ 

6934 raise if this Index subclass does not support any or all. 

6935 """ 

6936 if ( 

6937 isinstance(self, ABCMultiIndex) 

6938 or needs_i8_conversion(self.dtype) 

6939 or is_interval_dtype(self.dtype) 

6940 or is_categorical_dtype(self.dtype) 

6941 or is_float_dtype(self.dtype) 

6942 ): 

6943 # This call will raise 

6944 make_invalid_op(opname)(self) 

6945 

6946 @Appender(IndexOpsMixin.argmin.__doc__) 

6947 def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: 

6948 nv.validate_argmin(args, kwargs) 

6949 nv.validate_minmax_axis(axis) 

6950 

6951 if not self._is_multi and self.hasnans: 

6952 # Take advantage of cache 

6953 mask = self._isnan 

6954 if not skipna or mask.all(): 

6955 return -1 

6956 return super().argmin(skipna=skipna) 

6957 

6958 @Appender(IndexOpsMixin.argmax.__doc__) 

6959 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int: 

6960 nv.validate_argmax(args, kwargs) 

6961 nv.validate_minmax_axis(axis) 

6962 

6963 if not self._is_multi and self.hasnans: 

6964 # Take advantage of cache 

6965 mask = self._isnan 

6966 if not skipna or mask.all(): 

6967 return -1 

6968 return super().argmax(skipna=skipna) 

6969 

6970 @doc(IndexOpsMixin.min) 

6971 def min(self, axis=None, skipna: bool = True, *args, **kwargs): 

6972 nv.validate_min(args, kwargs) 

6973 nv.validate_minmax_axis(axis) 

6974 

6975 if not len(self): 

6976 return self._na_value 

6977 

6978 if len(self) and self.is_monotonic_increasing: 

6979 # quick check 

6980 first = self[0] 

6981 if not isna(first): 

6982 return first 

6983 

6984 if not self._is_multi and self.hasnans: 

6985 # Take advantage of cache 

6986 mask = self._isnan 

6987 if not skipna or mask.all(): 

6988 return self._na_value 

6989 

6990 if not self._is_multi and not isinstance(self._values, np.ndarray): 

6991 return self._values._reduce(name="min", skipna=skipna) 

6992 

6993 return super().min(skipna=skipna) 

6994 

6995 @doc(IndexOpsMixin.max) 

6996 def max(self, axis=None, skipna: bool = True, *args, **kwargs): 

6997 nv.validate_max(args, kwargs) 

6998 nv.validate_minmax_axis(axis) 

6999 

7000 if not len(self): 

7001 return self._na_value 

7002 

7003 if len(self) and self.is_monotonic_increasing: 

7004 # quick check 

7005 last = self[-1] 

7006 if not isna(last): 

7007 return last 

7008 

7009 if not self._is_multi and self.hasnans: 

7010 # Take advantage of cache 

7011 mask = self._isnan 

7012 if not skipna or mask.all(): 

7013 return self._na_value 

7014 

7015 if not self._is_multi and not isinstance(self._values, np.ndarray): 

7016 return self._values._reduce(name="max", skipna=skipna) 

7017 

7018 return super().max(skipna=skipna) 

7019 

7020 # -------------------------------------------------------------------- 

7021 

7022 @final 

7023 @property 

7024 def shape(self) -> Shape: 

7025 """ 

7026 Return a tuple of the shape of the underlying data. 

7027 """ 

7028 # See GH#27775, GH#27384 for history/reasoning in how this is defined. 

7029 return (len(self),) 

7030 

7031 

def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # Single sequence -> flat Index; unwrap the single name if given.
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)

7070 

7071 

def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    # Already an Index: return as-is (or a copy when requested).
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    # A Series keeps its name on the resulting Index.
    if isinstance(index_like, ABCSeries):
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index(index_like, copy=copy)

    if type(index_like) is not list:
        # must be exactly list (not a subclass) because of the strict
        # type check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        # A list of array-likes becomes a MultiIndex, one level per array.
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)
    return Index(index_like, copy=copy, tupleize_cols=False)

7129 

7130 

def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        # Anything with a working len() is returned unchanged.
        len(seq)
        return seq
    except TypeError:
        return list(seq)

7141 

7142 

def trim_front(strings: list[str]) -> list[str]:
    """
    Strip a shared column of leading spaces from every string, one
    character at a time, stopping once any string is empty or does not
    start with a space.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    while all(s and s[0] == " " for s in strings):
        strings = [s[1:] for s in strings]
    return strings

7160 

7161 

7162def _validate_join_method(method: str) -> None: 

7163 if method not in ["left", "right", "inner", "outer"]: 

7164 raise ValueError(f"do not recognize join method {method}") 

7165 

7166 

def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Restricted to Index/Series on purpose: a generic "name"
        # attribute check would also pick up e.g. dtype.name
        name = obj.name

    if not is_hashable(name):
        # GH#29069
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name

7181 

7182 

def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    list
        A list representing the unanimous 'names' found.
    """
    unanimous: list[Hashable] = []
    # Compare the indexes level by level; zip_longest pads with None when
    # the indexes have different numbers of levels.
    for level_names in zip_longest(*(tuple(idx.names) for idx in indexes)):
        distinct = set(level_names)
        unanimous.append(distinct.pop() if len(distinct) == 1 else None)
    return tuple(unanimous)

7200 

7201 

def _unpack_nested_dtype(other: Index) -> Index:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    from pandas.core.arrays.arrow import ArrowDtype

    dtype = other.dtype
    if isinstance(dtype, CategoricalDtype):
        # If there is ever a SparseIndex, this could get dispatched
        # here too.
        return dtype.categories
    if isinstance(dtype, ArrowDtype):
        # GH 53617: unwrap pyarrow dictionary types to their value type
        import pyarrow as pa

        if pa.types.is_dictionary(dtype.pyarrow_dtype):
            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
    return other

7229 

7230 

def _maybe_try_sort(result, sort):
    """
    Sort ``result`` via safe_sort unless ``sort`` is False.

    A TypeError from incomparable objects is re-raised only when
    ``sort`` is exactly True; otherwise the unsorted input is returned
    with a RuntimeWarning.
    """
    if sort is False:
        return result
    try:
        return algos.safe_sort(result)
    except TypeError as err:
        if sort is True:
            raise
        warnings.warn(
            f"{err}, sort order is undefined for incomparable objects.",
            RuntimeWarning,
            stacklevel=find_stack_level(),
        )
    return result