1from __future__ import annotations
2
3from datetime import datetime
4import functools
5from itertools import zip_longest
6import operator
7from typing import (
8 TYPE_CHECKING,
9 Any,
10 Callable,
11 ClassVar,
12 Hashable,
13 Iterable,
14 Literal,
15 NoReturn,
16 Sequence,
17 TypeVar,
18 cast,
19 final,
20 overload,
21)
22import warnings
23
24import numpy as np
25
26from pandas._config import get_option
27
28from pandas._libs import (
29 NaT,
30 algos as libalgos,
31 index as libindex,
32 lib,
33)
34from pandas._libs.internals import BlockValuesRefs
35import pandas._libs.join as libjoin
36from pandas._libs.lib import (
37 is_datetime_array,
38 no_default,
39)
40from pandas._libs.missing import is_float_nan
41from pandas._libs.tslibs import (
42 IncompatibleFrequency,
43 OutOfBoundsDatetime,
44 Timestamp,
45 tz_compare,
46)
47from pandas._typing import (
48 AnyAll,
49 ArrayLike,
50 Axes,
51 Axis,
52 DropKeep,
53 DtypeObj,
54 F,
55 IgnoreRaise,
56 IndexLabel,
57 JoinHow,
58 Level,
59 Shape,
60 npt,
61)
62from pandas.compat.numpy import function as nv
63from pandas.errors import (
64 DuplicateLabelError,
65 InvalidIndexError,
66)
67from pandas.util._decorators import (
68 Appender,
69 cache_readonly,
70 doc,
71)
72from pandas.util._exceptions import (
73 find_stack_level,
74 rewrite_exception,
75)
76
77from pandas.core.dtypes.astype import (
78 astype_array,
79 astype_is_view,
80)
81from pandas.core.dtypes.cast import (
82 LossySetitemError,
83 can_hold_element,
84 common_dtype_categorical_compat,
85 find_result_type,
86 infer_dtype_from,
87 maybe_cast_pointwise_result,
88 np_can_hold_element,
89)
90from pandas.core.dtypes.common import (
91 ensure_int64,
92 ensure_object,
93 ensure_platform_int,
94 is_any_real_numeric_dtype,
95 is_bool_dtype,
96 is_categorical_dtype,
97 is_dtype_equal,
98 is_ea_or_datetimelike_dtype,
99 is_extension_array_dtype,
100 is_float,
101 is_float_dtype,
102 is_hashable,
103 is_integer,
104 is_integer_dtype,
105 is_interval_dtype,
106 is_iterator,
107 is_list_like,
108 is_numeric_dtype,
109 is_object_dtype,
110 is_scalar,
111 is_signed_integer_dtype,
112 is_string_dtype,
113 needs_i8_conversion,
114 pandas_dtype,
115 validate_all_hashable,
116)
117from pandas.core.dtypes.concat import concat_compat
118from pandas.core.dtypes.dtypes import (
119 CategoricalDtype,
120 DatetimeTZDtype,
121 ExtensionDtype,
122 IntervalDtype,
123 PeriodDtype,
124)
125from pandas.core.dtypes.generic import (
126 ABCDataFrame,
127 ABCDatetimeIndex,
128 ABCMultiIndex,
129 ABCPeriodIndex,
130 ABCSeries,
131 ABCTimedeltaIndex,
132)
133from pandas.core.dtypes.inference import is_dict_like
134from pandas.core.dtypes.missing import (
135 array_equivalent,
136 is_valid_na_for_dtype,
137 isna,
138)
139
140from pandas.core import (
141 arraylike,
142 ops,
143)
144from pandas.core.accessor import CachedAccessor
145import pandas.core.algorithms as algos
146from pandas.core.array_algos.putmask import (
147 setitem_datetimelike_compat,
148 validate_putmask,
149)
150from pandas.core.arrays import (
151 ArrowExtensionArray,
152 BaseMaskedArray,
153 Categorical,
154 ExtensionArray,
155)
156from pandas.core.arrays.string_ import StringArray
157from pandas.core.base import (
158 IndexOpsMixin,
159 PandasObject,
160)
161import pandas.core.common as com
162from pandas.core.construction import (
163 ensure_wrapped_if_datetimelike,
164 extract_array,
165 sanitize_array,
166)
167from pandas.core.indexers import disallow_ndim_indexing
168from pandas.core.indexes.frozen import FrozenList
169from pandas.core.missing import clean_reindex_fill_method
170from pandas.core.ops import get_op_result_name
171from pandas.core.ops.invalid import make_invalid_op
172from pandas.core.sorting import (
173 ensure_key_mapped,
174 get_group_index_sorter,
175 nargsort,
176)
177from pandas.core.strings.accessor import StringMethods
178
179from pandas.io.formats.printing import (
180 PrettyDict,
181 default_pprint,
182 format_object_summary,
183 pprint_thing,
184)
185
186if TYPE_CHECKING:
187 from pandas import (
188 CategoricalIndex,
189 DataFrame,
190 MultiIndex,
191 Series,
192 )
193 from pandas.core.arrays import PeriodArray
194
195
__all__ = ["Index"]

# lib.infer_dtype results for which sorting is not meaningful.
_unsortable_types = frozenset(("mixed", "mixed-integer"))

# Default substitutions for the shared docstring templates below; subclasses
# override entries when rendering their own docs via Appender.
_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
# Shared docstring templates, filled in per-class with _index_doc_kwargs.
_index_shared_docs: dict[str, str] = {}
# Alias for the builtin str; the class attribute ``Index.str`` (the string
# accessor) shadows the builtin inside the class body below.
str_t = str


# Cached object dtype, compared against frequently.
_dtype_obj = np.dtype("object")

# Masked/pyarrow dtype name -> dedicated masked index engine. Dtypes not
# listed here fall back to the generic paths in Index._engine.
_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}
242
243
def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.

    Wraps a join method that always returns ``(join_index, lidx, ridx)`` so
    the indexers are only surfaced (converted to platform int) when the
    caller passes ``return_indexers=True``.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        joined, left_indexer, right_indexer = meth(
            self, other, how=how, level=level, sort=sort
        )
        if not return_indexers:
            return joined

        if left_indexer is not None:
            left_indexer = ensure_platform_int(left_indexer)
        if right_indexer is not None:
            right_indexer = ensure_platform_int(right_indexer)
        return joined, left_indexer, right_indexer

    return cast(F, join)
270
271
def _new_Index(cls, d):
    """
    Reconstruct an Index during unpickling.

    This is called instead of the default reconstruction because
    ``Index.__new__`` takes arguments and would otherwise break.
    """
    if issubclass(cls, ABCPeriodIndex):
        # Backward compat: PeriodIndex can't be instantiated with ordinals
        # through __new__ (GH #13277), so use a dedicated helper.
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")
        # This was a valid MultiIndex at pickle-time; skip re-validation
        # when un-pickling.
        d["verify_integrity"] = False
    elif "data" in d and "dtype" not in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)
298
299
# TypeVar bound to Index so subclass methods can be annotated as returning
# the same class they were called on.
_IndexT = TypeVar("_IndexT", bound="Index")
301
302
303class Index(IndexOpsMixin, PandasObject):
304 """
305 Immutable sequence used for indexing and alignment.
306
307 The basic object storing axis labels for all pandas objects.
308
309 .. versionchanged:: 2.0.0
310
311 Index can hold all numpy numeric dtypes (except float16). Previously only
312 int64/uint64/float64 dtypes were accepted.
313
314 Parameters
315 ----------
316 data : array-like (1-dimensional)
317 dtype : NumPy dtype (default: object)
318 If dtype is None, we find the dtype that best fits the data.
319 If an actual dtype is provided, we coerce to that dtype if it's safe.
320 Otherwise, an error will be raised.
321 copy : bool
322 Make a copy of input ndarray.
323 name : object
324 Name to be stored in the index.
325 tupleize_cols : bool (default: True)
326 When True, attempt to create a MultiIndex if possible.
327
328 See Also
329 --------
330 RangeIndex : Index implementing a monotonic integer range.
331 CategoricalIndex : Index of :class:`Categorical` s.
332 MultiIndex : A multi-level, or hierarchical Index.
333 IntervalIndex : An Index of :class:`Interval` s.
334 DatetimeIndex : Index of datetime64 data.
335 TimedeltaIndex : Index of timedelta64 data.
336 PeriodIndex : Index of Period data.
337
338 Notes
339 -----
340 An Index instance can **only** contain hashable objects.
341 An Index instance *can not* hold numpy float16 dtype.
342
343 Examples
344 --------
345 >>> pd.Index([1, 2, 3])
346 Index([1, 2, 3], dtype='int64')
347
348 >>> pd.Index(list('abc'))
349 Index(['a', 'b', 'c'], dtype='object')
350
351 >>> pd.Index([1, 2, 3], dtype="uint8")
352 Index([1, 2, 3], dtype='uint8')
353 """
354
    # To hand over control to subclasses: higher precedence wins the choice
    # of join implementation when two Index types are joined.
    _join_precedence = 1
357
358 # Cython methods; see github.com/cython/cython/issues/2647
359 # for why we need to wrap these instead of making them class attributes
360 # Moreover, cython will choose the appropriate-dtyped sub-function
361 # given the dtypes of the passed arguments
362
363 @final
364 def _left_indexer_unique(self: _IndexT, other: _IndexT) -> npt.NDArray[np.intp]:
365 # Caller is responsible for ensuring other.dtype == self.dtype
366 sv = self._get_join_target()
367 ov = other._get_join_target()
368 # can_use_libjoin assures sv and ov are ndarrays
369 sv = cast(np.ndarray, sv)
370 ov = cast(np.ndarray, ov)
371 # similar but not identical to ov.searchsorted(sv)
372 return libjoin.left_join_indexer_unique(sv, ov)
373
374 @final
375 def _left_indexer(
376 self: _IndexT, other: _IndexT
377 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
378 # Caller is responsible for ensuring other.dtype == self.dtype
379 sv = self._get_join_target()
380 ov = other._get_join_target()
381 # can_use_libjoin assures sv and ov are ndarrays
382 sv = cast(np.ndarray, sv)
383 ov = cast(np.ndarray, ov)
384 joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
385 joined = self._from_join_target(joined_ndarray)
386 return joined, lidx, ridx
387
388 @final
389 def _inner_indexer(
390 self: _IndexT, other: _IndexT
391 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
392 # Caller is responsible for ensuring other.dtype == self.dtype
393 sv = self._get_join_target()
394 ov = other._get_join_target()
395 # can_use_libjoin assures sv and ov are ndarrays
396 sv = cast(np.ndarray, sv)
397 ov = cast(np.ndarray, ov)
398 joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
399 joined = self._from_join_target(joined_ndarray)
400 return joined, lidx, ridx
401
402 @final
403 def _outer_indexer(
404 self: _IndexT, other: _IndexT
405 ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
406 # Caller is responsible for ensuring other.dtype == self.dtype
407 sv = self._get_join_target()
408 ov = other._get_join_target()
409 # can_use_libjoin assures sv and ov are ndarrays
410 sv = cast(np.ndarray, sv)
411 ov = cast(np.ndarray, ov)
412 joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
413 joined = self._from_join_target(joined_ndarray)
414 return joined, lidx, ridx
415
    _typ: str = "index"
    # Backing store for the labels; ndarray for plain dtypes, EA otherwise.
    _data: ExtensionArray | np.ndarray
    # Type(s) that _simple_new accepts for ``values``; subclasses narrow this.
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    # Identity sentinel used by Index.is_ (see _reset_identity).
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    # Attributes compared by Index.identical / copied onto results.
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]
430
431 @cache_readonly
432 def _can_hold_strings(self) -> bool:
433 return not is_numeric_dtype(self)
434
    # numpy dtype -> libindex engine class used for hash-based lookups.
    # Dtypes not listed here fall back to ObjectEngine (see _engine_type).
    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }
449
450 @property
451 def _engine_type(
452 self,
453 ) -> type[libindex.IndexEngine] | type[libindex.ExtensionEngine]:
454 return self._engine_types.get(self.dtype, libindex.ObjectEngine)
455
    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    # Accessor namespaces exposed on this class.
    _accessors = {"str"}

    # String-methods accessor; note this shadows the builtin ``str`` inside
    # the class namespace (hence the module-level ``str_t`` alias).
    str = CachedAccessor("str", StringMethods)

    # BlockValuesRefs for Copy-on-Write tracking; populated in _simple_new.
    _references = None
465
466 # --------------------------------------------------------------------
467 # Constructors
468
    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Index:
        """
        Construct an Index, dispatching to the appropriate subclass based on
        the (possibly inferred) dtype of ``data``.
        """
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        # Propagate Copy-on-Write references when we are not copying.
        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            return result

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in ["i", "u", "f", "b", "c", "m", "M"]:
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            # Array-like but not ndarray/Index/Series: convert then retry.
            return Index(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            # Generic iterable (list, tuple, set, generator, ...)
            if tupleize_cols:
                # GH21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # 10697
                    from pandas.core.indexes.multi import MultiIndex

                    return MultiIndex.from_tuples(data, names=name)
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            # Re-word sanitize_array's Series-centric messages for Index.
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        return klass._simple_new(arr, name, refs=refs)
564
565 @classmethod
566 def _ensure_array(cls, data, dtype, copy: bool):
567 """
568 Ensure we have a valid array to pass to _simple_new.
569 """
570 if data.ndim > 1:
571 # GH#13601, GH#20285, GH#27125
572 raise ValueError("Index data must be 1-dimensional")
573 elif dtype == np.float16:
574 # float16 not supported (no indexing engine)
575 raise NotImplementedError("float16 indexes are not supported")
576
577 if copy:
578 # asarray_tuplesafe does not always copy underlying data,
579 # so need to make sure that this happens
580 data = data.copy()
581 return data
582
583 @final
584 @classmethod
585 def _dtype_to_subclass(cls, dtype: DtypeObj):
586 # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423
587
588 if isinstance(dtype, ExtensionDtype):
589 if isinstance(dtype, DatetimeTZDtype):
590 from pandas import DatetimeIndex
591
592 return DatetimeIndex
593 elif isinstance(dtype, CategoricalDtype):
594 from pandas import CategoricalIndex
595
596 return CategoricalIndex
597 elif isinstance(dtype, IntervalDtype):
598 from pandas import IntervalIndex
599
600 return IntervalIndex
601 elif isinstance(dtype, PeriodDtype):
602 from pandas import PeriodIndex
603
604 return PeriodIndex
605
606 return Index
607
608 if dtype.kind == "M":
609 from pandas import DatetimeIndex
610
611 return DatetimeIndex
612
613 elif dtype.kind == "m":
614 from pandas import TimedeltaIndex
615
616 return TimedeltaIndex
617
618 elif dtype.kind == "O":
619 # NB: assuming away MultiIndex
620 return Index
621
622 elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
623 return Index
624
625 raise NotImplementedError(dtype)
626
627 # NOTE for new Index creation:
628
629 # - _simple_new: It returns new Index with the same type as the caller.
630 # All metadata (such as name) must be provided by caller's responsibility.
631 # Using _shallow_copy is recommended because it fills these metadata
632 # otherwise specified.
633
634 # - _shallow_copy: It returns new Index with the same type (using
635 # _simple_new), but fills caller's metadata otherwise specified. Passed
636 # kwargs will overwrite corresponding metadata.
637
638 # See each method's docstring.
639
640 @classmethod
641 def _simple_new(
642 cls: type[_IndexT], values: ArrayLike, name: Hashable = None, refs=None
643 ) -> _IndexT:
644 """
645 We require that we have a dtype compat for the values. If we are passed
646 a non-dtype compat, then coerce using the constructor.
647
648 Must be careful not to recurse.
649 """
650 assert isinstance(values, cls._data_cls), type(values)
651
652 result = object.__new__(cls)
653 result._data = values
654 result._name = name
655 result._cache = {}
656 result._reset_identity()
657 if refs is not None:
658 result._references = refs
659 else:
660 result._references = BlockValuesRefs()
661 result._references.add_index_reference(result)
662
663 return result
664
665 @classmethod
666 def _with_infer(cls, *args, **kwargs):
667 """
668 Constructor that uses the 1.0.x behavior inferring numeric dtypes
669 for ndarray[object] inputs.
670 """
671 result = cls(*args, **kwargs)
672
673 if result.dtype == _dtype_obj and not result._is_multi:
674 # error: Argument 1 to "maybe_convert_objects" has incompatible type
675 # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
676 # "ndarray[Any, Any]"
677 values = lib.maybe_convert_objects(result._values) # type: ignore[arg-type]
678 if values.dtype.kind in ["i", "u", "f", "b"]:
679 return Index(values, name=result.name)
680
681 return result
682
683 @cache_readonly
684 def _constructor(self: _IndexT) -> type[_IndexT]:
685 return type(self)
686
687 @final
688 def _maybe_check_unique(self) -> None:
689 """
690 Check that an Index has no duplicates.
691
692 This is typically only called via
693 `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
694 True (duplicates aren't allowed).
695
696 Raises
697 ------
698 DuplicateLabelError
699 When the index is not unique.
700 """
701 if not self.is_unique:
702 msg = """Index has duplicates."""
703 duplicates = self._format_duplicate_message()
704 msg += f"\n{duplicates}"
705
706 raise DuplicateLabelError(msg)
707
708 @final
709 def _format_duplicate_message(self) -> DataFrame:
710 """
711 Construct the DataFrame for a DuplicateLabelError.
712
713 This returns a DataFrame indicating the labels and positions
714 of duplicates in an index. This should only be called when it's
715 already known that duplicates are present.
716
717 Examples
718 --------
719 >>> idx = pd.Index(['a', 'b', 'a'])
720 >>> idx._format_duplicate_message()
721 positions
722 label
723 a [0, 2]
724 """
725 from pandas import Series
726
727 duplicates = self[self.duplicated(keep="first")].unique()
728 assert len(duplicates)
729
730 out = Series(np.arange(len(self))).groupby(self).agg(list)[duplicates]
731 if self._is_multi:
732 # test_format_duplicate_labels_message_multi
733 # error: "Type[Index]" has no attribute "from_tuples" [attr-defined]
734 out.index = type(self).from_tuples(out.index) # type: ignore[attr-defined]
735
736 if self.nlevels == 1:
737 out = out.rename_axis("label")
738 return out.to_frame(name="positions")
739
740 # --------------------------------------------------------------------
741 # Index Internals Methods
742
743 def _shallow_copy(self: _IndexT, values, name: Hashable = no_default) -> _IndexT:
744 """
745 Create a new Index with the same class as the caller, don't copy the
746 data, use the same object attributes with passed in attributes taking
747 precedence.
748
749 *this is an internal non-public method*
750
751 Parameters
752 ----------
753 values : the values to create the new Index, optional
754 name : Label, defaults to self.name
755 """
756 name = self._name if name is no_default else name
757
758 return self._simple_new(values, name=name, refs=self._references)
759
    def _view(self: _IndexT) -> _IndexT:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        # Share the cache so e.g. an already-built engine is reused by the view.
        result._cache = self._cache
        return result
768
769 @final
770 def _rename(self: _IndexT, name: Hashable) -> _IndexT:
771 """
772 fastpath for rename if new name is already validated.
773 """
774 result = self._view()
775 result._name = name
776 return result
777
778 @final
779 def is_(self, other) -> bool:
780 """
781 More flexible, faster check like ``is`` but that works through views.
782
783 Note: this is *not* the same as ``Index.identical()``, which checks
784 that metadata is also the same.
785
786 Parameters
787 ----------
788 other : object
789 Other object to compare against.
790
791 Returns
792 -------
793 bool
794 True if both have same underlying data, False otherwise.
795
796 See Also
797 --------
798 Index.identical : Works like ``Index.is_`` but also checks metadata.
799 """
800 if self is other:
801 return True
802 elif not hasattr(other, "_id"):
803 return False
804 elif self._id is None or other._id is None:
805 return False
806 else:
807 return self._id is other._id
808
    @final
    def _reset_identity(self) -> None:
        """
        Initializes or resets ``_id`` attribute with new object.
        """
        # Fresh sentinel: two Indexes are `is_`-equal only if they share it.
        self._id = object()
815
    @final
    def _cleanup(self) -> None:
        # Drop the engine's hash-table mapping to release memory.
        self._engine.clear_mapping()
819
    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        """
        The (cached) lookup engine for this Index's values, chosen by dtype.
        """
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()
        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray" [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]
856
857 @final
858 @cache_readonly
859 def _dir_additions_for_owner(self) -> set[str_t]:
860 """
861 Add the string-like labels to the owner dataframe/series dir output.
862
863 If this is a MultiIndex, it's first level values are used.
864 """
865 return {
866 c
867 for c in self.unique(level=0)[: get_option("display.max_dir_items")]
868 if isinstance(c, str) and c.isidentifier()
869 }
870
871 # --------------------------------------------------------------------
872 # Array-Like Methods
873
874 # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
        """
        # Delegate to the backing array.
        return len(self._data)
880
    def __array__(self, dtype=None) -> np.ndarray:
        """
        The array interface, return my values.
        """
        return np.asarray(self._data, dtype=dtype)
886
    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        """
        Handle numpy ufunc calls (NEP 13), re-wrapping results as Index.
        """
        # Defer to Series/DataFrame, which carry their own dispatch logic.
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        # Prefer a dunder op (e.g. __add__) when the ufunc maps to one.
        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        # Apply the ufunc to the underlying arrays, not the Index wrappers.
        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)

        # float16 has no indexing engine; upcast to float32.
        if result.dtype == np.float16:
            result = result.astype(np.float32)

        return self.__array_wrap__(result)
920
921 def __array_wrap__(self, result, context=None):
922 """
923 Gets called after a ufunc and other functions e.g. np.split.
924 """
925 result = lib.item_from_zerodim(result)
926 if is_bool_dtype(result) or lib.is_scalar(result) or np.ndim(result) > 1:
927 return result
928
929 return Index(result, name=self.name)
930
    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.
        """
        # Cached: the backing array (and hence its dtype) never changes.
        return self._data.dtype
937
    @final
    def ravel(self, order: str_t = "C") -> Index:
        """
        Return a view on self.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.
        """
        # Index is always 1-D, so "ravelling" is just a full slice (a view);
        # ``order`` is accepted for numpy compatibility but has no effect.
        return self[:]
952
    def view(self, cls=None):
        """
        Return a view of self, optionally reinterpreted as dtype ``cls``.
        """
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            # ``cls`` is a dtype (or dtype-like string), not an Index subclass.
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if isinstance(dtype, (np.dtype, ExtensionDtype)) and needs_i8_conversion(
                dtype
            ):
                if dtype.kind == "m" and dtype != "m8[ns]":
                    # e.g. m8[s]
                    return self._data.view(cls)

                idx_cls = self._dtype_to_subclass(dtype)
                # NB: we only get here for subclasses that override
                # _data_cls such that it is a type and not a tuple
                # of types.
                arr_cls = idx_cls._data_cls
                # Reinterpret the underlying i8 values under the target dtype.
                arr = arr_cls(self._data.view("i8"), dtype=dtype)
                return idx_cls._simple_new(arr, name=self.name, refs=self._references)

            result = self._data.view(cls)
        else:
            result = self._view()
        if isinstance(result, Index):
            # Views share identity with the original (see Index.is_).
            result._id = self._id
        return result
982
    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to dtypes.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if is_dtype_equal(self.dtype, dtype):
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            # Let the EA do the cast; rewrite its error to mention Index.
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            # Copy-on-Write: the cast shares memory with self, so the result
            # must participate in the same reference tracking.
            result._references = self._references
            result._references.add_index_reference(result)
        return result
1039
    # Shared docstring template for ``take``; rendered per-class through
    # Appender with the _index_doc_kwargs substitutions.
    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.
        """
1069
1070 @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
1071 def take(
1072 self,
1073 indices,
1074 axis: Axis = 0,
1075 allow_fill: bool = True,
1076 fill_value=None,
1077 **kwargs,
1078 ):
1079 if kwargs:
1080 nv.validate_take((), kwargs)
1081 if is_scalar(indices):
1082 raise TypeError("Expected indices to be array-like")
1083 indices = ensure_platform_int(indices)
1084 allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)
1085
1086 # Note: we discard fill_value and use self._na_value, only relevant
1087 # in the case where allow_fill is True and fill_value is not None
1088 values = self._values
1089 if isinstance(values, np.ndarray):
1090 taken = algos.take(
1091 values, indices, allow_fill=allow_fill, fill_value=self._na_value
1092 )
1093 else:
1094 # algos.take passes 'axis' keyword which not all EAs accept
1095 taken = values.take(
1096 indices, allow_fill=allow_fill, fill_value=self._na_value
1097 )
1098 # _constructor so RangeIndex-> Index with an int64 dtype
1099 return self._constructor._simple_new(taken, name=self.name)
1100
1101 @final
1102 def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
1103 """
1104 We only use pandas-style take when allow_fill is True _and_
1105 fill_value is not None.
1106 """
1107 if allow_fill and fill_value is not None:
1108 # only fill if we are passing a non-None fill_value
1109 if self._can_hold_na:
1110 if (indices < -1).any():
1111 raise ValueError(
1112 "When allow_fill=True and fill_value is not None, "
1113 "all indices must be >= -1"
1114 )
1115 else:
1116 cls_name = type(self).__name__
1117 raise ValueError(
1118 f"Unable to fill values because {cls_name} cannot contain NA"
1119 )
1120 else:
1121 allow_fill = False
1122 return allow_fill
1123
    # Shared docstring template; each subclass renders it via
    # ``@Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)``.
    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """
1162
1163 @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
1164 def repeat(self, repeats, axis=None):
1165 repeats = ensure_platform_int(repeats)
1166 nv.validate_repeat((), {"axis": axis})
1167 res_values = self._values.repeat(repeats)
1168
1169 # _constructor so RangeIndex-> Index with an int64 dtype
1170 return self._constructor._simple_new(res_values, name=self.name)
1171
1172 # --------------------------------------------------------------------
1173 # Copying Methods
1174
1175 def copy(
1176 self: _IndexT,
1177 name: Hashable | None = None,
1178 deep: bool = False,
1179 ) -> _IndexT:
1180 """
1181 Make a copy of this object.
1182
1183 Name is set on the new object.
1184
1185 Parameters
1186 ----------
1187 name : Label, optional
1188 Set name for new object.
1189 deep : bool, default False
1190
1191 Returns
1192 -------
1193 Index
1194 Index refer to new object which is a copy of this object.
1195
1196 Notes
1197 -----
1198 In most cases, there should be no functional difference from using
1199 ``deep``, but if ``deep`` is passed it will attempt to deepcopy.
1200 """
1201
1202 name = self._validate_names(name=name, deep=deep)[0]
1203 if deep:
1204 new_data = self._data.copy()
1205 new_index = type(self)._simple_new(new_data, name=name)
1206 else:
1207 new_index = self._rename(name=name)
1208 return new_index
1209
1210 @final
1211 def __copy__(self: _IndexT, **kwargs) -> _IndexT:
1212 return self.copy(**kwargs)
1213
1214 @final
1215 def __deepcopy__(self: _IndexT, memo=None) -> _IndexT:
1216 """
1217 Parameters
1218 ----------
1219 memo, default None
1220 Standard signature. Unused
1221 """
1222 return self.copy(deep=True)
1223
1224 # --------------------------------------------------------------------
1225 # Rendering Methods
1226
1227 @final
1228 def __repr__(self) -> str_t:
1229 """
1230 Return a string representation for this object.
1231 """
1232 klass_name = type(self).__name__
1233 data = self._format_data()
1234 attrs = self._format_attrs()
1235 space = self._format_space()
1236 attrs_str = [f"{k}={v}" for k, v in attrs]
1237 prepr = f",{space}".join(attrs_str)
1238
1239 # no data provided, just attributes
1240 if data is None:
1241 data = ""
1242
1243 return f"{klass_name}({data}{prepr})"
1244
1245 def _format_space(self) -> str_t:
1246 # using space here controls if the attributes
1247 # are line separated or not (the default)
1248
1249 # max_seq_items = get_option('display.max_seq_items')
1250 # if len(self) > max_seq_items:
1251 # space = "\n%s" % (' ' * (len(klass) + 1))
1252 return " "
1253
    @property
    def _formatter_func(self):
        """
        Return the formatter function.

        Used when rendering individual values for display (see _format_data).
        """
        return default_pprint
1260
1261 def _format_data(self, name=None) -> str_t:
1262 """
1263 Return the formatted data as a unicode string.
1264 """
1265 # do we want to justify (only do so for non-objects)
1266 is_justify = True
1267
1268 if self.inferred_type == "string":
1269 is_justify = False
1270 elif self.inferred_type == "categorical":
1271 self = cast("CategoricalIndex", self)
1272 if is_object_dtype(self.categories):
1273 is_justify = False
1274
1275 return format_object_summary(
1276 self,
1277 self._formatter_func,
1278 is_justify=is_justify,
1279 name=name,
1280 line_break_each_value=self._is_multi,
1281 )
1282
1283 def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
1284 """
1285 Return a list of tuples of the (attr,formatted_value).
1286 """
1287 attrs: list[tuple[str_t, str_t | int | bool | None]] = []
1288
1289 if not self._is_multi:
1290 attrs.append(("dtype", f"'{self.dtype}'"))
1291
1292 if self.name is not None:
1293 attrs.append(("name", default_pprint(self.name)))
1294 elif self._is_multi and any(x is not None for x in self.names):
1295 attrs.append(("names", default_pprint(self.names)))
1296
1297 max_seq_items = get_option("display.max_seq_items") or len(self)
1298 if len(self) > max_seq_items:
1299 attrs.append(("length", len(self)))
1300 return attrs
1301
1302 @final
1303 def _get_level_names(self) -> Hashable | Sequence[Hashable]:
1304 """
1305 Return a name or list of names with None replaced by the level number.
1306 """
1307 if self._is_multi:
1308 return [
1309 level if name is None else name for level, name in enumerate(self.names)
1310 ]
1311 else:
1312 return 0 if self.name is None else self.name
1313
1314 @final
1315 def _mpl_repr(self) -> np.ndarray:
1316 # how to represent ourselves to matplotlib
1317 if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
1318 return cast(np.ndarray, self.values)
1319 return self.astype(object, copy=False)._values
1320
1321 def format(
1322 self,
1323 name: bool = False,
1324 formatter: Callable | None = None,
1325 na_rep: str_t = "NaN",
1326 ) -> list[str_t]:
1327 """
1328 Render a string representation of the Index.
1329 """
1330 header = []
1331 if name:
1332 header.append(
1333 pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
1334 if self.name is not None
1335 else ""
1336 )
1337
1338 if formatter is not None:
1339 return header + list(self.map(formatter))
1340
1341 return self._format_with_header(header, na_rep=na_rep)
1342
1343 def _format_with_header(self, header: list[str_t], na_rep: str_t) -> list[str_t]:
1344 from pandas.io.formats.format import format_array
1345
1346 values = self._values
1347
1348 if is_object_dtype(values.dtype):
1349 values = cast(np.ndarray, values)
1350 values = lib.maybe_convert_objects(values, safe=True)
1351
1352 result = [pprint_thing(x, escape_chars=("\t", "\r", "\n")) for x in values]
1353
1354 # could have nans
1355 mask = is_float_nan(values)
1356 if mask.any():
1357 result_arr = np.array(result)
1358 result_arr[mask] = na_rep
1359 result = result_arr.tolist()
1360 else:
1361 result = trim_front(format_array(values, None, justify="left"))
1362 return header + result
1363
1364 def _format_native_types(
1365 self,
1366 *,
1367 na_rep: str_t = "",
1368 decimal: str_t = ".",
1369 float_format=None,
1370 date_format=None,
1371 quoting=None,
1372 ) -> npt.NDArray[np.object_]:
1373 """
1374 Actually format specific types of the index.
1375 """
1376 from pandas.io.formats.format import FloatArrayFormatter
1377
1378 if is_float_dtype(self.dtype) and not is_extension_array_dtype(self.dtype):
1379 formatter = FloatArrayFormatter(
1380 self._values,
1381 na_rep=na_rep,
1382 float_format=float_format,
1383 decimal=decimal,
1384 quoting=quoting,
1385 fixed_width=False,
1386 )
1387 return formatter.get_result_as_array()
1388
1389 mask = isna(self)
1390 if not is_object_dtype(self) and not quoting:
1391 values = np.asarray(self).astype(str)
1392 else:
1393 values = np.array(self, dtype=object, copy=True)
1394
1395 values[mask] = na_rep
1396 return values
1397
1398 def _summary(self, name=None) -> str_t:
1399 """
1400 Return a summarized representation.
1401
1402 Parameters
1403 ----------
1404 name : str
1405 name to use in the summary representation
1406
1407 Returns
1408 -------
1409 String with a summarized representation of the index
1410 """
1411 if len(self) > 0:
1412 head = self[0]
1413 if hasattr(head, "format") and not isinstance(head, str):
1414 head = head.format()
1415 elif needs_i8_conversion(self.dtype):
1416 # e.g. Timedelta, display as values, not quoted
1417 head = self._formatter_func(head).replace("'", "")
1418 tail = self[-1]
1419 if hasattr(tail, "format") and not isinstance(tail, str):
1420 tail = tail.format()
1421 elif needs_i8_conversion(self.dtype):
1422 # e.g. Timedelta, display as values, not quoted
1423 tail = self._formatter_func(tail).replace("'", "")
1424
1425 index_summary = f", {head} to {tail}"
1426 else:
1427 index_summary = ""
1428
1429 if name is None:
1430 name = type(self).__name__
1431 return f"{name}: {len(self)} entries{index_summary}"
1432
1433 # --------------------------------------------------------------------
1434 # Conversion Methods
1435
1436 def to_flat_index(self: _IndexT) -> _IndexT:
1437 """
1438 Identity method.
1439
1440 This is implemented for compatibility with subclass implementations
1441 when chaining.
1442
1443 Returns
1444 -------
1445 pd.Index
1446 Caller.
1447
1448 See Also
1449 --------
1450 MultiIndex.to_flat_index : Subclass implementation.
1451 """
1452 return self
1453
1454 @final
1455 def to_series(self, index=None, name: Hashable = None) -> Series:
1456 """
1457 Create a Series with both index and values equal to the index keys.
1458
1459 Useful with map for returning an indexer based on an index.
1460
1461 Parameters
1462 ----------
1463 index : Index, optional
1464 Index of resulting Series. If None, defaults to original index.
1465 name : str, optional
1466 Name of resulting Series. If None, defaults to name of original
1467 index.
1468
1469 Returns
1470 -------
1471 Series
1472 The dtype will be based on the type of the Index values.
1473
1474 See Also
1475 --------
1476 Index.to_frame : Convert an Index to a DataFrame.
1477 Series.to_frame : Convert Series to DataFrame.
1478
1479 Examples
1480 --------
1481 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1482
1483 By default, the original Index and original name is reused.
1484
1485 >>> idx.to_series()
1486 animal
1487 Ant Ant
1488 Bear Bear
1489 Cow Cow
1490 Name: animal, dtype: object
1491
1492 To enforce a new Index, specify new labels to ``index``:
1493
1494 >>> idx.to_series(index=[0, 1, 2])
1495 0 Ant
1496 1 Bear
1497 2 Cow
1498 Name: animal, dtype: object
1499
1500 To override the name of the resulting column, specify `name`:
1501
1502 >>> idx.to_series(name='zoo')
1503 animal
1504 Ant Ant
1505 Bear Bear
1506 Cow Cow
1507 Name: zoo, dtype: object
1508 """
1509 from pandas import Series
1510
1511 if index is None:
1512 index = self._view()
1513 if name is None:
1514 name = self.name
1515
1516 return Series(self._values.copy(), index=index, name=name)
1517
1518 def to_frame(
1519 self, index: bool = True, name: Hashable = lib.no_default
1520 ) -> DataFrame:
1521 """
1522 Create a DataFrame with a column containing the Index.
1523
1524 Parameters
1525 ----------
1526 index : bool, default True
1527 Set the index of the returned DataFrame as the original Index.
1528
1529 name : object, defaults to index.name
1530 The passed name should substitute for the index name (if it has
1531 one).
1532
1533 Returns
1534 -------
1535 DataFrame
1536 DataFrame containing the original Index data.
1537
1538 See Also
1539 --------
1540 Index.to_series : Convert an Index to a Series.
1541 Series.to_frame : Convert Series to DataFrame.
1542
1543 Examples
1544 --------
1545 >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
1546 >>> idx.to_frame()
1547 animal
1548 animal
1549 Ant Ant
1550 Bear Bear
1551 Cow Cow
1552
1553 By default, the original Index is reused. To enforce a new Index:
1554
1555 >>> idx.to_frame(index=False)
1556 animal
1557 0 Ant
1558 1 Bear
1559 2 Cow
1560
1561 To override the name of the resulting column, specify `name`:
1562
1563 >>> idx.to_frame(index=False, name='zoo')
1564 zoo
1565 0 Ant
1566 1 Bear
1567 2 Cow
1568 """
1569 from pandas import DataFrame
1570
1571 if name is lib.no_default:
1572 name = self._get_level_names()
1573 result = DataFrame({name: self._values.copy()})
1574
1575 if index:
1576 result.index = self
1577 return result
1578
1579 # --------------------------------------------------------------------
1580 # Name-Centric Methods
1581
    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.
        """
        # backed by the _name attribute; writes go through the setter below
        return self._name
1588
    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        # called for its validation side effect; return value is discarded
        maybe_extract_name(value, None, type(self))
        self._name = value
1599
1600 @final
1601 def _validate_names(
1602 self, name=None, names=None, deep: bool = False
1603 ) -> list[Hashable]:
1604 """
1605 Handles the quirks of having a singular 'name' parameter for general
1606 Index and plural 'names' parameter for MultiIndex.
1607 """
1608 from copy import deepcopy
1609
1610 if names is not None and name is not None:
1611 raise TypeError("Can only provide one of `names` and `name`")
1612 if names is None and name is None:
1613 new_names = deepcopy(self.names) if deep else self.names
1614 elif names is not None:
1615 if not is_list_like(names):
1616 raise TypeError("Must pass list-like as `names`.")
1617 new_names = names
1618 elif not is_list_like(name):
1619 new_names = [name]
1620 else:
1621 new_names = name
1622
1623 if len(new_names) != len(self.names):
1624 raise ValueError(
1625 f"Length of new names must be {len(self.names)}, got {len(new_names)}"
1626 )
1627
1628 # All items in 'new_names' need to be hashable
1629 validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")
1630
1631 return new_names
1632
1633 def _get_default_index_names(
1634 self, names: Hashable | Sequence[Hashable] | None = None, default=None
1635 ) -> list[Hashable]:
1636 """
1637 Get names of index.
1638
1639 Parameters
1640 ----------
1641 names : int, str or 1-dimensional list, default None
1642 Index names to set.
1643 default : str
1644 Default name of index.
1645
1646 Raises
1647 ------
1648 TypeError
1649 if names not str or list-like
1650 """
1651 from pandas.core.indexes.multi import MultiIndex
1652
1653 if names is not None:
1654 if isinstance(names, (int, str)):
1655 names = [names]
1656
1657 if not isinstance(names, list) and names is not None:
1658 raise ValueError("Index names must be str or 1-dimensional list")
1659
1660 if not names:
1661 if isinstance(self, MultiIndex):
1662 names = com.fill_missing_names(self.names)
1663 else:
1664 names = [default] if self.name is None else [self.name]
1665
1666 return names
1667
1668 def _get_names(self) -> FrozenList:
1669 return FrozenList((self.name,))
1670
1671 def _set_names(self, values, *, level=None) -> None:
1672 """
1673 Set new names on index. Each name has to be a hashable type.
1674
1675 Parameters
1676 ----------
1677 values : str or sequence
1678 name(s) to set
1679 level : int, level name, or sequence of int/level names (default None)
1680 If the index is a MultiIndex (hierarchical), level(s) to set (None
1681 for all levels). Otherwise level must be None
1682
1683 Raises
1684 ------
1685 TypeError if each name is not hashable.
1686 """
1687 if not is_list_like(values):
1688 raise ValueError("Names must be a list-like")
1689 if len(values) != 1:
1690 raise ValueError(f"Length of new names must be 1, got {len(values)}")
1691
1692 # GH 20527
1693 # All items in 'name' need to be hashable:
1694 validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")
1695
1696 self._name = values[0]
1697
1698 names = property(fset=_set_names, fget=_get_names)
1699
    # typing overloads: with inplace=False (the default) a new Index of the
    # same type is returned; with inplace=True the Index is mutated and the
    # method returns None
    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: Literal[False] = ...
    ) -> _IndexT:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(
        self: _IndexT, names, *, level=..., inplace: bool = ...
    ) -> _IndexT | None:
        ...
1715
1716 def set_names(
1717 self: _IndexT, names, *, level=None, inplace: bool = False
1718 ) -> _IndexT | None:
1719 """
1720 Set Index or MultiIndex name.
1721
1722 Able to set new names partially and by level.
1723
1724 Parameters
1725 ----------
1726
1727 names : label or list of label or dict-like for MultiIndex
1728 Name(s) to set.
1729
1730 .. versionchanged:: 1.3.0
1731
1732 level : int, label or list of int or label, optional
1733 If the index is a MultiIndex and names is not dict-like, level(s) to set
1734 (None for all levels). Otherwise level must be None.
1735
1736 .. versionchanged:: 1.3.0
1737
1738 inplace : bool, default False
1739 Modifies the object directly, instead of creating a new Index or
1740 MultiIndex.
1741
1742 Returns
1743 -------
1744 Index or None
1745 The same type as the caller or None if ``inplace=True``.
1746
1747 See Also
1748 --------
1749 Index.rename : Able to set new names without level.
1750
1751 Examples
1752 --------
1753 >>> idx = pd.Index([1, 2, 3, 4])
1754 >>> idx
1755 Index([1, 2, 3, 4], dtype='int64')
1756 >>> idx.set_names('quarter')
1757 Index([1, 2, 3, 4], dtype='int64', name='quarter')
1758
1759 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1760 ... [2018, 2019]])
1761 >>> idx
1762 MultiIndex([('python', 2018),
1763 ('python', 2019),
1764 ( 'cobra', 2018),
1765 ( 'cobra', 2019)],
1766 )
1767 >>> idx = idx.set_names(['kind', 'year'])
1768 >>> idx.set_names('species', level=0)
1769 MultiIndex([('python', 2018),
1770 ('python', 2019),
1771 ( 'cobra', 2018),
1772 ( 'cobra', 2019)],
1773 names=['species', 'year'])
1774
1775 When renaming levels with a dict, levels can not be passed.
1776
1777 >>> idx.set_names({'kind': 'snake'})
1778 MultiIndex([('python', 2018),
1779 ('python', 2019),
1780 ( 'cobra', 2018),
1781 ( 'cobra', 2019)],
1782 names=['snake', 'year'])
1783 """
1784 if level is not None and not isinstance(self, ABCMultiIndex):
1785 raise ValueError("Level must be None for non-MultiIndex")
1786
1787 if level is not None and not is_list_like(level) and is_list_like(names):
1788 raise TypeError("Names must be a string when a single level is provided.")
1789
1790 if not is_list_like(names) and level is None and self.nlevels > 1:
1791 raise TypeError("Must pass list-like as `names`.")
1792
1793 if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
1794 raise TypeError("Can only pass dict-like as `names` for MultiIndex.")
1795
1796 if is_dict_like(names) and level is not None:
1797 raise TypeError("Can not pass level for dictlike `names`.")
1798
1799 if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
1800 # Transform dict to list of new names and corresponding levels
1801 level, names_adjusted = [], []
1802 for i, name in enumerate(self.names):
1803 if name in names.keys():
1804 level.append(i)
1805 names_adjusted.append(names[name])
1806 names = names_adjusted
1807
1808 if not is_list_like(names):
1809 names = [names]
1810 if level is not None and not is_list_like(level):
1811 level = [level]
1812
1813 if inplace:
1814 idx = self
1815 else:
1816 idx = self._view()
1817
1818 idx._set_names(names, level=level)
1819 if not inplace:
1820 return idx
1821 return None
1822
1823 def rename(self, name, inplace: bool = False):
1824 """
1825 Alter Index or MultiIndex name.
1826
1827 Able to set new names without level. Defaults to returning new index.
1828 Length of names must match number of levels in MultiIndex.
1829
1830 Parameters
1831 ----------
1832 name : label or list of labels
1833 Name(s) to set.
1834 inplace : bool, default False
1835 Modifies the object directly, instead of creating a new Index or
1836 MultiIndex.
1837
1838 Returns
1839 -------
1840 Index or None
1841 The same type as the caller or None if ``inplace=True``.
1842
1843 See Also
1844 --------
1845 Index.set_names : Able to set new names partially and by level.
1846
1847 Examples
1848 --------
1849 >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
1850 >>> idx.rename('grade')
1851 Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')
1852
1853 >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
1854 ... [2018, 2019]],
1855 ... names=['kind', 'year'])
1856 >>> idx
1857 MultiIndex([('python', 2018),
1858 ('python', 2019),
1859 ( 'cobra', 2018),
1860 ( 'cobra', 2019)],
1861 names=['kind', 'year'])
1862 >>> idx.rename(['species', 'year'])
1863 MultiIndex([('python', 2018),
1864 ('python', 2019),
1865 ( 'cobra', 2018),
1866 ( 'cobra', 2019)],
1867 names=['species', 'year'])
1868 >>> idx.rename('species')
1869 Traceback (most recent call last):
1870 TypeError: Must pass list-like as `names`.
1871 """
1872 return self.set_names([name], inplace=inplace)
1873
1874 # --------------------------------------------------------------------
1875 # Level-Centric Methods
1876
    @property
    def nlevels(self) -> int:
        """
        Number of levels.
        """
        # a flat Index always has exactly one level; MultiIndex overrides this
        return 1
1883
1884 def _sort_levels_monotonic(self: _IndexT) -> _IndexT:
1885 """
1886 Compat with MultiIndex.
1887 """
1888 return self
1889
1890 @final
1891 def _validate_index_level(self, level) -> None:
1892 """
1893 Validate index level.
1894
1895 For single-level Index getting level number is a no-op, but some
1896 verification must be done like in MultiIndex.
1897
1898 """
1899 if isinstance(level, int):
1900 if level < 0 and level != -1:
1901 raise IndexError(
1902 "Too many levels: Index has only 1 level, "
1903 f"{level} is not a valid level number"
1904 )
1905 if level > 0:
1906 raise IndexError(
1907 f"Too many levels: Index has only 1 level, not {level + 1}"
1908 )
1909 elif level != self.name:
1910 raise KeyError(
1911 f"Requested level ({level}) does not match index name ({self.name})"
1912 )
1913
1914 def _get_level_number(self, level) -> int:
1915 self._validate_index_level(level)
1916 return 0
1917
1918 def sortlevel(
1919 self, level=None, ascending: bool | list[bool] = True, sort_remaining=None
1920 ):
1921 """
1922 For internal compatibility with the Index API.
1923
1924 Sort the Index. This is for compat with MultiIndex
1925
1926 Parameters
1927 ----------
1928 ascending : bool, default True
1929 False to sort in descending order
1930
1931 level, sort_remaining are compat parameters
1932
1933 Returns
1934 -------
1935 Index
1936 """
1937 if not isinstance(ascending, (list, bool)):
1938 raise TypeError(
1939 "ascending must be a single bool value or"
1940 "a list of bool values of length 1"
1941 )
1942
1943 if isinstance(ascending, list):
1944 if len(ascending) != 1:
1945 raise TypeError("ascending must be a list of bool values of length 1")
1946 ascending = ascending[0]
1947
1948 if not isinstance(ascending, bool):
1949 raise TypeError("ascending must be a bool value")
1950
1951 return self.sort_values(return_indexer=True, ascending=ascending)
1952
1953 def _get_level_values(self, level) -> Index:
1954 """
1955 Return an Index of values for requested level.
1956
1957 This is primarily useful to get an individual level of values from a
1958 MultiIndex, but is provided on Index as well for compatibility.
1959
1960 Parameters
1961 ----------
1962 level : int or str
1963 It is either the integer position or the name of the level.
1964
1965 Returns
1966 -------
1967 Index
1968 Calling object, as there is only one level in the Index.
1969
1970 See Also
1971 --------
1972 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
1973
1974 Notes
1975 -----
1976 For Index, level should be 0, since there are no multiple levels.
1977
1978 Examples
1979 --------
1980 >>> idx = pd.Index(list('abc'))
1981 >>> idx
1982 Index(['a', 'b', 'c'], dtype='object')
1983
1984 Get level values by supplying `level` as integer:
1985
1986 >>> idx.get_level_values(0)
1987 Index(['a', 'b', 'c'], dtype='object')
1988 """
1989 self._validate_index_level(level)
1990 return self
1991
1992 get_level_values = _get_level_values
1993
1994 @final
1995 def droplevel(self, level: IndexLabel = 0):
1996 """
1997 Return index with requested level(s) removed.
1998
1999 If resulting index has only 1 level left, the result will be
2000 of Index type, not MultiIndex. The original index is not modified inplace.
2001
2002 Parameters
2003 ----------
2004 level : int, str, or list-like, default 0
2005 If a string is given, must be the name of a level
2006 If list-like, elements must be names or indexes of levels.
2007
2008 Returns
2009 -------
2010 Index or MultiIndex
2011
2012 Examples
2013 --------
2014 >>> mi = pd.MultiIndex.from_arrays(
2015 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2016 >>> mi
2017 MultiIndex([(1, 3, 5),
2018 (2, 4, 6)],
2019 names=['x', 'y', 'z'])
2020
2021 >>> mi.droplevel()
2022 MultiIndex([(3, 5),
2023 (4, 6)],
2024 names=['y', 'z'])
2025
2026 >>> mi.droplevel(2)
2027 MultiIndex([(1, 3),
2028 (2, 4)],
2029 names=['x', 'y'])
2030
2031 >>> mi.droplevel('z')
2032 MultiIndex([(1, 3),
2033 (2, 4)],
2034 names=['x', 'y'])
2035
2036 >>> mi.droplevel(['x', 'y'])
2037 Index([5, 6], dtype='int64', name='z')
2038 """
2039 if not isinstance(level, (tuple, list)):
2040 level = [level]
2041
2042 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2043
2044 return self._drop_level_numbers(levnums)
2045
    @final
    def _drop_level_numbers(self, levnums: list[int]):
        """
        Drop MultiIndex levels by level _number_, not name.

        Parameters
        ----------
        levnums : list of int
            Positions of the levels to remove. Callers (see ``droplevel``)
            pass these sorted in descending order so that popping one level
            does not shift the positions of levels still to be removed.

        Returns
        -------
        Index or MultiIndex
            ``self`` unchanged when there is nothing to drop on a
            non-MultiIndex; a flat Index when exactly one level remains;
            otherwise a new MultiIndex.

        Raises
        ------
        ValueError
            If dropping ``levnums`` would leave no levels at all.
        """

        if not levnums and not isinstance(self, ABCMultiIndex):
            return self
        if len(levnums) >= self.nlevels:
            raise ValueError(
                f"Cannot remove {len(levnums)} levels from an index with "
                f"{self.nlevels} levels: at least one level must be left."
            )
        # The two checks above guarantee that here self is a MultiIndex
        self = cast("MultiIndex", self)

        # work on copies of the level/code/name lists, then pop the dropped ones
        new_levels = list(self.levels)
        new_codes = list(self.codes)
        new_names = list(self.names)

        for i in levnums:
            new_levels.pop(i)
            new_codes.pop(i)
            new_names.pop(i)

        if len(new_levels) == 1:
            # collapsing to a single level: materialize a flat Index
            lev = new_levels[0]

            if len(lev) == 0:
                # If lev is empty, lev.take will fail GH#42055
                if len(new_codes[0]) == 0:
                    # GH#45230 preserve RangeIndex here
                    # see test_reset_index_empty_rangeindex
                    result = lev[:0]
                else:
                    res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
                    # _constructor instead of type(lev) for RangeIndex compat GH#35230
                    result = lev._constructor._simple_new(res_values, name=new_names[0])
            else:
                # set nan if needed
                mask = new_codes[0] == -1
                result = new_levels[0].take(new_codes[0])
                if mask.any():
                    result = result.putmask(mask, np.nan)

                result._name = new_names[0]

            return result
        else:
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=new_names,
                verify_integrity=False,
            )
2103
2104 # --------------------------------------------------------------------
2105 # Introspection Methods
2106
2107 @cache_readonly
2108 @final
2109 def _can_hold_na(self) -> bool:
2110 if isinstance(self.dtype, ExtensionDtype):
2111 if isinstance(self.dtype, IntervalDtype):
2112 # FIXME(GH#45720): this is inaccurate for integer-backed
2113 # IntervalArray, but without it other.categories.take raises
2114 # in IntervalArray._cmp_method
2115 return True
2116 return self.dtype._can_hold_na
2117 if self.dtype.kind in ["i", "u", "b"]:
2118 return False
2119 return True
2120
    @property
    def is_monotonic_increasing(self) -> bool:
        """
        Return a boolean if the values are equal or increasing.

        Returns
        -------
        bool

        See Also
        --------
        Index.is_monotonic_decreasing : Check if the values are equal or decreasing.

        Examples
        --------
        >>> pd.Index([1, 2, 3]).is_monotonic_increasing
        True
        >>> pd.Index([1, 2, 2]).is_monotonic_increasing
        True
        >>> pd.Index([1, 3, 2]).is_monotonic_increasing
        False
        """
        # delegated to the engine backing this Index
        return self._engine.is_monotonic_increasing
2144
    @property
    def is_monotonic_decreasing(self) -> bool:
        """
        Return a boolean if the values are equal or decreasing.

        Returns
        -------
        bool

        See Also
        --------
        Index.is_monotonic_increasing : Check if the values are equal or increasing.

        Examples
        --------
        >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
        True
        >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
        True
        >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
        False
        """
        # delegated to the engine backing this Index
        return self._engine.is_monotonic_decreasing
2168
    @final
    @property
    def _is_strictly_monotonic_increasing(self) -> bool:
        """
        Return if the index is strictly monotonic increasing
        (only increasing) values.

        Examples
        --------
        >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
        True
        >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
        False
        >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
        False
        """
        # strictly increasing == non-decreasing with no repeated values
        return self.is_unique and self.is_monotonic_increasing
2186
    @final
    @property
    def _is_strictly_monotonic_decreasing(self) -> bool:
        """
        Return if the index is strictly monotonic decreasing
        (only decreasing) values.

        Examples
        --------
        >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
        True
        >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
        False
        >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
        False
        """
        # strictly decreasing == non-increasing with no repeated values
        return self.is_unique and self.is_monotonic_decreasing
2204
    @cache_readonly
    def is_unique(self) -> bool:
        """
        Return if the index has unique values.

        Returns
        -------
        bool

        See Also
        --------
        Index.has_duplicates : Inverse method that checks if it has duplicate values.

        Examples
        --------
        >>> idx = pd.Index([1, 5, 7, 7])
        >>> idx.is_unique
        False

        >>> idx = pd.Index([1, 5, 7])
        >>> idx.is_unique
        True

        >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_unique
        False

        >>> idx = pd.Index(["Orange", "Apple",
        ...                 "Watermelon"]).astype("category")
        >>> idx.is_unique
        True
        """
        # delegated to the engine; cached since uniqueness never changes
        return self._engine.is_unique
2239
2240 @final
2241 @property
2242 def has_duplicates(self) -> bool:
2243 """
2244 Check if the Index has duplicate values.
2245
2246 Returns
2247 -------
2248 bool
2249 Whether or not the Index has duplicate values.
2250
2251 See Also
2252 --------
2253 Index.is_unique : Inverse method that checks if it has unique values.
2254
2255 Examples
2256 --------
2257 >>> idx = pd.Index([1, 5, 7, 7])
2258 >>> idx.has_duplicates
2259 True
2260
2261 >>> idx = pd.Index([1, 5, 7])
2262 >>> idx.has_duplicates
2263 False
2264
2265 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2266 ... "Watermelon"]).astype("category")
2267 >>> idx.has_duplicates
2268 True
2269
2270 >>> idx = pd.Index(["Orange", "Apple",
2271 ... "Watermelon"]).astype("category")
2272 >>> idx.has_duplicates
2273 False
2274 """
2275 return not self.is_unique
2276
2277 @final
2278 def is_boolean(self) -> bool:
2279 """
2280 Check if the Index only consists of booleans.
2281
2282 .. deprecated:: 2.0.0
2283 Use `pandas.api.types.is_bool_dtype` instead.
2284
2285 Returns
2286 -------
2287 bool
2288 Whether or not the Index only consists of booleans.
2289
2290 See Also
2291 --------
2292 is_integer : Check if the Index only consists of integers (deprecated).
2293 is_floating : Check if the Index is a floating type (deprecated).
2294 is_numeric : Check if the Index only consists of numeric data (deprecated).
2295 is_object : Check if the Index is of the object dtype (deprecated).
2296 is_categorical : Check if the Index holds categorical data.
2297 is_interval : Check if the Index holds Interval objects (deprecated).
2298
2299 Examples
2300 --------
2301 >>> idx = pd.Index([True, False, True])
2302 >>> idx.is_boolean() # doctest: +SKIP
2303 True
2304
2305 >>> idx = pd.Index(["True", "False", "True"])
2306 >>> idx.is_boolean() # doctest: +SKIP
2307 False
2308
2309 >>> idx = pd.Index([True, False, "True"])
2310 >>> idx.is_boolean() # doctest: +SKIP
2311 False
2312 """
2313 warnings.warn(
2314 f"{type(self).__name__}.is_boolean is deprecated. "
2315 "Use pandas.api.types.is_bool_type instead.",
2316 FutureWarning,
2317 stacklevel=find_stack_level(),
2318 )
2319 return self.inferred_type in ["boolean"]
2320
2321 @final
2322 def is_integer(self) -> bool:
2323 """
2324 Check if the Index only consists of integers.
2325
2326 .. deprecated:: 2.0.0
2327 Use `pandas.api.types.is_integer_dtype` instead.
2328
2329 Returns
2330 -------
2331 bool
2332 Whether or not the Index only consists of integers.
2333
2334 See Also
2335 --------
2336 is_boolean : Check if the Index only consists of booleans (deprecated).
2337 is_floating : Check if the Index is a floating type (deprecated).
2338 is_numeric : Check if the Index only consists of numeric data (deprecated).
2339 is_object : Check if the Index is of the object dtype. (deprecated).
2340 is_categorical : Check if the Index holds categorical data (deprecated).
2341 is_interval : Check if the Index holds Interval objects (deprecated).
2342
2343 Examples
2344 --------
2345 >>> idx = pd.Index([1, 2, 3, 4])
2346 >>> idx.is_integer() # doctest: +SKIP
2347 True
2348
2349 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2350 >>> idx.is_integer() # doctest: +SKIP
2351 False
2352
2353 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2354 >>> idx.is_integer() # doctest: +SKIP
2355 False
2356 """
2357 warnings.warn(
2358 f"{type(self).__name__}.is_integer is deprecated. "
2359 "Use pandas.api.types.is_integer_dtype instead.",
2360 FutureWarning,
2361 stacklevel=find_stack_level(),
2362 )
2363 return self.inferred_type in ["integer"]
2364
2365 @final
2366 def is_floating(self) -> bool:
2367 """
2368 Check if the Index is a floating type.
2369
2370 .. deprecated:: 2.0.0
2371 Use `pandas.api.types.is_float_dtype` instead
2372
2373 The Index may consist of only floats, NaNs, or a mix of floats,
2374 integers, or NaNs.
2375
2376 Returns
2377 -------
2378 bool
2379 Whether or not the Index only consists of only consists of floats, NaNs, or
2380 a mix of floats, integers, or NaNs.
2381
2382 See Also
2383 --------
2384 is_boolean : Check if the Index only consists of booleans (deprecated).
2385 is_integer : Check if the Index only consists of integers (deprecated).
2386 is_numeric : Check if the Index only consists of numeric data (deprecated).
2387 is_object : Check if the Index is of the object dtype. (deprecated).
2388 is_categorical : Check if the Index holds categorical data (deprecated).
2389 is_interval : Check if the Index holds Interval objects (deprecated).
2390
2391 Examples
2392 --------
2393 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2394 >>> idx.is_floating() # doctest: +SKIP
2395 True
2396
2397 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2398 >>> idx.is_floating() # doctest: +SKIP
2399 True
2400
2401 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2402 >>> idx.is_floating() # doctest: +SKIP
2403 True
2404
2405 >>> idx = pd.Index([1, 2, 3, 4])
2406 >>> idx.is_floating() # doctest: +SKIP
2407 False
2408 """
2409 warnings.warn(
2410 f"{type(self).__name__}.is_floating is deprecated. "
2411 "Use pandas.api.types.is_float_dtype instead.",
2412 FutureWarning,
2413 stacklevel=find_stack_level(),
2414 )
2415 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2416
2417 @final
2418 def is_numeric(self) -> bool:
2419 """
2420 Check if the Index only consists of numeric data.
2421
2422 .. deprecated:: 2.0.0
2423 Use `pandas.api.types.is_numeric_dtype` instead.
2424
2425 Returns
2426 -------
2427 bool
2428 Whether or not the Index only consists of numeric data.
2429
2430 See Also
2431 --------
2432 is_boolean : Check if the Index only consists of booleans (deprecated).
2433 is_integer : Check if the Index only consists of integers (deprecated).
2434 is_floating : Check if the Index is a floating type (deprecated).
2435 is_object : Check if the Index is of the object dtype. (deprecated).
2436 is_categorical : Check if the Index holds categorical data (deprecated).
2437 is_interval : Check if the Index holds Interval objects (deprecated).
2438
2439 Examples
2440 --------
2441 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2442 >>> idx.is_numeric() # doctest: +SKIP
2443 True
2444
2445 >>> idx = pd.Index([1, 2, 3, 4.0])
2446 >>> idx.is_numeric() # doctest: +SKIP
2447 True
2448
2449 >>> idx = pd.Index([1, 2, 3, 4])
2450 >>> idx.is_numeric() # doctest: +SKIP
2451 True
2452
2453 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2454 >>> idx.is_numeric() # doctest: +SKIP
2455 True
2456
2457 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2458 >>> idx.is_numeric() # doctest: +SKIP
2459 False
2460 """
2461 warnings.warn(
2462 f"{type(self).__name__}.is_numeric is deprecated. "
2463 "Use pandas.api.types.is_any_real_numeric_dtype instead",
2464 FutureWarning,
2465 stacklevel=find_stack_level(),
2466 )
2467 return self.inferred_type in ["integer", "floating"]
2468
2469 @final
2470 def is_object(self) -> bool:
2471 """
2472 Check if the Index is of the object dtype.
2473
2474 .. deprecated:: 2.0.0
2475 Use `pandas.api.types.is_object_dtype` instead.
2476
2477 Returns
2478 -------
2479 bool
2480 Whether or not the Index is of the object dtype.
2481
2482 See Also
2483 --------
2484 is_boolean : Check if the Index only consists of booleans (deprecated).
2485 is_integer : Check if the Index only consists of integers (deprecated).
2486 is_floating : Check if the Index is a floating type (deprecated).
2487 is_numeric : Check if the Index only consists of numeric data (deprecated).
2488 is_categorical : Check if the Index holds categorical data (deprecated).
2489 is_interval : Check if the Index holds Interval objects (deprecated).
2490
2491 Examples
2492 --------
2493 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2494 >>> idx.is_object() # doctest: +SKIP
2495 True
2496
2497 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2498 >>> idx.is_object() # doctest: +SKIP
2499 True
2500
2501 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2502 ... "Watermelon"]).astype("category")
2503 >>> idx.is_object() # doctest: +SKIP
2504 False
2505
2506 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2507 >>> idx.is_object() # doctest: +SKIP
2508 False
2509 """
2510 warnings.warn(
2511 f"{type(self).__name__}.is_object is deprecated."
2512 "Use pandas.api.types.is_object_dtype instead",
2513 FutureWarning,
2514 stacklevel=find_stack_level(),
2515 )
2516 return is_object_dtype(self.dtype)
2517
2518 @final
2519 def is_categorical(self) -> bool:
2520 """
2521 Check if the Index holds categorical data.
2522
2523 .. deprecated:: 2.0.0
2524 Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.
2525
2526 Returns
2527 -------
2528 bool
2529 True if the Index is categorical.
2530
2531 See Also
2532 --------
2533 CategoricalIndex : Index for categorical data.
2534 is_boolean : Check if the Index only consists of booleans (deprecated).
2535 is_integer : Check if the Index only consists of integers (deprecated).
2536 is_floating : Check if the Index is a floating type (deprecated).
2537 is_numeric : Check if the Index only consists of numeric data (deprecated).
2538 is_object : Check if the Index is of the object dtype. (deprecated).
2539 is_interval : Check if the Index holds Interval objects (deprecated).
2540
2541 Examples
2542 --------
2543 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2544 ... "Watermelon"]).astype("category")
2545 >>> idx.is_categorical() # doctest: +SKIP
2546 True
2547
2548 >>> idx = pd.Index([1, 3, 5, 7])
2549 >>> idx.is_categorical() # doctest: +SKIP
2550 False
2551
2552 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2553 >>> s
2554 0 Peter
2555 1 Victor
2556 2 Elisabeth
2557 3 Mar
2558 dtype: object
2559 >>> s.index.is_categorical() # doctest: +SKIP
2560 False
2561 """
2562 warnings.warn(
2563 f"{type(self).__name__}.is_categorical is deprecated."
2564 "Use pandas.api.types.is_categorical_dtype instead",
2565 FutureWarning,
2566 stacklevel=find_stack_level(),
2567 )
2568
2569 return self.inferred_type in ["categorical"]
2570
2571 @final
2572 def is_interval(self) -> bool:
2573 """
2574 Check if the Index holds Interval objects.
2575
2576 .. deprecated:: 2.0.0
2577 Use `isinstance(index.dtype, pd.IntervalDtype)` instead.
2578
2579 Returns
2580 -------
2581 bool
2582 Whether or not the Index holds Interval objects.
2583
2584 See Also
2585 --------
2586 IntervalIndex : Index for Interval objects.
2587 is_boolean : Check if the Index only consists of booleans (deprecated).
2588 is_integer : Check if the Index only consists of integers (deprecated).
2589 is_floating : Check if the Index is a floating type (deprecated).
2590 is_numeric : Check if the Index only consists of numeric data (deprecated).
2591 is_object : Check if the Index is of the object dtype. (deprecated).
2592 is_categorical : Check if the Index holds categorical data (deprecated).
2593
2594 Examples
2595 --------
2596 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2597 ... pd.Interval(left=5, right=10)])
2598 >>> idx.is_interval() # doctest: +SKIP
2599 True
2600
2601 >>> idx = pd.Index([1, 3, 5, 7])
2602 >>> idx.is_interval() # doctest: +SKIP
2603 False
2604 """
2605 warnings.warn(
2606 f"{type(self).__name__}.is_interval is deprecated."
2607 "Use pandas.api.types.is_interval_dtype instead",
2608 FutureWarning,
2609 stacklevel=find_stack_level(),
2610 )
2611 return self.inferred_type in ["interval"]
2612
2613 @final
2614 def _holds_integer(self) -> bool:
2615 """
2616 Whether the type is an integer type.
2617 """
2618 return self.inferred_type in ["integer", "mixed-integer"]
2619
2620 @final
2621 def holds_integer(self) -> bool:
2622 """
2623 Whether the type is an integer type.
2624
2625 .. deprecated:: 2.0.0
2626 Use `pandas.api.types.infer_dtype` instead
2627 """
2628 warnings.warn(
2629 f"{type(self).__name__}.holds_integer is deprecated. "
2630 "Use pandas.api.types.infer_dtype instead.",
2631 FutureWarning,
2632 stacklevel=find_stack_level(),
2633 )
2634 return self._holds_integer()
2635
2636 @cache_readonly
2637 def inferred_type(self) -> str_t:
2638 """
2639 Return a string of the type inferred from the values.
2640 """
2641 return lib.infer_dtype(self._values, skipna=False)
2642
2643 @cache_readonly
2644 @final
2645 def _is_all_dates(self) -> bool:
2646 """
2647 Whether or not the index values only consist of dates.
2648 """
2649 if needs_i8_conversion(self.dtype):
2650 return True
2651 elif self.dtype != _dtype_obj:
2652 # TODO(ExtensionIndex): 3rd party EA might override?
2653 # Note: this includes IntervalIndex, even when the left/right
2654 # contain datetime-like objects.
2655 return False
2656 elif self._is_multi:
2657 return False
2658 return is_datetime_array(ensure_object(self._values))
2659
2660 @final
2661 @cache_readonly
2662 def _is_multi(self) -> bool:
2663 """
2664 Cached check equivalent to isinstance(self, MultiIndex)
2665 """
2666 return isinstance(self, ABCMultiIndex)
2667
2668 # --------------------------------------------------------------------
2669 # Pickle Methods
2670
2671 def __reduce__(self):
2672 d = {"data": self._data, "name": self.name}
2673 return _new_Index, (type(self), d), None
2674
2675 # --------------------------------------------------------------------
2676 # Null Handling Methods
2677
2678 @cache_readonly
2679 def _na_value(self):
2680 """The expected NA value to use with this index."""
2681 dtype = self.dtype
2682 if isinstance(dtype, np.dtype):
2683 if dtype.kind in ["m", "M"]:
2684 return NaT
2685 return np.nan
2686 return dtype.na_value
2687
2688 @cache_readonly
2689 def _isnan(self) -> npt.NDArray[np.bool_]:
2690 """
2691 Return if each value is NaN.
2692 """
2693 if self._can_hold_na:
2694 return isna(self)
2695 else:
2696 # shouldn't reach to this condition by checking hasnans beforehand
2697 values = np.empty(len(self), dtype=np.bool_)
2698 values.fill(False)
2699 return values
2700
2701 @cache_readonly
2702 def hasnans(self) -> bool:
2703 """
2704 Return True if there are any NaNs.
2705
2706 Enables various performance speedups.
2707
2708 Returns
2709 -------
2710 bool
2711 """
2712 if self._can_hold_na:
2713 return bool(self._isnan.any())
2714 else:
2715 return False
2716
2717 @final
2718 def isna(self) -> npt.NDArray[np.bool_]:
2719 """
2720 Detect missing values.
2721
2722 Return a boolean same-sized object indicating if the values are NA.
2723 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2724 mapped to ``True`` values.
2725 Everything else get mapped to ``False`` values. Characters such as
2726 empty strings `''` or :attr:`numpy.inf` are not considered NA values
2727 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2728
2729 Returns
2730 -------
2731 numpy.ndarray[bool]
2732 A boolean array of whether my values are NA.
2733
2734 See Also
2735 --------
2736 Index.notna : Boolean inverse of isna.
2737 Index.dropna : Omit entries with missing values.
2738 isna : Top-level isna.
2739 Series.isna : Detect missing values in Series object.
2740
2741 Examples
2742 --------
2743 Show which entries in a pandas.Index are NA. The result is an
2744 array.
2745
2746 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2747 >>> idx
2748 Index([5.2, 6.0, nan], dtype='float64')
2749 >>> idx.isna()
2750 array([False, False, True])
2751
2752 Empty strings are not considered NA values. None is considered an NA
2753 value.
2754
2755 >>> idx = pd.Index(['black', '', 'red', None])
2756 >>> idx
2757 Index(['black', '', 'red', None], dtype='object')
2758 >>> idx.isna()
2759 array([False, False, False, True])
2760
2761 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2762
2763 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2764 ... pd.Timestamp(''), None, pd.NaT])
2765 >>> idx
2766 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2767 dtype='datetime64[ns]', freq=None)
2768 >>> idx.isna()
2769 array([False, True, True, True])
2770 """
2771 return self._isnan
2772
2773 isnull = isna
2774
2775 @final
2776 def notna(self) -> npt.NDArray[np.bool_]:
2777 """
2778 Detect existing (non-missing) values.
2779
2780 Return a boolean same-sized object indicating if the values are not NA.
2781 Non-missing values get mapped to ``True``. Characters such as empty
2782 strings ``''`` or :attr:`numpy.inf` are not considered NA values
2783 (unless you set ``pandas.options.mode.use_inf_as_na = True``).
2784 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2785 values.
2786
2787 Returns
2788 -------
2789 numpy.ndarray[bool]
2790 Boolean array to indicate which entries are not NA.
2791
2792 See Also
2793 --------
2794 Index.notnull : Alias of notna.
2795 Index.isna: Inverse of notna.
2796 notna : Top-level notna.
2797
2798 Examples
2799 --------
2800 Show which entries in an Index are not NA. The result is an
2801 array.
2802
2803 >>> idx = pd.Index([5.2, 6.0, np.NaN])
2804 >>> idx
2805 Index([5.2, 6.0, nan], dtype='float64')
2806 >>> idx.notna()
2807 array([ True, True, False])
2808
2809 Empty strings are not considered NA values. None is considered a NA
2810 value.
2811
2812 >>> idx = pd.Index(['black', '', 'red', None])
2813 >>> idx
2814 Index(['black', '', 'red', None], dtype='object')
2815 >>> idx.notna()
2816 array([ True, True, True, False])
2817 """
2818 return ~self.isna()
2819
2820 notnull = notna
2821
2822 def fillna(self, value=None, downcast=None):
2823 """
2824 Fill NA/NaN values with the specified value.
2825
2826 Parameters
2827 ----------
2828 value : scalar
2829 Scalar value to use to fill holes (e.g. 0).
2830 This value cannot be a list-likes.
2831 downcast : dict, default is None
2832 A dict of item->dtype of what to downcast if possible,
2833 or the string 'infer' which will try to downcast to an appropriate
2834 equal type (e.g. float64 to int64 if possible).
2835
2836 Returns
2837 -------
2838 Index
2839
2840 See Also
2841 --------
2842 DataFrame.fillna : Fill NaN values of a DataFrame.
2843 Series.fillna : Fill NaN Values of a Series.
2844 """
2845
2846 value = self._require_scalar(value)
2847 if self.hasnans:
2848 result = self.putmask(self._isnan, value)
2849 if downcast is None:
2850 # no need to care metadata other than name
2851 # because it can't have freq if it has NaTs
2852 # _with_infer needed for test_fillna_categorical
2853 return Index._with_infer(result, name=self.name)
2854 raise NotImplementedError(
2855 f"{type(self).__name__}.fillna does not support 'downcast' "
2856 "argument values other than 'None'."
2857 )
2858 return self._view()
2859
2860 def dropna(self: _IndexT, how: AnyAll = "any") -> _IndexT:
2861 """
2862 Return Index without NA/NaN values.
2863
2864 Parameters
2865 ----------
2866 how : {'any', 'all'}, default 'any'
2867 If the Index is a MultiIndex, drop the value when any or all levels
2868 are NaN.
2869
2870 Returns
2871 -------
2872 Index
2873 """
2874 if how not in ("any", "all"):
2875 raise ValueError(f"invalid how option: {how}")
2876
2877 if self.hasnans:
2878 res_values = self._values[~self._isnan]
2879 return type(self)._simple_new(res_values, name=self.name)
2880 return self._view()
2881
2882 # --------------------------------------------------------------------
2883 # Uniqueness Methods
2884
2885 def unique(self: _IndexT, level: Hashable | None = None) -> _IndexT:
2886 """
2887 Return unique values in the index.
2888
2889 Unique values are returned in order of appearance, this does NOT sort.
2890
2891 Parameters
2892 ----------
2893 level : int or hashable, optional
2894 Only return values from specified level (for MultiIndex).
2895 If int, gets the level by integer position, else by level name.
2896
2897 Returns
2898 -------
2899 Index
2900
2901 See Also
2902 --------
2903 unique : Numpy array of unique values in that column.
2904 Series.unique : Return unique values of Series object.
2905 """
2906 if level is not None:
2907 self._validate_index_level(level)
2908
2909 if self.is_unique:
2910 return self._view()
2911
2912 result = super().unique()
2913 return self._shallow_copy(result)
2914
2915 def drop_duplicates(self: _IndexT, *, keep: DropKeep = "first") -> _IndexT:
2916 """
2917 Return Index with duplicate values removed.
2918
2919 Parameters
2920 ----------
2921 keep : {'first', 'last', ``False``}, default 'first'
2922 - 'first' : Drop duplicates except for the first occurrence.
2923 - 'last' : Drop duplicates except for the last occurrence.
2924 - ``False`` : Drop all duplicates.
2925
2926 Returns
2927 -------
2928 Index
2929
2930 See Also
2931 --------
2932 Series.drop_duplicates : Equivalent method on Series.
2933 DataFrame.drop_duplicates : Equivalent method on DataFrame.
2934 Index.duplicated : Related method on Index, indicating duplicate
2935 Index values.
2936
2937 Examples
2938 --------
2939 Generate an pandas.Index with duplicate values.
2940
2941 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
2942
2943 The `keep` parameter controls which duplicate values are removed.
2944 The value 'first' keeps the first occurrence for each
2945 set of duplicated entries. The default value of keep is 'first'.
2946
2947 >>> idx.drop_duplicates(keep='first')
2948 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
2949
2950 The value 'last' keeps the last occurrence for each set of duplicated
2951 entries.
2952
2953 >>> idx.drop_duplicates(keep='last')
2954 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
2955
2956 The value ``False`` discards all sets of duplicated entries.
2957
2958 >>> idx.drop_duplicates(keep=False)
2959 Index(['cow', 'beetle', 'hippo'], dtype='object')
2960 """
2961 if self.is_unique:
2962 return self._view()
2963
2964 return super().drop_duplicates(keep=keep)
2965
2966 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
2967 """
2968 Indicate duplicate index values.
2969
2970 Duplicated values are indicated as ``True`` values in the resulting
2971 array. Either all duplicates, all except the first, or all except the
2972 last occurrence of duplicates can be indicated.
2973
2974 Parameters
2975 ----------
2976 keep : {'first', 'last', False}, default 'first'
2977 The value or values in a set of duplicates to mark as missing.
2978
2979 - 'first' : Mark duplicates as ``True`` except for the first
2980 occurrence.
2981 - 'last' : Mark duplicates as ``True`` except for the last
2982 occurrence.
2983 - ``False`` : Mark all duplicates as ``True``.
2984
2985 Returns
2986 -------
2987 np.ndarray[bool]
2988
2989 See Also
2990 --------
2991 Series.duplicated : Equivalent method on pandas.Series.
2992 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
2993 Index.drop_duplicates : Remove duplicate values from Index.
2994
2995 Examples
2996 --------
2997 By default, for each set of duplicated values, the first occurrence is
2998 set to False and all others to True:
2999
3000 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3001 >>> idx.duplicated()
3002 array([False, False, True, False, True])
3003
3004 which is equivalent to
3005
3006 >>> idx.duplicated(keep='first')
3007 array([False, False, True, False, True])
3008
3009 By using 'last', the last occurrence of each set of duplicated values
3010 is set on False and all others on True:
3011
3012 >>> idx.duplicated(keep='last')
3013 array([ True, False, True, False, False])
3014
3015 By setting keep on ``False``, all duplicates are True:
3016
3017 >>> idx.duplicated(keep=False)
3018 array([ True, False, True, False, True])
3019 """
3020 if self.is_unique:
3021 # fastpath available bc we are immutable
3022 return np.zeros(len(self), dtype=bool)
3023 return self._duplicated(keep=keep)
3024
3025 # --------------------------------------------------------------------
3026 # Arithmetic & Logical Methods
3027
3028 def __iadd__(self, other):
3029 # alias for __add__
3030 return self + other
3031
3032 @final
3033 def __nonzero__(self) -> NoReturn:
3034 raise ValueError(
3035 f"The truth value of a {type(self).__name__} is ambiguous. "
3036 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3037 )
3038
3039 __bool__ = __nonzero__
3040
3041 # --------------------------------------------------------------------
3042 # Set Operation Methods
3043
3044 def _get_reconciled_name_object(self, other):
3045 """
3046 If the result of a set operation will be self,
3047 return self, unless the name changes, in which
3048 case make a shallow copy of self.
3049 """
3050 name = get_op_result_name(self, other)
3051 if self.name is not name:
3052 return self.rename(name)
3053 return self
3054
3055 @final
3056 def _validate_sort_keyword(self, sort):
3057 if sort not in [None, False, True]:
3058 raise ValueError(
3059 "The 'sort' keyword only takes the values of "
3060 f"None, True, or False; {sort} was passed."
3061 )
3062
3063 @final
3064 def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
3065 """
3066 With mismatched timezones, cast both to UTC.
3067 """
3068 # Caller is responsibelf or checking
3069 # `not is_dtype_equal(self.dtype, other.dtype)`
3070 if (
3071 isinstance(self, ABCDatetimeIndex)
3072 and isinstance(other, ABCDatetimeIndex)
3073 and self.tz is not None
3074 and other.tz is not None
3075 ):
3076 # GH#39328, GH#45357
3077 left = self.tz_convert("UTC")
3078 right = other.tz_convert("UTC")
3079 return left, right
3080 return self, other
3081
    @final
    def union(self, other, sort=None):
        """
        Form the union of two Index objects.

        If the Index objects are incompatible, both Index objects will be
        cast to dtype('object') first.

        Parameters
        ----------
        other : Index or array-like
        sort : bool or None, default None
            Whether to sort the resulting Index.

            * None : Sort the result, except when

              1. `self` and `other` are equal.
              2. `self` or `other` has length 0.
              3. Some values in `self` or `other` cannot be compared.
                 A RuntimeWarning is issued in this case.

            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        Union matching dtypes

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.union(idx2)
        Index([1, 2, 3, 4, 5, 6], dtype='int64')

        Union mismatched dtypes

        >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
        >>> idx2 = pd.Index([1, 2, 3, 4])
        >>> idx1.union(idx2)
        Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')

        MultiIndex case

        >>> idx1 = pd.MultiIndex.from_arrays(
        ...     [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
        ... )
        >>> idx1
        MultiIndex([(1,  'Red'),
            (1, 'Blue'),
            (2,  'Red'),
            (2, 'Blue')],
           )
        >>> idx2 = pd.MultiIndex.from_arrays(
        ...     [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
        ... )
        >>> idx2
        MultiIndex([(3,   'Red'),
            (3, 'Green'),
            (2,   'Red'),
            (2, 'Green')],
           )
        >>> idx1.union(idx2)
        MultiIndex([(1,  'Blue'),
            (1,   'Red'),
            (2,  'Blue'),
            (2, 'Green'),
            (2,   'Red'),
            (3, 'Green'),
            (3,   'Red')],
           )
        >>> idx1.union(idx2, sort=False)
        MultiIndex([(1,   'Red'),
            (1,  'Blue'),
            (2,   'Red'),
            (2,  'Blue'),
            (3,   'Red'),
            (3, 'Green'),
            (2, 'Green')],
           )
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        # coerce array-likes to Index and compute the result name
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # Mismatched dtypes: cast both sides to a common dtype and retry.
            if (
                isinstance(self, ABCMultiIndex)
                and not is_object_dtype(_unpack_nested_dtype(other))
                and len(other) > 0
            ):
                raise NotImplementedError(
                    "Can only union MultiIndex with MultiIndex or Index of tuples, "
                    "try mi.to_flat_index().union(other) instead."
                )
            # Both tz-aware with different tzs -> convert both to UTC first.
            self, other = self._dti_setop_align_tzs(other, "union")

            dtype = self._find_common_type_compat(other)
            left = self.astype(dtype, copy=False)
            right = other.astype(dtype, copy=False)
            # recurse with matching dtypes
            return left.union(right, sort=sort)

        elif not len(other) or self.equals(other):
            # NB: whether this (and the `if not len(self)` check below) come before
            # or after the is_dtype_equal check above affects the returned dtype
            result = self._get_reconciled_name_object(other)
            if sort is True:
                return result.sort_values()
            return result

        elif not len(self):
            # empty self: the union is just `other` (with reconciled name)
            result = other._get_reconciled_name_object(self)
            if sort is True:
                return result.sort_values()
            return result

        # general case: same-dtype, both non-empty, not equal
        result = self._union(other, sort=sort)

        return self._wrap_setop_result(other, result)
3203
    def _union(self, other: Index, sort: bool | None):
        """
        Specific union logic should go here. In subclasses, union behavior
        should be overwritten here rather than in `self.union`.

        Parameters
        ----------
        other : Index or array-like
        sort : False or None, default False
            Whether to sort the resulting index.

            * True : sort the result
            * False : do not sort the result.
            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.

        Returns
        -------
        Index
        """
        lvals = self._values
        rvals = other._values

        if (
            sort in (None, True)
            and self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and not (self.has_duplicates and other.has_duplicates)
            and self._can_use_libjoin
        ):
            # Both are monotonic and at least one is unique, so can use outer join
            # (actually don't need either unique, but without this restriction
            # test_union_same_value_duplicated_in_both fails)
            try:
                return self._outer_indexer(other)[0]
            except (TypeError, IncompatibleFrequency):
                # incomparable objects; should only be for object dtype
                # Fall back to appending the values of `other` not already
                # present in `self`, preserving order of appearance.
                value_list = list(lvals)

                # worth making this faster? a very unusual case
                value_set = set(lvals)
                value_list.extend([x for x in rvals if x not in value_set])
                # If objects are unorderable, we must have object dtype.
                return np.array(value_list, dtype=object)

        elif not other.is_unique:
            # other has duplicates
            result_dups = algos.union_with_duplicates(self, other)
            return _maybe_try_sort(result_dups, sort)

        # The rest of this method is analogous to Index._intersection_via_get_indexer

        # Self may have duplicates; other already checked as unique
        # find indexes of things in "other" that are not in "self"
        if self._index_as_unique:
            indexer = self.get_indexer(other)
            missing = (indexer == -1).nonzero()[0]
        else:
            # non-unique self: use the "missing" part of get_indexer_non_unique
            missing = algos.unique1d(self.get_indexer_non_unique(other)[1])

        result: Index | MultiIndex | ArrayLike
        if self._is_multi:
            # Preserve MultiIndex to avoid losing dtypes
            result = self.append(other.take(missing))

        else:
            if len(missing) > 0:
                other_diff = rvals.take(missing)
                result = concat_compat((lvals, other_diff))
            else:
                # everything in `other` is already in `self`
                result = lvals

        if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
            # if both are monotonic then result should already be sorted
            result = _maybe_try_sort(result, sort)

        return result
3281
3282 @final
3283 def _wrap_setop_result(self, other: Index, result) -> Index:
3284 name = get_op_result_name(self, other)
3285 if isinstance(result, Index):
3286 if result.name != name:
3287 result = result.rename(name)
3288 else:
3289 result = self._shallow_copy(result, name=name)
3290 return result
3291
    @final
    def intersection(self, other, sort: bool = False):
        """
        Form the intersection of two Index objects.

        This returns a new Index with elements common to the index and `other`.

        Parameters
        ----------
        other : Index or array-like
        sort : True, False or None, default False
            Whether to sort the resulting index.

            * None : sort the result, except when `self` and `other` are equal
              or when the values cannot be compared.
            * False : do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.intersection(idx2)
        Index([3, 4], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name = self._convert_can_do_setop(other)

        if not is_dtype_equal(self.dtype, other.dtype):
            # dtypes differ; _dti_setop_align_tzs may replace both operands
            # before any comparison (datetime-like tz handling — see helper)
            self, other = self._dti_setop_align_tzs(other, "intersection")

        if self.equals(other):
            # Fast path: element-wise equal indexes; only dedupe/rename needed
            if self.has_duplicates:
                result = self.unique()._get_reconciled_name_object(other)
            else:
                result = self._get_reconciled_name_object(other)
            if sort is True:
                result = result.sort_values()
            return result

        if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful about having commutativity

            if self._is_multi or other._is_multi:
                # _convert_can_do_setop ensures that we have both or neither
                # We retain self.levels
                return self[:0].rename(result_name)

            dtype = self._find_common_type_compat(other)
            if is_dtype_equal(self.dtype, dtype):
                # Slicing allows us to retain DTI/TDI.freq, RangeIndex

                # Note: self[:0] vs other[:0] affects
                # 1) which index's `freq` we get in DTI/TDI cases
                # This may be a historical artifact, i.e. no documented
                # reason for this choice.
                # 2) The `step` we get in RangeIndex cases
                if len(self) == 0:
                    return self[:0].rename(result_name)
                else:
                    return other[:0].rename(result_name)

            return Index([], dtype=dtype, name=result_name)

        elif not self._should_compare(other):
            # We can infer that the intersection is empty.
            if isinstance(self, ABCMultiIndex):
                return self[:0].rename(result_name)
            return Index([], name=result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Comparable but mismatched dtypes: cast both to a common dtype
            # and retry, so _intersection sees matching dtypes.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.intersection(other, sort=sort)

        result = self._intersection(other, sort=sort)
        return self._wrap_intersection_result(other, result)
3375
    def _intersection(self, other: Index, sort: bool = False):
        """
        intersection specialized to the case with matching dtypes.
        """
        if (
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and not isinstance(self, ABCMultiIndex)
        ):
            # Fast path: both sides sorted -> use the libjoin inner indexer.
            try:
                res_indexer, indexer, _ = self._inner_indexer(other)
            except TypeError:
                # non-comparable; should only be for object dtype
                pass
            else:
                # TODO: algos.unique1d should preserve DTA/TDA
                if is_numeric_dtype(self):
                    # This is faster, because Index.unique() checks for uniqueness
                    # before calculating the unique values.
                    res = algos.unique1d(res_indexer)
                else:
                    result = self.take(indexer)
                    res = result.drop_duplicates()
                return ensure_wrapped_if_datetimelike(res)

        # Fallback: positional lookup via get_indexer on the unique values.
        res_values = self._intersection_via_get_indexer(other, sort=sort)
        res_values = _maybe_try_sort(res_values, sort)
        return res_values
3405
3406 def _wrap_intersection_result(self, other, result):
3407 # We will override for MultiIndex to handle empty results
3408 return self._wrap_setop_result(other, result)
3409
3410 @final
3411 def _intersection_via_get_indexer(
3412 self, other: Index | MultiIndex, sort
3413 ) -> ArrayLike | MultiIndex:
3414 """
3415 Find the intersection of two Indexes using get_indexer.
3416
3417 Returns
3418 -------
3419 np.ndarray or ExtensionArray
3420 The returned array will be unique.
3421 """
3422 left_unique = self.unique()
3423 right_unique = other.unique()
3424
3425 # even though we are unique, we need get_indexer_for for IntervalIndex
3426 indexer = left_unique.get_indexer_for(right_unique)
3427
3428 mask = indexer != -1
3429
3430 taker = indexer.take(mask.nonzero()[0])
3431 if sort is False:
3432 # sort bc we want the elements in the same order they are in self
3433 # unnecessary in the case with sort=None bc we will sort later
3434 taker = np.sort(taker)
3435
3436 if isinstance(left_unique, ABCMultiIndex):
3437 result = left_unique.take(taker)
3438 else:
3439 result = left_unique.take(taker)._values
3440 return result
3441
3442 @final
3443 def difference(self, other, sort=None):
3444 """
3445 Return a new Index with elements of index not in `other`.
3446
3447 This is the set difference of two Index objects.
3448
3449 Parameters
3450 ----------
3451 other : Index or array-like
3452 sort : bool or None, default None
3453 Whether to sort the resulting index. By default, the
3454 values are attempted to be sorted, but any TypeError from
3455 incomparable elements is caught by pandas.
3456
3457 * None : Attempt to sort the result, but catch any TypeErrors
3458 from comparing incomparable elements.
3459 * False : Do not sort the result.
3460 * True : Sort the result (which may raise TypeError).
3461
3462 Returns
3463 -------
3464 Index
3465
3466 Examples
3467 --------
3468 >>> idx1 = pd.Index([2, 1, 3, 4])
3469 >>> idx2 = pd.Index([3, 4, 5, 6])
3470 >>> idx1.difference(idx2)
3471 Index([1, 2], dtype='int64')
3472 >>> idx1.difference(idx2, sort=False)
3473 Index([2, 1], dtype='int64')
3474 """
3475 self._validate_sort_keyword(sort)
3476 self._assert_can_do_setop(other)
3477 other, result_name = self._convert_can_do_setop(other)
3478
3479 # Note: we do NOT call _dti_setop_align_tzs here, as there
3480 # is no requirement that .difference be commutative, so it does
3481 # not cast to object.
3482
3483 if self.equals(other):
3484 # Note: we do not (yet) sort even if sort=None GH#24959
3485 return self[:0].rename(result_name)
3486
3487 if len(other) == 0:
3488 # Note: we do not (yet) sort even if sort=None GH#24959
3489 result = self.rename(result_name)
3490 if sort is True:
3491 return result.sort_values()
3492 return result
3493
3494 if not self._should_compare(other):
3495 # Nothing matches -> difference is everything
3496 result = self.rename(result_name)
3497 if sort is True:
3498 return result.sort_values()
3499 return result
3500
3501 result = self._difference(other, sort=sort)
3502 return self._wrap_difference_result(other, result)
3503
3504 def _difference(self, other, sort):
3505 # overridden by RangeIndex
3506
3507 this = self.unique()
3508
3509 indexer = this.get_indexer_for(other)
3510 indexer = indexer.take((indexer != -1).nonzero()[0])
3511
3512 label_diff = np.setdiff1d(np.arange(this.size), indexer, assume_unique=True)
3513
3514 the_diff: MultiIndex | ArrayLike
3515 if isinstance(this, ABCMultiIndex):
3516 the_diff = this.take(label_diff)
3517 else:
3518 the_diff = this._values.take(label_diff)
3519 the_diff = _maybe_try_sort(the_diff, sort)
3520
3521 return the_diff
3522
3523 def _wrap_difference_result(self, other, result):
3524 # We will override for MultiIndex to handle empty results
3525 return self._wrap_setop_result(other, result)
3526
    def symmetric_difference(self, other, result_name=None, sort=None):
        """
        Compute the symmetric difference of two Index objects.

        Parameters
        ----------
        other : Index or array-like
        result_name : str
        sort : bool or None, default None
            Whether to sort the resulting index. By default, the
            values are attempted to be sorted, but any TypeError from
            incomparable elements is caught by pandas.

            * None : Attempt to sort the result, but catch any TypeErrors
              from comparing incomparable elements.
            * False : Do not sort the result.
            * True : Sort the result (which may raise TypeError).

        Returns
        -------
        Index

        Notes
        -----
        ``symmetric_difference`` contains elements that appear in either
        ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
        ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
        dropped.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([2, 3, 4, 5])
        >>> idx1.symmetric_difference(idx2)
        Index([1, 5], dtype='int64')
        """
        self._validate_sort_keyword(sort)
        self._assert_can_do_setop(other)
        other, result_name_update = self._convert_can_do_setop(other)
        # An explicitly-passed result_name wins over the inferred one.
        if result_name is None:
            result_name = result_name_update

        if not is_dtype_equal(self.dtype, other.dtype):
            self, other = self._dti_setop_align_tzs(other, "symmetric_difference")

        if not self._should_compare(other):
            # No overlap possible, so the symmetric difference is the union.
            return self.union(other, sort=sort).rename(result_name)

        elif not is_dtype_equal(self.dtype, other.dtype):
            # Comparable but mismatched dtypes: cast both sides and retry.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            that = other.astype(dtype, copy=False)
            return this.symmetric_difference(that, sort=sort).rename(result_name)

        this = self.unique()
        other = other.unique()
        indexer = this.get_indexer_for(other)

        # {this} minus {other}
        common_indexer = indexer.take((indexer != -1).nonzero()[0])
        left_indexer = np.setdiff1d(
            np.arange(this.size), common_indexer, assume_unique=True
        )
        left_diff = this.take(left_indexer)

        # {other} minus {this}
        right_indexer = (indexer == -1).nonzero()[0]
        right_diff = other.take(right_indexer)

        res_values = left_diff.append(right_diff)
        result = _maybe_try_sort(res_values, sort)

        if not self._is_multi:
            # Pass the appended values' dtype explicitly to keep it.
            return Index(result, name=result_name, dtype=res_values.dtype)
        else:
            left_diff = cast("MultiIndex", left_diff)
            if len(result) == 0:
                # result might be an Index, if other was an Index
                return left_diff.remove_unused_levels().set_names(result_name)
            return result.set_names(result_name)
3607
3608 @final
3609 def _assert_can_do_setop(self, other) -> bool:
3610 if not is_list_like(other):
3611 raise TypeError("Input must be Index or array-like")
3612 return True
3613
3614 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3615 if not isinstance(other, Index):
3616 other = Index(other, name=self.name)
3617 result_name = self.name
3618 else:
3619 result_name = get_op_result_name(self, other)
3620 return other, result_name
3621
3622 # --------------------------------------------------------------------
3623 # Indexing Methods
3624
3625 def get_loc(self, key):
3626 """
3627 Get integer location, slice or boolean mask for requested label.
3628
3629 Parameters
3630 ----------
3631 key : label
3632
3633 Returns
3634 -------
3635 int if unique index, slice if monotonic index, else mask
3636
3637 Examples
3638 --------
3639 >>> unique_index = pd.Index(list('abc'))
3640 >>> unique_index.get_loc('b')
3641 1
3642
3643 >>> monotonic_index = pd.Index(list('abbc'))
3644 >>> monotonic_index.get_loc('b')
3645 slice(1, 3, None)
3646
3647 >>> non_monotonic_index = pd.Index(list('abcb'))
3648 >>> non_monotonic_index.get_loc('b')
3649 array([False, True, False, True])
3650 """
3651 casted_key = self._maybe_cast_indexer(key)
3652 try:
3653 return self._engine.get_loc(casted_key)
3654 except KeyError as err:
3655 raise KeyError(key) from err
3656 except TypeError:
3657 # If we have a listlike key, _check_indexing_error will raise
3658 # InvalidIndexError. Otherwise we fall through and re-raise
3659 # the TypeError.
3660 self._check_indexing_error(key)
3661 raise
3662
    # Shared docstring template for get_indexer; subclasses render it with
    # "%"-interpolation of _index_doc_kwargs (%(target_klass)s,
    # %(raises_section)s) and attach it via @Appender.
    _index_shared_docs[
        "get_indexer"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        %(raises_section)s
        Notes
        -----
        Returns -1 for unmatched values, for further explanation see the
        example below.

        Examples
        --------
        >>> index = pd.Index(['c', 'a', 'b'])
        >>> index.get_indexer(['a', 'b', 'x'])
        array([ 1,  2, -1])

        Notice that the return value is an array of locations in ``index``
        and ``x`` is marked by -1, as it is not in ``index``.
        """
3715
    @Appender(_index_shared_docs["get_indexer"] % _index_doc_kwargs)
    @final
    def get_indexer(
        self,
        target,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        method = clean_reindex_fill_method(method)
        # Keep the un-cast target: needed below to distinguish genuine NaNs
        # from NaNs introduced by _maybe_cast_listlike_indexer (GH#45361).
        orig_target = target
        target = self._maybe_cast_listlike_indexer(target)

        self._check_indexing_method(method, limit, tolerance)

        if not self._index_as_unique:
            raise InvalidIndexError(self._requires_unique_msg)

        if len(target) == 0:
            return np.array([], dtype=np.intp)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex get special treatment bc numeric scalars can be
            # matched to Interval scalars
            return self._get_indexer_non_comparable(target, method=method, unique=True)

        if is_categorical_dtype(self.dtype):
            # _maybe_cast_listlike_indexer ensures target has our dtype
            # (could improve perf by doing _should_compare check earlier?)
            assert is_dtype_equal(self.dtype, target.dtype)

            indexer = self._engine.get_indexer(target.codes)
            if self.hasnans and target.hasnans:
                # After _maybe_cast_listlike_indexer, target elements which do not
                # belong to some category are changed to NaNs
                # Mask to track actual NaN values compared to inserted NaN values
                # GH#45361
                target_nans = isna(orig_target)
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[target_nans] = loc
                indexer[mask & ~target_nans] = -1
            return indexer

        if is_categorical_dtype(target.dtype):
            # potential fastpath
            # get an indexer for unique categories then propagate to codes via take_nd
            # get_indexer instead of _get_indexer needed for MultiIndex cases
            # e.g. test_append_different_columns_types
            categories_indexer = self.get_indexer(target.categories)

            indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)

            if (not self._is_multi and self.hasnans) and target.hasnans:
                # Exclude MultiIndex because hasnans raises NotImplementedError
                # we should only get here if we are unique, so loc is an integer
                # GH#41934
                loc = self.get_loc(np.nan)
                mask = target.isna()
                indexer[mask] = loc

            return ensure_platform_int(indexer)

        # If promotion changed either operand, redo the lookup on the
        # promoted pair so dtypes line up.
        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            return pself.get_indexer(
                ptarget, method=method, limit=limit, tolerance=tolerance
            )

        if is_dtype_equal(self.dtype, target.dtype) and self.equals(target):
            # Only call equals if we have same dtype to avoid inference/casting
            return np.arange(len(target), dtype=np.intp)

        if not is_dtype_equal(
            self.dtype, target.dtype
        ) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            # that can be matched to Interval scalars.
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            target = target.astype(dtype, copy=False)
            return this._get_indexer(
                target, method=method, limit=limit, tolerance=tolerance
            )

        return self._get_indexer(target, method, limit, tolerance)
3803
    def _get_indexer(
        self,
        target: Index,
        method: str_t | None = None,
        limit: int | None = None,
        tolerance=None,
    ) -> npt.NDArray[np.intp]:
        """
        Dispatch to fill/nearest/exact engine lookup.

        Caller (get_indexer) has already validated method/limit/tolerance
        via _check_indexing_method.
        """
        if tolerance is not None:
            tolerance = self._convert_tolerance(tolerance, target)

        if method in ["pad", "backfill"]:
            indexer = self._get_fill_indexer(target, method, limit, tolerance)
        elif method == "nearest":
            indexer = self._get_nearest_indexer(target, limit, tolerance)
        else:
            # Exact matching through the engine.
            if target._is_multi and self._is_multi:
                engine = self._engine
                # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
                # has no attribute "_extract_level_codes"
                tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
                    target
                )
            else:
                tgt_values = target._get_engine_target()

            indexer = self._engine.get_indexer(tgt_values)

        return ensure_platform_int(indexer)
3832
3833 @final
3834 def _should_partial_index(self, target: Index) -> bool:
3835 """
3836 Should we attempt partial-matching indexing?
3837 """
3838 if is_interval_dtype(self.dtype):
3839 if is_interval_dtype(target.dtype):
3840 return False
3841 # See https://github.com/pandas-dev/pandas/issues/47772 the commented
3842 # out code can be restored (instead of hardcoding `return True`)
3843 # once that issue is fixed
3844 # "Index" has no attribute "left"
3845 # return self.left._should_compare(target) # type: ignore[attr-defined]
3846 return True
3847 return False
3848
3849 @final
3850 def _check_indexing_method(
3851 self,
3852 method: str_t | None,
3853 limit: int | None = None,
3854 tolerance=None,
3855 ) -> None:
3856 """
3857 Raise if we have a get_indexer `method` that is not supported or valid.
3858 """
3859 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
3860 # in practice the clean_reindex_fill_method call would raise
3861 # before we get here
3862 raise ValueError("Invalid fill method") # pragma: no cover
3863
3864 if self._is_multi:
3865 if method == "nearest":
3866 raise NotImplementedError(
3867 "method='nearest' not implemented yet "
3868 "for MultiIndex; see GitHub issue 9365"
3869 )
3870 if method in ("pad", "backfill"):
3871 if tolerance is not None:
3872 raise NotImplementedError(
3873 "tolerance not implemented yet for MultiIndex"
3874 )
3875
3876 if is_interval_dtype(self.dtype) or is_categorical_dtype(self.dtype):
3877 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
3878 if method is not None:
3879 raise NotImplementedError(
3880 f"method {method} not yet implemented for {type(self).__name__}"
3881 )
3882
3883 if method is None:
3884 if tolerance is not None:
3885 raise ValueError(
3886 "tolerance argument only valid if doing pad, "
3887 "backfill or nearest reindexing"
3888 )
3889 if limit is not None:
3890 raise ValueError(
3891 "limit argument only valid if doing pad, "
3892 "backfill or nearest reindexing"
3893 )
3894
3895 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
3896 # override this method on subclasses
3897 tolerance = np.asarray(tolerance)
3898 if target.size != tolerance.size and tolerance.size > 1:
3899 raise ValueError("list-like tolerance size must match target index size")
3900 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
3901 if tolerance.ndim > 0:
3902 raise ValueError(
3903 f"tolerance argument for {type(self).__name__} with dtype "
3904 f"{self.dtype} must contain numeric elements if it is list type"
3905 )
3906
3907 raise ValueError(
3908 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
3909 f"must be numeric if it is a scalar: {repr(tolerance)}"
3910 )
3911 return tolerance
3912
    @final
    def _get_fill_indexer(
        self, target: Index, method: str_t, limit: int | None = None, tolerance=None
    ) -> npt.NDArray[np.intp]:
        """
        Pad/backfill indexer for `target`, optionally filtered by `tolerance`.
        """
        if self._is_multi:
            # TODO: get_indexer_with_fill docstring says values must be _sorted_
            # but that doesn't appear to be enforced
            # error: "IndexEngine" has no attribute "get_indexer_with_fill"
            engine = self._engine
            with warnings.catch_warnings():
                # TODO: We need to fix this. Casting to int64 in cython
                warnings.filterwarnings("ignore", category=RuntimeWarning)
                return engine.get_indexer_with_fill(  # type: ignore[union-attr]
                    target=target._values,
                    values=self._values,
                    method=method,
                    limit=limit,
                )

        if self.is_monotonic_increasing and target.is_monotonic_increasing:
            # Fast path: both sorted -> vectorized libalgos pad/backfill.
            target_values = target._get_engine_target()
            own_values = self._get_engine_target()
            if not isinstance(target_values, np.ndarray) or not isinstance(
                own_values, np.ndarray
            ):
                raise NotImplementedError

            if method == "pad":
                indexer = libalgos.pad(own_values, target_values, limit=limit)
            else:
                # i.e. "backfill"
                indexer = libalgos.backfill(own_values, target_values, limit=limit)
        else:
            # Non-monotonic target: searchsorted-based fallback (no limit).
            indexer = self._get_fill_indexer_searchsorted(target, method, limit)
        if tolerance is not None and len(self):
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
3950
    @final
    def _get_fill_indexer_searchsorted(
        self, target: Index, method: str_t, limit: int | None = None
    ) -> npt.NDArray[np.intp]:
        """
        Fallback pad/backfill get_indexer that works for monotonic decreasing
        indexes and non-monotonic targets.
        """
        if limit is not None:
            raise ValueError(
                f"limit argument for {repr(method)} method only well-defined "
                "if index and target are monotonic"
            )

        side: Literal["left", "right"] = "left" if method == "pad" else "right"

        # find exact matches first (this simplifies the algorithm)
        indexer = self.get_indexer(target)
        nonexact = indexer == -1
        indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
        if side == "left":
            # searchsorted returns "indices into a sorted array such that,
            # if the corresponding elements in v were inserted before the
            # indices, the order of a would be preserved".
            # Thus, we need to subtract 1 to find values to the left.
            indexer[nonexact] -= 1
            # This also mapped not found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values
        else:
            # Mark indices to the right of the largest value as not found
            indexer[indexer == len(self)] = -1
        return indexer
3984
    @final
    def _get_nearest_indexer(
        self, target: Index, limit: int | None, tolerance
    ) -> npt.NDArray[np.intp]:
        """
        Get the indexer for the nearest index labels; requires an index with
        values that can be subtracted from each other (e.g., not strings or
        tuples).
        """
        if not len(self):
            # Empty index: every position is a miss; pad path returns all -1.
            return self._get_fill_indexer(target, "pad")

        left_indexer = self.get_indexer(target, "pad", limit=limit)
        right_indexer = self.get_indexer(target, "backfill", limit=limit)

        left_distances = self._difference_compat(target, left_indexer)
        right_distances = self._difference_compat(target, right_indexer)

        # Strict `lt` for monotonic-increasing self means tied distances pick
        # the right (larger) index value, per the get_indexer doc template.
        op = operator.lt if self.is_monotonic_increasing else operator.le
        indexer = np.where(
            # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
            # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
            # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
            op(left_distances, right_distances)  # type: ignore[arg-type]
            | (right_indexer == -1),
            left_indexer,
            right_indexer,
        )
        if tolerance is not None:
            indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
        return indexer
4016
4017 @final
4018 def _filter_indexer_tolerance(
4019 self,
4020 target: Index,
4021 indexer: npt.NDArray[np.intp],
4022 tolerance,
4023 ) -> npt.NDArray[np.intp]:
4024 distance = self._difference_compat(target, indexer)
4025
4026 return np.where(distance <= tolerance, indexer, -1)
4027
4028 @final
4029 def _difference_compat(
4030 self, target: Index, indexer: npt.NDArray[np.intp]
4031 ) -> ArrayLike:
4032 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
4033 # of DateOffset objects, which do not support __abs__ (and would be slow
4034 # if they did)
4035
4036 if isinstance(self.dtype, PeriodDtype):
4037 # Note: we only get here with matching dtypes
4038 own_values = cast("PeriodArray", self._data)._ndarray
4039 target_values = cast("PeriodArray", target._data)._ndarray
4040 diff = own_values[indexer] - target_values
4041 else:
4042 # error: Unsupported left operand type for - ("ExtensionArray")
4043 diff = self._values[indexer] - target._values # type: ignore[operator]
4044 return abs(diff)
4045
4046 # --------------------------------------------------------------------
4047 # Indexer Conversion Methods
4048
4049 @final
4050 def _validate_positional_slice(self, key: slice) -> None:
4051 """
4052 For positional indexing, a slice must have either int or None
4053 for each of start, stop, and step.
4054 """
4055 self._validate_indexer("positional", key.start, "iloc")
4056 self._validate_indexer("positional", key.stop, "iloc")
4057 self._validate_indexer("positional", key.step, "iloc")
4058
    def _convert_slice_indexer(self, key: slice, kind: str_t):
        """
        Convert a slice indexer.

        By definition, these are labels unless 'iloc' is passed in.
        Floats are not allowed as the start, step, or stop of the slice.

        Parameters
        ----------
        key : label of the slice bound
        kind : {'loc', 'getitem'}
        """
        assert kind in ["loc", "getitem"], kind

        # potentially cast the bounds to integers
        start, stop, step = key.start, key.stop, key.step

        # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
        # to simplify this.
        if isinstance(self.dtype, np.dtype) and is_float_dtype(self.dtype):
            # We always treat __getitem__ slicing as label-based
            # translate to locations
            return self.slice_indexer(start, stop, step)

        # figure out if this is a positional indexer
        def is_int(v):
            # None is acceptable (open-ended bound), floats are not
            return v is None or is_integer(v)

        is_index_slice = is_int(start) and is_int(stop) and is_int(step)

        # special case for interval_dtype bc we do not do partial-indexing
        # on integer Intervals when slicing
        # TODO: write this in terms of e.g. should_partial_index?
        ints_are_positional = self._should_fallback_to_positional or is_interval_dtype(
            self.dtype
        )
        is_positional = is_index_slice and ints_are_positional

        if kind == "getitem":
            # called from the getitem slicers, validate that we are in fact integers
            if is_integer_dtype(self.dtype) or is_index_slice:
                # Note: these checks are redundant if we know is_index_slice
                self._validate_indexer("slice", key.start, "getitem")
                self._validate_indexer("slice", key.stop, "getitem")
                self._validate_indexer("slice", key.step, "getitem")
                return key

        # convert the slice to an indexer here

        # if we are mixed and have integers
        if is_positional:
            try:
                # Validate start & stop
                # If both bounds resolve as labels, treat the slice as
                # label-based after all.
                if start is not None:
                    self.get_loc(start)
                if stop is not None:
                    self.get_loc(stop)
                is_positional = False
            except KeyError:
                pass

        if com.is_null_slice(key):
            # It doesn't matter if we are positional or label based
            indexer = key
        elif is_positional:
            if kind == "loc":
                # GH#16121, GH#24612, GH#31810
                raise TypeError(
                    "Slicing a positional slice with .loc is not allowed, "
                    "Use .loc with labels or .iloc with positions instead.",
                )
            indexer = key
        else:
            indexer = self.slice_indexer(start, stop, step)

        return indexer
4135
4136 @final
4137 def _raise_invalid_indexer(
4138 self,
4139 form: str_t,
4140 key,
4141 reraise: lib.NoDefault | None | Exception = lib.no_default,
4142 ) -> None:
4143 """
4144 Raise consistent invalid indexer message.
4145 """
4146 msg = (
4147 f"cannot do {form} indexing on {type(self).__name__} with these "
4148 f"indexers [{key}] of type {type(key).__name__}"
4149 )
4150 if reraise is not lib.no_default:
4151 raise TypeError(msg) from reraise
4152 raise TypeError(msg)
4153
4154 # --------------------------------------------------------------------
4155 # Reindex Methods
4156
4157 @final
4158 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4159 """
4160 Check if we are allowing reindexing with this particular indexer.
4161
4162 Parameters
4163 ----------
4164 indexer : an integer ndarray
4165
4166 Raises
4167 ------
4168 ValueError if its a duplicate axis
4169 """
4170 # trying to reindex on an axis with duplicates
4171 if not self._index_as_unique and len(indexer):
4172 raise ValueError("cannot reindex on an axis with duplicate labels")
4173
    def reindex(
        self, target, method=None, level=None, limit=None, tolerance=None
    ) -> tuple[Index, npt.NDArray[np.intp] | None]:
        """
        Create index with target's values.

        Parameters
        ----------
        target : an iterable
        method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
            * default: exact matches only.
            * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match
            * nearest: use the NEAREST index value if no exact match. Tied
              distances are broken by preferring the larger index value.
        level : int, optional
            Level of multiindex.
        limit : int, optional
            Maximum number of consecutive labels in ``target`` to match for
            inexact matches.
        tolerance : int or float, optional
            Maximum distance between original and new labels for inexact
            matches. The values of the index at the matching locations must
            satisfy the equation ``abs(index[indexer] - target) <= tolerance``.

            Tolerance may be a scalar value, which applies the same tolerance
            to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, Series, and must be
            the same size as the index and its dtype must exactly match the
            index's type.

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp] or None
            Indices of output values in original index.

        Raises
        ------
        TypeError
            If ``method`` passed along with ``level``.
        ValueError
            If non-unique multi-index
        ValueError
            If non-unique index and ``method`` or ``limit`` passed.

        See Also
        --------
        Series.reindex : Conform Series to new index with optional filling logic.
        DataFrame.reindex : Conform DataFrame to new index with optional filling logic.

        Examples
        --------
        >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
        >>> idx
        Index(['car', 'bike', 'train', 'tractor'], dtype='object')
        >>> idx.reindex(['car', 'bike'])
        (Index(['car', 'bike'], dtype='object'), array([0, 1]))
        """
        # GH6552: preserve names when reindexing to non-named target
        # (i.e. neither Index nor Series).
        preserve_names = not hasattr(target, "name")

        # GH7774: preserve dtype/tz if target is empty and not an Index.
        target = ensure_has_len(target)  # target may be an iterator

        if not isinstance(target, Index) and len(target) == 0:
            # Empty non-Index target: build an empty slice of ourselves (or of
            # the requested level) so the result keeps our dtype/tz.
            if level is not None and self._is_multi:
                # "Index" has no attribute "levels"; maybe "nlevels"?
                idx = self.levels[level]  # type: ignore[attr-defined]
            else:
                idx = self
            target = idx[:0]
        else:
            target = ensure_index(target)

        if level is not None and (
            isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
        ):
            if method is not None:
                raise TypeError("Fill method not supported if level passed")

            # TODO: tests where passing `keep_order=not self._is_multi`
            # makes a difference for non-MultiIndex case
            target, indexer, _ = self._join_level(
                target, level, how="right", keep_order=not self._is_multi
            )

        else:
            if self.equals(target):
                # identical labels: nothing to re-take, so indexer is None
                indexer = None
            else:
                if self._index_as_unique:
                    indexer = self.get_indexer(
                        target, method=method, limit=limit, tolerance=tolerance
                    )
                elif self._is_multi:
                    raise ValueError("cannot handle a non-unique multi-index!")
                elif not self.is_unique:
                    # GH#42568
                    raise ValueError("cannot reindex on an axis with duplicate labels")
                else:
                    indexer, _ = self.get_indexer_non_unique(target)

        # hook point for subclasses; also re-attaches self.name when the
        # target had no name of its own (preserve_names above)
        target = self._wrap_reindex_result(target, indexer, preserve_names)
        return target, indexer
4281
4282 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4283 target = self._maybe_preserve_names(target, preserve_names)
4284 return target
4285
4286 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4287 if preserve_names and target.nlevels == 1 and target.name != self.name:
4288 target = target.copy(deep=False)
4289 target.name = self.name
4290 return target
4291
    @final
    def _reindex_non_unique(
        self, target: Index
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
        """
        Create a new index with target's values (move/add/delete values as
        necessary) use with non-unique Index and a possibly non-unique target.

        Parameters
        ----------
        target : an iterable

        Returns
        -------
        new_index : pd.Index
            Resulting index.
        indexer : np.ndarray[np.intp]
            Indices of output values in original index.
        new_indexer : np.ndarray[np.intp] or None
            Indexer into the *new* index for the still-missing labels
            (-1 entries); None when every target label was found.
        """
        target = ensure_index(target)
        if len(target) == 0:
            # GH#13691
            return self[:0], np.array([], dtype=np.intp), None

        indexer, missing = self.get_indexer_non_unique(target)
        check = indexer != -1  # positions in target that matched a label
        new_labels = self.take(indexer[check])
        new_indexer = None

        if len(missing):
            # Some target labels are absent from self: splice matched labels
            # and missing labels back together in target order.
            length = np.arange(len(indexer), dtype=np.intp)

            missing = ensure_platform_int(missing)
            missing_labels = target.take(missing)
            missing_indexer = length[~check]
            cur_labels = self.take(indexer[check]).values
            cur_indexer = length[check]

            # Index constructor below will do inference
            new_labels = np.empty((len(indexer),), dtype=object)
            new_labels[cur_indexer] = cur_labels
            new_labels[missing_indexer] = missing_labels

            # GH#38906
            if not len(self):
                new_indexer = np.arange(0, dtype=np.intp)

            # a unique indexer
            elif target.is_unique:
                # see GH5553, make sure we use the right indexer
                new_indexer = np.arange(len(indexer), dtype=np.intp)
                new_indexer[cur_indexer] = np.arange(len(cur_labels))
                new_indexer[missing_indexer] = -1

            # we have a non_unique selector, need to use the original
            # indexer here
            else:
                # need to retake to have the same size as the indexer
                indexer[~check] = -1

                # reset the new indexer to account for the new size
                new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
                new_indexer[~check] = -1

        if not isinstance(self, ABCMultiIndex):
            new_index = Index(new_labels, name=self.name)
        else:
            new_index = type(self).from_tuples(new_labels, names=self.names)
        return new_index, indexer, new_indexer
4363
4364 # --------------------------------------------------------------------
4365 # Join Methods
4366
    # Overloads for ``join``: with return_indexers=True the result is a
    # 3-tuple (joined_index, left_indexer, right_indexer); with
    # return_indexers=False just the joined Index is returned.
    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[True],
        sort: bool = ...,
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: Literal[False] = ...,
        sort: bool = ...,
    ) -> Index:
        ...

    @overload
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = ...,
        level: Level = ...,
        return_indexers: bool = ...,
        sort: bool = ...,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        ...
4402
    @final
    @_maybe_return_indexers
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level: Level = None,
        return_indexers: bool = False,
        sort: bool = False,
    ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Compute join_index and indexers to conform data structures to the new index.

        Parameters
        ----------
        other : Index
        how : {'left', 'right', 'inner', 'outer'}
        level : int or level name, default None
        return_indexers : bool, default False
        sort : bool, default False
            Sort the join keys lexicographically in the result Index. If False,
            the order of the join keys depends on the join type (how keyword).

        Returns
        -------
        join_index, (left_indexer, right_indexer)
            Indexers are None where that side can be taken as-is.
        """
        other = ensure_index(other)

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (self.tz is None) ^ (other.tz is None):
                # Raise instead of casting to object below.
                raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")

        if not self._is_multi and not other._is_multi:
            # We have specific handling for MultiIndex below
            # If promotion changed either side, redo the join on the
            # promoted pair so dtypes line up.
            pself, pother = self._maybe_promote(other)
            if pself is not self or pother is not other:
                return pself.join(
                    pother, how=how, level=level, return_indexers=True, sort=sort
                )

        lindexer: np.ndarray | None
        rindexer: np.ndarray | None

        # try to figure out the join level
        # GH3662
        if level is None and (self._is_multi or other._is_multi):
            # have the same levels/names so a simple join
            if self.names == other.names:
                pass
            else:
                return self._join_multi(other, how=how)

        # join on the level
        if level is not None and (self._is_multi or other._is_multi):
            return self._join_level(other, level, how=how)

        # Empty-side shortcuts: the non-empty (or kept) side is returned
        # unchanged and the other side's indexer is all -1 / empty.
        if len(other) == 0:
            if how in ("left", "outer"):
                join_index = self._view()
                rindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, None, rindexer
            elif how in ("right", "inner", "cross"):
                join_index = other._view()
                lindexer = np.array([])
                return join_index, lindexer, None

        if len(self) == 0:
            if how in ("right", "outer"):
                join_index = other._view()
                lindexer = np.broadcast_to(np.intp(-1), len(join_index))
                return join_index, lindexer, None
            elif how in ("left", "inner", "cross"):
                join_index = self._view()
                rindexer = np.array([])
                return join_index, None, rindexer

        if self._join_precedence < other._join_precedence:
            # Let the higher-precedence index drive the join, flipping the
            # direction and swapping the returned indexers back.
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)
            join_index, lidx, ridx = other.join(
                self, how=how, level=level, return_indexers=True
            )
            lidx, ridx = ridx, lidx
            return join_index, lidx, ridx

        if not is_dtype_equal(self.dtype, other.dtype):
            # Cast both sides to a common dtype, then join.
            dtype = self._find_common_type_compat(other)
            this = self.astype(dtype, copy=False)
            other = other.astype(dtype, copy=False)
            return this.join(other, how=how, return_indexers=True)

        _validate_join_method(how)

        if not self.is_unique and not other.is_unique:
            return self._join_non_unique(other, how=how)
        elif not self.is_unique or not other.is_unique:
            if self.is_monotonic_increasing and other.is_monotonic_increasing:
                if not is_interval_dtype(self.dtype):
                    # otherwise we will fall through to _join_via_get_indexer
                    # GH#39133
                    # go through object dtype for ea till engine is supported properly
                    return self._join_monotonic(other, how=how)
            else:
                return self._join_non_unique(other, how=how)
        elif (
            # GH48504: exclude MultiIndex to avoid going through MultiIndex._values
            self.is_monotonic_increasing
            and other.is_monotonic_increasing
            and self._can_use_libjoin
            and not isinstance(self, ABCMultiIndex)
            and not is_categorical_dtype(self.dtype)
        ):
            # Categorical is monotonic if data are ordered as categories, but join can
            # not handle this in case of not lexicographically monotonic GH#38502
            try:
                return self._join_monotonic(other, how=how)
            except TypeError:
                # object dtype; non-comparable objects
                pass

        return self._join_via_get_indexer(other, how, sort)
4527
4528 @final
4529 def _join_via_get_indexer(
4530 self, other: Index, how: JoinHow, sort: bool
4531 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4532 # Fallback if we do not have any fastpaths available based on
4533 # uniqueness/monotonicity
4534
4535 # Note: at this point we have checked matching dtypes
4536
4537 if how == "left":
4538 join_index = self
4539 elif how == "right":
4540 join_index = other
4541 elif how == "inner":
4542 # TODO: sort=False here for backwards compat. It may
4543 # be better to use the sort parameter passed into join
4544 join_index = self.intersection(other, sort=False)
4545 elif how == "outer":
4546 # TODO: sort=True here for backwards compat. It may
4547 # be better to use the sort parameter passed into join
4548 join_index = self.union(other)
4549
4550 if sort:
4551 join_index = join_index.sort_values()
4552
4553 if join_index is self:
4554 lindexer = None
4555 else:
4556 lindexer = self.get_indexer_for(join_index)
4557 if join_index is other:
4558 rindexer = None
4559 else:
4560 rindexer = other.get_indexer_for(join_index)
4561 return join_index, lindexer, rindexer
4562
    @final
    def _join_multi(self, other: Index, how: JoinHow):
        """
        Join when at least one side is a MultiIndex and the level names
        differ; joins on the overlapping level names.
        """
        from pandas.core.indexes.multi import MultiIndex
        from pandas.core.reshape.merge import restore_dropped_levels_multijoin

        # figure out join names
        self_names_list = list(com.not_none(*self.names))
        other_names_list = list(com.not_none(*other.names))
        self_names_order = self_names_list.index
        other_names_order = other_names_list.index
        self_names = set(self_names_list)
        other_names = set(other_names_list)
        overlap = self_names & other_names

        # need at least 1 in common
        if not overlap:
            raise ValueError("cannot join with no overlapping index names")

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            # Drop the non-matching levels from left and right respectively
            # (sorted to preserve each side's original level order)
            ldrop_names = sorted(self_names - overlap, key=self_names_order)
            rdrop_names = sorted(other_names - overlap, key=other_names_order)

            # if only the order differs
            if not len(ldrop_names + rdrop_names):
                self_jnlevels = self
                other_jnlevels = other.reorder_levels(self.names)
            else:
                self_jnlevels = self.droplevel(ldrop_names)
                other_jnlevels = other.droplevel(rdrop_names)

            # Join left and right
            # Join on same leveled multi-index frames is supported
            join_idx, lidx, ridx = self_jnlevels.join(
                other_jnlevels, how=how, return_indexers=True
            )

            # Restore the dropped levels
            # Returned index level order is
            # common levels, ldrop_names, rdrop_names
            dropped_names = ldrop_names + rdrop_names

            # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
            # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
            # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
            levels, codes, names = restore_dropped_levels_multijoin(
                self,
                other,
                dropped_names,
                join_idx,
                lidx,  # type: ignore[arg-type]
                ridx,  # type: ignore[arg-type]
            )

            # Re-create the multi-index
            multi_join_idx = MultiIndex(
                levels=levels, codes=codes, names=names, verify_integrity=False
            )

            multi_join_idx = multi_join_idx.remove_unused_levels()

            return multi_join_idx, lidx, ridx

        jl = list(overlap)[0]

        # Case where only one index is multi
        # make the indices into mi's that match
        flip_order = False
        if isinstance(self, MultiIndex):
            # ensure the non-multi index is on the left so we can use
            # _join_level, then flip the direction and indexers back
            self, other = other, self
            flip_order = True
            # flip if join method is right or left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        level = other.names.index(jl)
        result = self._join_level(other, level, how=how)

        if flip_order:
            return result[0], result[2], result[1]
        return result
4644
    @final
    def _join_non_unique(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        """
        Join where at least one side contains duplicate labels.

        Delegates indexer construction to the merge machinery, then fills
        labels unmatched on the left (-1 entries in ``left_idx``) from
        ``other`` via ``putmask``.
        """
        from pandas.core.reshape.merge import get_join_indexers

        # We only get here if dtypes match
        assert self.dtype == other.dtype

        left_idx, right_idx = get_join_indexers(
            [self._values], [other._values], how=how, sort=True
        )
        mask = left_idx == -1

        join_idx = self.take(left_idx)
        right = other.take(right_idx)
        join_index = join_idx.putmask(mask, right)
        return join_index, left_idx, right_idx
4663
    @final
    def _join_level(
        self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
    ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        The join method *only* affects the level of the resulting
        MultiIndex. Otherwise it just exactly aligns the Index data to the
        labels of the level in the MultiIndex.

        If ```keep_order == True```, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with `other`.

        Exactly one of ``self``/``other`` must be a MultiIndex; a
        TypeError is raised when both are.
        """
        from pandas.core.indexes.multi import MultiIndex

        def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
            """
            Returns sorter for the inner most level while preserving the
            order of higher levels.

            Parameters
            ----------
            labels : list[np.ndarray]
                Each ndarray has signed integer dtype, not necessarily identical.

            Returns
            -------
            np.ndarray[np.intp]
            """
            if labels[0].size == 0:
                return np.empty(0, dtype=np.intp)

            if len(labels) == 1:
                return get_group_index_sorter(ensure_platform_int(labels[0]))

            # find indexers of beginning of each set of
            # same-key labels w.r.t all but last level
            tic = labels[0][:-1] != labels[0][1:]
            for lab in labels[1:-1]:
                tic |= lab[:-1] != lab[1:]

            starts = np.hstack(([True], tic, [True])).nonzero()[0]
            lab = ensure_int64(labels[-1])
            return lib.get_level_sorter(lab, ensure_platform_int(starts))

        if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
            raise TypeError("Join on level between two MultiIndex objects is ambiguous")

        left, right = self, other

        # Normalize so `left` is always the MultiIndex; flip the join
        # direction (and, at the end, the indexers) when we swapped.
        flip_order = not isinstance(self, MultiIndex)
        if flip_order:
            left, right = right, left
            flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
            how = flip.get(how, how)

        assert isinstance(left, MultiIndex)

        level = left._get_level_number(level)
        old_level = left.levels[level]

        if not right.is_unique:
            raise NotImplementedError(
                "Index._join_level on non-unique index is not implemented"
            )

        # Join the single level against the flat index.
        new_level, left_lev_indexer, right_lev_indexer = old_level.join(
            right, how=how, return_indexers=True
        )

        if left_lev_indexer is None:
            # The level itself is unchanged by the join.
            if keep_order or len(left) == 0:
                left_indexer = None
                join_index = left
            else:  # sort the leaves
                left_indexer = _get_leaf_sorter(left.codes[: level + 1])
                join_index = left[left_indexer]

        else:
            left_lev_indexer = ensure_platform_int(left_lev_indexer)
            # rev_indexer maps old level positions -> new level positions
            rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
            old_codes = left.codes[level]

            taker = old_codes[old_codes != -1]
            new_lev_codes = rev_indexer.take(taker)

            new_codes = list(left.codes)
            new_codes[level] = new_lev_codes

            new_levels = list(left.levels)
            new_levels[level] = new_level

            if keep_order:  # just drop missing values. o.w. keep order
                left_indexer = np.arange(len(left), dtype=np.intp)
                left_indexer = cast(np.ndarray, left_indexer)
                mask = new_lev_codes != -1
                if not mask.all():
                    new_codes = [lab[mask] for lab in new_codes]
                    left_indexer = left_indexer[mask]

            else:  # tie out the order with other
                if level == 0:  # outer most level, take the fast route
                    max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
                    ngroups = 1 + max_new_lev
                    left_indexer, counts = libalgos.groupsort_indexer(
                        new_lev_codes, ngroups
                    )

                    # missing values are placed first; drop them!
                    left_indexer = left_indexer[counts[0] :]
                    new_codes = [lab[left_indexer] for lab in new_codes]

                else:  # sort the leaves
                    mask = new_lev_codes != -1
                    mask_all = mask.all()
                    if not mask_all:
                        new_codes = [lab[mask] for lab in new_codes]

                    left_indexer = _get_leaf_sorter(new_codes[: level + 1])
                    new_codes = [lab[left_indexer] for lab in new_codes]

                    # left_indexers are w.r.t masked frame.
                    # reverse to original frame!
                    if not mask_all:
                        left_indexer = mask.nonzero()[0][left_indexer]

            join_index = MultiIndex(
                levels=new_levels,
                codes=new_codes,
                names=left.names,
                verify_integrity=False,
            )

        if right_lev_indexer is not None:
            right_indexer = right_lev_indexer.take(join_index.codes[level])
        else:
            right_indexer = join_index.codes[level]

        if flip_order:
            left_indexer, right_indexer = right_indexer, left_indexer

        left_indexer = (
            None if left_indexer is None else ensure_platform_int(left_indexer)
        )
        right_indexer = (
            None if right_indexer is None else ensure_platform_int(right_indexer)
        )
        return join_index, left_indexer, right_indexer
4812
    @final
    def _join_monotonic(
        self, other: Index, how: JoinHow = "left"
    ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
        """
        Join two monotonically-increasing, dtype-matched indexes using the
        libjoin indexer fastpaths.
        """
        # We only get here with matching dtypes and both monotonic increasing
        assert other.dtype == self.dtype

        if self.equals(other):
            # This is a convenient place for this check, but its correctness
            # does not depend on monotonicity, so it could go earlier
            # in the calling method.
            ret_index = other if how == "right" else self
            return ret_index, None, None

        ridx: npt.NDArray[np.intp] | None
        lidx: npt.NDArray[np.intp] | None

        if self.is_unique and other.is_unique:
            # We can perform much better than the general case
            if how == "left":
                join_index = self
                lidx = None
                ridx = self._left_indexer_unique(other)
            elif how == "right":
                # right join is the mirror image of a left join
                join_index = other
                lidx = other._left_indexer_unique(self)
                ridx = None
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)
                join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
        else:
            # at least one side has duplicates: use the non-unique indexers
            if how == "left":
                join_array, lidx, ridx = self._left_indexer(other)
            elif how == "right":
                join_array, ridx, lidx = other._left_indexer(self)
            elif how == "inner":
                join_array, lidx, ridx = self._inner_indexer(other)
            elif how == "outer":
                join_array, lidx, ridx = self._outer_indexer(other)

            assert lidx is not None
            assert ridx is not None

            join_index = self._wrap_joined_index(join_array, other, lidx, ridx)

        lidx = None if lidx is None else ensure_platform_int(lidx)
        ridx = None if ridx is None else ensure_platform_int(ridx)
        return join_index, lidx, ridx
4864
4865 def _wrap_joined_index(
4866 self: _IndexT,
4867 joined: ArrayLike,
4868 other: _IndexT,
4869 lidx: npt.NDArray[np.intp],
4870 ridx: npt.NDArray[np.intp],
4871 ) -> _IndexT:
4872 assert other.dtype == self.dtype
4873
4874 if isinstance(self, ABCMultiIndex):
4875 name = self.names if self.names == other.names else None
4876 # error: Incompatible return value type (got "MultiIndex",
4877 # expected "_IndexT")
4878 mask = lidx == -1
4879 join_idx = self.take(lidx)
4880 right = other.take(ridx)
4881 join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
4882 return join_index.set_names(name) # type: ignore[return-value]
4883 else:
4884 name = get_op_result_name(self, other)
4885 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
4886
4887 @cache_readonly
4888 def _can_use_libjoin(self) -> bool:
4889 """
4890 Whether we can use the fastpaths implement in _libs.join
4891 """
4892 if type(self) is Index:
4893 # excludes EAs, but include masks, we get here with monotonic
4894 # values only, meaning no NA
4895 return (
4896 isinstance(self.dtype, np.dtype)
4897 or isinstance(self.values, BaseMaskedArray)
4898 or isinstance(self._values, ArrowExtensionArray)
4899 )
4900 return not is_interval_dtype(self.dtype)
4901
4902 # --------------------------------------------------------------------
4903 # Uncategorized Methods
4904
    @property
    def values(self) -> ArrayLike:
        """
        Return an array representing the data in the Index.

        .. warning::

            We recommend using :attr:`Index.array` or
            :meth:`Index.to_numpy`, depending on whether you need
            a reference to the underlying data or a NumPy array.

        Returns
        -------
        array: numpy.ndarray or ExtensionArray

        See Also
        --------
        Index.array : Reference to the underlying data.
        Index.to_numpy : A NumPy array representing the underlying data.
        """
        # the backing store itself, not a copy
        return self._data
4926
    @cache_readonly
    @doc(IndexOpsMixin.array)
    def array(self) -> ExtensionArray:
        # Always return an ExtensionArray: plain ndarrays are wrapped in
        # PandasArray, EA-backed indexes return their array unchanged.
        array = self._data
        if isinstance(array, np.ndarray):
            from pandas.core.arrays.numpy_ import PandasArray

            array = PandasArray(array)
        return array
4936
    @property
    def _values(self) -> ExtensionArray | np.ndarray:
        """
        The best array representation.

        This is an ndarray or ExtensionArray.

        ``_values`` are consistent between ``Series`` and ``Index``.

        It may differ from the public '.values' method.

        index             | values          | _values       |
        ----------------- | --------------- | ------------- |
        Index             | ndarray         | ndarray       |
        CategoricalIndex  | Categorical     | Categorical   |
        DatetimeIndex     | ndarray[M8ns]   | DatetimeArray |
        DatetimeIndex[tz] | ndarray[M8ns]   | DatetimeArray |
        PeriodIndex       | ndarray[object] | PeriodArray   |
        IntervalIndex     | IntervalArray   | IntervalArray |

        See Also
        --------
        values : Values
        """
        # base class: same object as .values; subclasses may differ per table
        return self._data
4962
    def _get_engine_target(self) -> ArrayLike:
        """
        Get the ndarray or ExtensionArray that we can pass to the IndexEngine
        constructor.

        Most ExtensionArrays on a plain ``Index`` are converted to object
        ndarray; masked arrays and numeric Arrow arrays are passed through.
        """
        vals = self._values
        if isinstance(vals, StringArray):
            # GH#45652 much more performant than ExtensionEngine
            return vals._ndarray
        if (
            type(self) is Index
            and isinstance(self._values, ExtensionArray)
            and not isinstance(self._values, BaseMaskedArray)
            and not (
                isinstance(self._values, ArrowExtensionArray)
                and is_numeric_dtype(self.dtype)
                # Exclude decimal
                and self.dtype.kind != "O"
            )
        ):
            # TODO(ExtensionIndex): remove special-case, just use self._values
            return self._values.astype(object)
        return vals
4986
4987 def _get_join_target(self) -> ArrayLike:
4988 """
4989 Get the ndarray or ExtensionArray that we can pass to the join
4990 functions.
4991 """
4992 if isinstance(self._values, BaseMaskedArray):
4993 # This is only used if our array is monotonic, so no NAs present
4994 return self._values._data
4995 elif isinstance(self._values, ArrowExtensionArray):
4996 # This is only used if our array is monotonic, so no missing values
4997 # present
4998 return self._values.to_numpy()
4999 return self._get_engine_target()
5000
5001 def _from_join_target(self, result: np.ndarray) -> ArrayLike:
5002 """
5003 Cast the ndarray returned from one of the libjoin.foo_indexer functions
5004 back to type(self)._data.
5005 """
5006 if isinstance(self.values, BaseMaskedArray):
5007 return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
5008 elif isinstance(self.values, ArrowExtensionArray):
5009 return type(self.values)._from_sequence(result)
5010 return result
5011
5012 @doc(IndexOpsMixin._memory_usage)
5013 def memory_usage(self, deep: bool = False) -> int:
5014 result = self._memory_usage(deep=deep)
5015
5016 # include our engine hashtable
5017 result += self._engine.sizeof(deep=deep)
5018 return result
5019
5020 @final
5021 def where(self, cond, other=None) -> Index:
5022 """
5023 Replace values where the condition is False.
5024
5025 The replacement is taken from other.
5026
5027 Parameters
5028 ----------
5029 cond : bool array-like with the same length as self
5030 Condition to select the values on.
5031 other : scalar, or array-like, default None
5032 Replacement if the condition is False.
5033
5034 Returns
5035 -------
5036 pandas.Index
5037 A copy of self with values replaced from other
5038 where the condition is False.
5039
5040 See Also
5041 --------
5042 Series.where : Same method for Series.
5043 DataFrame.where : Same method for DataFrame.
5044
5045 Examples
5046 --------
5047 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5048 >>> idx
5049 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5050 >>> idx.where(idx.isin(['car', 'train']), 'other')
5051 Index(['car', 'other', 'train', 'other'], dtype='object')
5052 """
5053 if isinstance(self, ABCMultiIndex):
5054 raise NotImplementedError(
5055 ".where is not supported for MultiIndex operations"
5056 )
5057 cond = np.asarray(cond, dtype=bool)
5058 return self.putmask(~cond, other)
5059
5060 # construction helpers
5061 @final
5062 @classmethod
5063 def _raise_scalar_data_error(cls, data):
5064 # We return the TypeError so that we can raise it from the constructor
5065 # in order to keep mypy happy
5066 raise TypeError(
5067 f"{cls.__name__}(...) must be called with a collection of some "
5068 f"kind, {repr(data)} was passed"
5069 )
5070
    def _validate_fill_value(self, value):
        """
        Check if the value can be inserted into our array without casting,
        and convert it to an appropriate native type if necessary.

        Returns
        -------
        object
            The (possibly converted) value.

        Raises
        ------
        TypeError
            If the value cannot be inserted into an array of this dtype.
        """
        dtype = self.dtype
        if isinstance(dtype, np.dtype) and dtype.kind not in ["m", "M"]:
            # numpy non-datetimelike dtype: let np_can_hold_element do the
            # lossiness check / native conversion
            try:
                return np_can_hold_element(dtype, value)
            except LossySetitemError as err:
                # re-raise as TypeError for consistency
                raise TypeError from err
        elif not can_hold_element(self._values, value):
            raise TypeError
        return value
5092
5093 @final
5094 def _require_scalar(self, value):
5095 """
5096 Check that this is a scalar value that we can use for setitem-like
5097 operations without changing dtype.
5098 """
5099 if not is_scalar(value):
5100 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
5101 return value
5102
5103 def _is_memory_usage_qualified(self) -> bool:
5104 """
5105 Return a boolean if we need a qualified .info display.
5106 """
5107 return is_object_dtype(self.dtype)
5108
5109 def __contains__(self, key: Any) -> bool:
5110 """
5111 Return a boolean indicating whether the provided key is in the index.
5112
5113 Parameters
5114 ----------
5115 key : label
5116 The key to check if it is present in the index.
5117
5118 Returns
5119 -------
5120 bool
5121 Whether the key search is in the index.
5122
5123 Raises
5124 ------
5125 TypeError
5126 If the key is not hashable.
5127
5128 See Also
5129 --------
5130 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5131 list-like key is in the index.
5132
5133 Examples
5134 --------
5135 >>> idx = pd.Index([1, 2, 3, 4])
5136 >>> idx
5137 Index([1, 2, 3, 4], dtype='int64')
5138
5139 >>> 2 in idx
5140 True
5141 >>> 6 in idx
5142 False
5143 """
5144 hash(key)
5145 try:
5146 return key in self._engine
5147 except (OverflowError, TypeError, ValueError):
5148 return False
5149
    # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
    # Incompatible types in assignment (expression has type "None", base class
    # "object" defined the type as "Callable[[object], int]")
    # NOTE: annotation only (no runtime assignment happens on this line).
    __hash__: ClassVar[None]  # type: ignore[assignment]
5154
5155 @final
5156 def __setitem__(self, key, value):
5157 raise TypeError("Index does not support mutable operations")
5158
    def __getitem__(self, key):
        """
        Override numpy.ndarray's __getitem__ method to work as desired.

        This function adds lists and Series as valid boolean indexers
        (ndarrays only supports ndarray with dtype=bool).

        If resulting ndim != 1, plain ndarray is returned instead of
        corresponding `Index` subclass.

        """
        getitem = self._data.__getitem__

        if is_integer(key) or is_float(key):
            # GH#44051 exclude bool, which would return a 2d ndarray
            # scalar positional lookup: return the element itself
            key = com.cast_scalar_indexer(key)
            return getitem(key)

        if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # pessimization com.is_bool_indexer and ndim checks.
            result = getitem(key)
            # Going through simple_new for performance.
            return type(self)._simple_new(
                result, name=self._name, refs=self._references
            )

        if com.is_bool_indexer(key):
            # if we have list[bools, length=1e5] then doing this check+convert
            # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
            # time below from 3.8 ms to 496 µs
            # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
            if is_extension_array_dtype(getattr(key, "dtype", None)):
                key = key.to_numpy(dtype=bool, na_value=False)
            else:
                key = np.asarray(key, dtype=bool)

        result = getitem(key)
        # Because we ruled out integer above, we always get an arraylike here
        if result.ndim > 1:
            disallow_ndim_indexing(result)

        # NB: Using _constructor._simple_new would break if MultiIndex
        # didn't override __getitem__
        return self._constructor._simple_new(result, name=self._name)
5204
5205 def _getitem_slice(self: _IndexT, slobj: slice) -> _IndexT:
5206 """
5207 Fastpath for __getitem__ when we know we have a slice.
5208 """
5209 res = self._data[slobj]
5210 return type(self)._simple_new(res, name=self._name, refs=self._references)
5211
5212 @final
5213 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5214 """
5215 Faster check for ``name in self`` when we know `name` is a Python
5216 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5217 . key lookup). For indexes that can't hold identifiers (everything
5218 but object & categorical) we just return False.
5219
5220 https://github.com/pandas-dev/pandas/issues/19764
5221 """
5222 if (
5223 is_object_dtype(self.dtype)
5224 or is_string_dtype(self.dtype)
5225 or is_categorical_dtype(self.dtype)
5226 ):
5227 return name in self
5228 return False
5229
5230 def append(self, other: Index | Sequence[Index]) -> Index:
5231 """
5232 Append a collection of Index options together.
5233
5234 Parameters
5235 ----------
5236 other : Index or list/tuple of indices
5237
5238 Returns
5239 -------
5240 Index
5241 """
5242 to_concat = [self]
5243
5244 if isinstance(other, (list, tuple)):
5245 to_concat += list(other)
5246 else:
5247 # error: Argument 1 to "append" of "list" has incompatible type
5248 # "Union[Index, Sequence[Index]]"; expected "Index"
5249 to_concat.append(other) # type: ignore[arg-type]
5250
5251 for obj in to_concat:
5252 if not isinstance(obj, Index):
5253 raise TypeError("all inputs must be Index")
5254
5255 names = {obj.name for obj in to_concat}
5256 name = None if len(names) > 1 else self.name
5257
5258 return self._concat(to_concat, name)
5259
5260 def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
5261 """
5262 Concatenate multiple Index objects.
5263 """
5264 to_concat_vals = [x._values for x in to_concat]
5265
5266 result = concat_compat(to_concat_vals)
5267
5268 return Index._with_infer(result, name=name)
5269
    def putmask(self, mask, value) -> Index:
        """
        Return a new Index of the values set with the mask.

        Parameters
        ----------
        mask : array-like
            Boolean mask selecting the positions to replace.
        value : scalar or array-like
            Replacement value(s) written at the masked positions.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.putmask : Changes elements of an array
            based on conditional and input values.
        """
        mask, noop = validate_putmask(self._values, mask)
        if noop:
            # Mask selects nothing: return an unchanged copy.
            return self.copy()

        if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
            # e.g. None -> np.nan, see also Block._standardize_fill_value
            value = self._na_value

        try:
            converted = self._validate_fill_value(value)
        except (LossySetitemError, ValueError, TypeError) as err:
            if is_object_dtype(self):  # pragma: no cover
                raise err

            # The value does not fit our dtype: upcast to a common dtype
            # and retry the putmask there.
            # See also: Block.coerce_to_target_dtype
            dtype = self._find_common_type_compat(value)
            return self.astype(dtype).putmask(mask, value)

        values = self._values.copy()

        if isinstance(values, np.ndarray):
            converted = setitem_datetimelike_compat(values, mask.sum(), converted)
            np.putmask(values, mask, converted)

        else:
            # Note: we use the original value here, not converted, as
            # _validate_fill_value is not idempotent
            values._putmask(mask, value)

        return self._shallow_copy(values)
5313
    def equals(self, other: Any) -> bool:
        """
        Determine if two Index object are equal.

        The things that are being compared are:

        * The elements inside the Index object.
        * The order of the elements inside the Index object.

        Parameters
        ----------
        other : Any
            The other object to compare against.

        Returns
        -------
        bool
            True if "other" is an Index and it has the same elements and order
            as the calling index; False otherwise.

        Examples
        --------
        >>> idx1 = pd.Index([1, 2, 3])
        >>> idx1
        Index([1, 2, 3], dtype='int64')
        >>> idx1.equals(pd.Index([1, 2, 3]))
        True

        The elements inside are compared

        >>> idx2 = pd.Index(["1", "2", "3"])
        >>> idx2
        Index(['1', '2', '3'], dtype='object')

        >>> idx1.equals(idx2)
        False

        The order is compared

        >>> ascending_idx = pd.Index([1, 2, 3])
        >>> ascending_idx
        Index([1, 2, 3], dtype='int64')
        >>> descending_idx = pd.Index([3, 2, 1])
        >>> descending_idx
        Index([3, 2, 1], dtype='int64')
        >>> ascending_idx.equals(descending_idx)
        False

        The dtype is *not* compared

        >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
        >>> int64_idx
        Index([1, 2, 3], dtype='int64')
        >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
        >>> uint64_idx
        Index([1, 2, 3], dtype='uint64')
        >>> int64_idx.equals(uint64_idx)
        True
        """
        # NB: the order of these dispatch checks matters; each case defers
        # to the side with the more specific comparison logic.
        if self.is_(other):
            # Same underlying object: trivially equal.
            return True

        if not isinstance(other, Index):
            return False

        if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
            # if other is not object, use other's logic for coercion
            return other.equals(self)

        if isinstance(other, ABCMultiIndex):
            # d-level MultiIndex can equal d-tuple Index
            return other.equals(self)

        if isinstance(self._values, ExtensionArray):
            # Dispatch to the ExtensionArray's .equals method.
            if not isinstance(other, type(self)):
                return False

            earr = cast(ExtensionArray, self._data)
            return earr.equals(other._data)

        if is_extension_array_dtype(other.dtype):
            # All EA-backed Index subclasses override equals
            return other.equals(self)

        # Both sides ndarray-backed: defer to array_equivalent for the
        # element-wise comparison.
        return array_equivalent(self._values, other._values)
5400
5401 @final
5402 def identical(self, other) -> bool:
5403 """
5404 Similar to equals, but checks that object attributes and types are also equal.
5405
5406 Returns
5407 -------
5408 bool
5409 If two Index objects have equal elements and same type True,
5410 otherwise False.
5411 """
5412 return (
5413 self.equals(other)
5414 and all(
5415 getattr(self, c, None) == getattr(other, c, None)
5416 for c in self._comparables
5417 )
5418 and type(self) == type(other)
5419 and self.dtype == other.dtype
5420 )
5421
    @final
    def asof(self, label):
        """
        Return the label from the index, or, if not present, the previous one.

        Assuming that the index is sorted, return the passed index label if it
        is in the index, or return the previous index label if the passed one
        is not in the index.

        Parameters
        ----------
        label : object
            The label up to which the method returns the latest index label.

        Returns
        -------
        object
            The passed label if it is in the index. The previous label if the
            passed label is not in the sorted index or `NaN` if there is no
            such label.

        See Also
        --------
        Series.asof : Return the latest value in a Series up to the
            passed index.
        merge_asof : Perform an asof merge (similar to left join but it
            matches on nearest key rather than equal key).
        Index.get_loc : An `asof` is a thin wrapper around `get_loc`
            with method='pad'.

        Examples
        --------
        `Index.asof` returns the latest index label up to the passed label.

        >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
        >>> idx.asof('2014-01-01')
        '2013-12-31'

        If the label is in the index, the method returns the passed label.

        >>> idx.asof('2014-01-02')
        '2014-01-02'

        If all of the labels in the index are later than the passed label,
        NaN is returned.

        >>> idx.asof('1999-01-02')
        nan

        If the index is not sorted, an error is raised.

        >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
        ...                            '2014-01-03'])
        >>> idx_not_sorted.asof('2013-12-31')
        Traceback (most recent call last):
        ValueError: index must be monotonic increasing or decreasing
        """
        self._searchsorted_monotonic(label)  # validate sortedness
        try:
            # First try an exact lookup.
            loc = self.get_loc(label)
        except (KeyError, TypeError):
            # KeyError -> No exact match, try for padded
            # TypeError -> passed e.g. non-hashable, fall through to get
            #  the tested exception message
            indexer = self.get_indexer([label], method="pad")
            if indexer.ndim > 1 or indexer.size > 1:
                raise TypeError("asof requires scalar valued input")
            loc = indexer.item()
            if loc == -1:
                # No index label at or before `label`.
                return self._na_value
        else:
            if isinstance(loc, slice):
                # get_loc can return a slice (e.g. monotonic duplicates);
                # use the last position it covers.
                loc = loc.indices(len(self))[-1]

        return self[loc]
5497
5498 def asof_locs(
5499 self, where: Index, mask: npt.NDArray[np.bool_]
5500 ) -> npt.NDArray[np.intp]:
5501 """
5502 Return the locations (indices) of labels in the index.
5503
5504 As in the `asof` function, if the label (a particular entry in
5505 `where`) is not in the index, the latest index label up to the
5506 passed label is chosen and its index returned.
5507
5508 If all of the labels in the index are later than a label in `where`,
5509 -1 is returned.
5510
5511 `mask` is used to ignore NA values in the index during calculation.
5512
5513 Parameters
5514 ----------
5515 where : Index
5516 An Index consisting of an array of timestamps.
5517 mask : np.ndarray[bool]
5518 Array of booleans denoting where values in the original
5519 data are not NA.
5520
5521 Returns
5522 -------
5523 np.ndarray[np.intp]
5524 An array of locations (indices) of the labels from the Index
5525 which correspond to the return values of the `asof` function
5526 for every element in `where`.
5527 """
5528 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5529 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5530 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5531 locs = self._values[mask].searchsorted(
5532 where._values, side="right" # type: ignore[call-overload]
5533 )
5534 locs = np.where(locs > 0, locs - 1, 0)
5535
5536 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5537
5538 first_value = self._values[mask.argmax()]
5539 result[(locs == 0) & (where._values < first_value)] = -1
5540
5541 return result
5542
5543 def sort_values(
5544 self,
5545 return_indexer: bool = False,
5546 ascending: bool = True,
5547 na_position: str_t = "last",
5548 key: Callable | None = None,
5549 ):
5550 """
5551 Return a sorted copy of the index.
5552
5553 Return a sorted copy of the index, and optionally return the indices
5554 that sorted the index itself.
5555
5556 Parameters
5557 ----------
5558 return_indexer : bool, default False
5559 Should the indices that would sort the index be returned.
5560 ascending : bool, default True
5561 Should the index values be sorted in an ascending order.
5562 na_position : {'first' or 'last'}, default 'last'
5563 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
5564 the end.
5565
5566 .. versionadded:: 1.2.0
5567
5568 key : callable, optional
5569 If not None, apply the key function to the index values
5570 before sorting. This is similar to the `key` argument in the
5571 builtin :meth:`sorted` function, with the notable difference that
5572 this `key` function should be *vectorized*. It should expect an
5573 ``Index`` and return an ``Index`` of the same shape.
5574
5575 .. versionadded:: 1.1.0
5576
5577 Returns
5578 -------
5579 sorted_index : pandas.Index
5580 Sorted copy of the index.
5581 indexer : numpy.ndarray, optional
5582 The indices that the index itself was sorted by.
5583
5584 See Also
5585 --------
5586 Series.sort_values : Sort values of a Series.
5587 DataFrame.sort_values : Sort values in a DataFrame.
5588
5589 Examples
5590 --------
5591 >>> idx = pd.Index([10, 100, 1, 1000])
5592 >>> idx
5593 Index([10, 100, 1, 1000], dtype='int64')
5594
5595 Sort values in ascending order (default behavior).
5596
5597 >>> idx.sort_values()
5598 Index([1, 10, 100, 1000], dtype='int64')
5599
5600 Sort values in descending order, and also get the indices `idx` was
5601 sorted by.
5602
5603 >>> idx.sort_values(ascending=False, return_indexer=True)
5604 (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5605 """
5606 idx = ensure_key_mapped(self, key)
5607
5608 # GH 35584. Sort missing values according to na_position kwarg
5609 # ignore na_position for MultiIndex
5610 if not isinstance(self, ABCMultiIndex):
5611 _as = nargsort(
5612 items=idx, ascending=ascending, na_position=na_position, key=key
5613 )
5614 else:
5615 _as = idx.argsort()
5616 if not ascending:
5617 _as = _as[::-1]
5618
5619 sorted_index = self.take(_as)
5620
5621 if return_indexer:
5622 return sorted_index, _as
5623 else:
5624 return sorted_index
5625
5626 @final
5627 def sort(self, *args, **kwargs):
5628 """
5629 Use sort_values instead.
5630 """
5631 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5632
5633 def shift(self, periods: int = 1, freq=None):
5634 """
5635 Shift index by desired number of time frequency increments.
5636
5637 This method is for shifting the values of datetime-like indexes
5638 by a specified time increment a given number of times.
5639
5640 Parameters
5641 ----------
5642 periods : int, default 1
5643 Number of periods (or increments) to shift by,
5644 can be positive or negative.
5645 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5646 Frequency increment to shift by.
5647 If None, the index is shifted by its own `freq` attribute.
5648 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5649
5650 Returns
5651 -------
5652 pandas.Index
5653 Shifted index.
5654
5655 See Also
5656 --------
5657 Series.shift : Shift values of Series.
5658
5659 Notes
5660 -----
5661 This method is only implemented for datetime-like index classes,
5662 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5663
5664 Examples
5665 --------
5666 Put the first 5 month starts of 2011 into an index.
5667
5668 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5669 >>> month_starts
5670 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5671 '2011-05-01'],
5672 dtype='datetime64[ns]', freq='MS')
5673
5674 Shift the index by 10 days.
5675
5676 >>> month_starts.shift(10, freq='D')
5677 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
5678 '2011-05-11'],
5679 dtype='datetime64[ns]', freq=None)
5680
5681 The default value of `freq` is the `freq` attribute of the index,
5682 which is 'MS' (month start) in this example.
5683
5684 >>> month_starts.shift(10)
5685 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
5686 '2012-03-01'],
5687 dtype='datetime64[ns]', freq='MS')
5688 """
5689 raise NotImplementedError(
5690 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
5691 f"TimedeltaIndex; Got type {type(self).__name__}"
5692 )
5693
5694 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
5695 """
5696 Return the integer indices that would sort the index.
5697
5698 Parameters
5699 ----------
5700 *args
5701 Passed to `numpy.ndarray.argsort`.
5702 **kwargs
5703 Passed to `numpy.ndarray.argsort`.
5704
5705 Returns
5706 -------
5707 np.ndarray[np.intp]
5708 Integer indices that would sort the index if used as
5709 an indexer.
5710
5711 See Also
5712 --------
5713 numpy.argsort : Similar method for NumPy arrays.
5714 Index.sort_values : Return sorted copy of Index.
5715
5716 Examples
5717 --------
5718 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
5719 >>> idx
5720 Index(['b', 'a', 'd', 'c'], dtype='object')
5721
5722 >>> order = idx.argsort()
5723 >>> order
5724 array([1, 0, 3, 2])
5725
5726 >>> idx[order]
5727 Index(['a', 'b', 'c', 'd'], dtype='object')
5728 """
5729 # This works for either ndarray or EA, is overridden
5730 # by RangeIndex, MultIIndex
5731 return self._data.argsort(*args, **kwargs)
5732
5733 def _check_indexing_error(self, key):
5734 if not is_scalar(key):
5735 # if key is not a scalar, directly raise an error (the code below
5736 # would convert to numpy arrays and raise later any way) - GH29926
5737 raise InvalidIndexError(key)
5738
5739 @cache_readonly
5740 def _should_fallback_to_positional(self) -> bool:
5741 """
5742 Should an integer key be treated as positional?
5743 """
5744 return self.inferred_type not in {
5745 "integer",
5746 "mixed-integer",
5747 "floating",
5748 "complex",
5749 }
5750
    # Shared docstring template for get_indexer_non_unique; applied to the
    # method below via @Appender with per-class %-substitutions
    # (e.g. %(target_klass)s).
    _index_shared_docs[
        "get_indexer_non_unique"
    ] = """
        Compute indexer and mask for new index given the current index.

        The indexer should be then used as an input to ndarray.take to align the
        current data to the new index.

        Parameters
        ----------
        target : %(target_klass)s

        Returns
        -------
        indexer : np.ndarray[np.intp]
            Integers from 0 to n - 1 indicating that the index at these
            positions matches the corresponding target values. Missing values
            in the target are marked by -1.
        missing : np.ndarray[np.intp]
            An indexer into the target of the values not found.
            These correspond to the -1 in the indexer array.

        Examples
        --------
        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['b', 'b'])
        (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))

        In the example below there are no matched values.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['q', 'r', 't'])
        (array([-1, -1, -1]), array([0, 1, 2]))

        For this reason, the returned ``indexer`` contains only integers equal to -1.
        It demonstrates that there's no match between the index and the ``target``
        values at these positions. The mask [0, 1, 2] in the return value shows that
        the first, second, and third elements are missing.

        Notice that the return value is a tuple contains two items. In the example
        below the first item is an array of locations in ``index``. The second
        item is a mask shows that the first and third elements are missing.

        >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
        >>> index.get_indexer_non_unique(['f', 'b', 's'])
        (array([-1, 1, 3, 4, -1]), array([0, 2]))
        """
5798
    @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
    def get_indexer_non_unique(
        self, target
    ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        target = ensure_index(target)
        target = self._maybe_cast_listlike_indexer(target)

        if not self._should_compare(target) and not self._should_partial_index(target):
            # _should_partial_index e.g. IntervalIndex with numeric scalars
            #  that can be matched to Interval scalars.
            return self._get_indexer_non_comparable(target, method=None, unique=False)

        pself, ptarget = self._maybe_promote(target)
        if pself is not self or ptarget is not target:
            # Promotion changed at least one side; redo the lookup with the
            # promoted pair.
            return pself.get_indexer_non_unique(ptarget)

        if not is_dtype_equal(self.dtype, target.dtype):
            # Cast both sides to a common dtype and retry there.
            # TODO: if object, could use infer_dtype to preempt costly
            #  conversion if still non-comparable?
            dtype = self._find_common_type_compat(target)

            this = self.astype(dtype, copy=False)
            that = target.astype(dtype, copy=False)
            return this.get_indexer_non_unique(that)

        # TODO: get_indexer has fastpaths for both Categorical-self and
        #  Categorical-target. Can we do something similar here?

        # Note: _maybe_promote ensures we never get here with MultiIndex
        #  self and non-Multi target
        tgt_values = target._get_engine_target()
        if self._is_multi and target._is_multi:
            engine = self._engine
            # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
            # no attribute "_extract_level_codes"
            tgt_values = engine._extract_level_codes(target)  # type: ignore[union-attr]

        indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
        return ensure_platform_int(indexer), ensure_platform_int(missing)
5838
5839 @final
5840 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
5841 """
5842 Guaranteed return of an indexer even when non-unique.
5843
5844 This dispatches to get_indexer or get_indexer_non_unique
5845 as appropriate.
5846
5847 Returns
5848 -------
5849 np.ndarray[np.intp]
5850 List of indices.
5851
5852 Examples
5853 --------
5854 >>> idx = pd.Index([np.nan, 'var1', np.nan])
5855 >>> idx.get_indexer_for([np.nan])
5856 array([0, 2])
5857 """
5858 if self._index_as_unique:
5859 return self.get_indexer(target)
5860 indexer, _ = self.get_indexer_non_unique(target)
5861 return indexer
5862
    def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
        """
        Analogue to get_indexer that raises if any elements are missing.

        Parameters
        ----------
        key : label or list-like of labels
            Labels to locate.
        axis_name : str
            Only used in the KeyError message raised for missing labels.

        Returns
        -------
        tuple[Index, np.ndarray]
            The matched labels (taken from self) and their integer positions.

        Raises
        ------
        KeyError
            If any requested label is absent (via _raise_if_missing).
        """
        keyarr = key
        if not isinstance(keyarr, Index):
            keyarr = com.asarray_tuplesafe(keyarr)

        if self._index_as_unique:
            indexer = self.get_indexer_for(keyarr)
            keyarr = self.reindex(keyarr)[0]
        else:
            keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)

        self._raise_if_missing(keyarr, indexer, axis_name)

        keyarr = self.take(indexer)
        if isinstance(key, Index):
            # GH 42790 - Preserve name from an Index
            keyarr.name = key.name
        if (
            isinstance(keyarr.dtype, np.dtype) and keyarr.dtype.kind in ["m", "M"]
        ) or isinstance(keyarr.dtype, DatetimeTZDtype):
            # DTI/TDI.take can infer a freq in some cases when we dont want one
            if isinstance(key, list) or (
                isinstance(key, type(self))
                # "Index" has no attribute "freq"
                and key.freq is None  # type: ignore[attr-defined]
            ):
                keyarr = keyarr._with_freq(None)

        return keyarr, indexer
5895
5896 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
5897 """
5898 Check that indexer can be used to return a result.
5899
5900 e.g. at least one element was found,
5901 unless the list of keys was actually empty.
5902
5903 Parameters
5904 ----------
5905 key : list-like
5906 Targeted labels (only used to show correct error message).
5907 indexer: array-like of booleans
5908 Indices corresponding to the key,
5909 (with -1 indicating not found).
5910 axis_name : str
5911
5912 Raises
5913 ------
5914 KeyError
5915 If at least one key was requested but none was found.
5916 """
5917 if len(key) == 0:
5918 return
5919
5920 # Count missing values
5921 missing_mask = indexer < 0
5922 nmissing = missing_mask.sum()
5923
5924 if nmissing:
5925 # TODO: remove special-case; this is just to keep exception
5926 # message tests from raising while debugging
5927 use_interval_msg = is_interval_dtype(self.dtype) or (
5928 is_categorical_dtype(self.dtype)
5929 # "Index" has no attribute "categories" [attr-defined]
5930 and is_interval_dtype(
5931 self.categories.dtype # type: ignore[attr-defined]
5932 )
5933 )
5934
5935 if nmissing == len(indexer):
5936 if use_interval_msg:
5937 key = list(key)
5938 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
5939
5940 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
5941 raise KeyError(f"{not_found} not in index")
5942
5943 @overload
5944 def _get_indexer_non_comparable(
5945 self, target: Index, method, unique: Literal[True] = ...
5946 ) -> npt.NDArray[np.intp]:
5947 ...
5948
5949 @overload
5950 def _get_indexer_non_comparable(
5951 self, target: Index, method, unique: Literal[False]
5952 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5953 ...
5954
5955 @overload
5956 def _get_indexer_non_comparable(
5957 self, target: Index, method, unique: bool = True
5958 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5959 ...
5960
5961 @final
5962 def _get_indexer_non_comparable(
5963 self, target: Index, method, unique: bool = True
5964 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
5965 """
5966 Called from get_indexer or get_indexer_non_unique when the target
5967 is of a non-comparable dtype.
5968
5969 For get_indexer lookups with method=None, get_indexer is an _equality_
5970 check, so non-comparable dtypes mean we will always have no matches.
5971
5972 For get_indexer lookups with a method, get_indexer is an _inequality_
5973 check, so non-comparable dtypes mean we will always raise TypeError.
5974
5975 Parameters
5976 ----------
5977 target : Index
5978 method : str or None
5979 unique : bool, default True
5980 * True if called from get_indexer.
5981 * False if called from get_indexer_non_unique.
5982
5983 Raises
5984 ------
5985 TypeError
5986 If doing an inequality check, i.e. method is not None.
5987 """
5988 if method is not None:
5989 other = _unpack_nested_dtype(target)
5990 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other.dtype}")
5991
5992 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
5993 if unique:
5994 # This is for get_indexer
5995 return no_matches
5996 else:
5997 # This is for get_indexer_non_unique
5998 missing = np.arange(len(target), dtype=np.intp)
5999 return no_matches, missing
6000
    @property
    def _index_as_unique(self) -> bool:
        """
        Whether we should treat this as unique for the sake of
        get_indexer vs get_indexer_non_unique.

        For IntervalIndex compat.
        """
        return self.is_unique

    # NOTE(review): presumably the message used by reindexing paths that
    # require a unique index — confirm at the call sites, which are outside
    # this view.
    _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
6012
    @final
    def _maybe_promote(self, other: Index) -> tuple[Index, Index]:
        """
        When dealing with an object-dtype Index and a non-object Index, see
        if we can upcast the object-dtype one to improve performance.

        Returns
        -------
        tuple[Index, Index]
            Possibly-promoted versions of (self, other); either may be
            returned unchanged.
        """

        if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
            if (
                self.tz is not None
                and other.tz is not None
                and not tz_compare(self.tz, other.tz)
            ):
                # standardize on UTC
                return self.tz_convert("UTC"), other.tz_convert("UTC")

        elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
            try:
                return type(other)(self), other
            except OutOfBoundsDatetime:
                # Dates outside the datetime64 bounds: leave both unchanged.
                return self, other
        elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we dont have tests that get here
            return type(other)(self), other

        elif self.dtype.kind == "u" and other.dtype.kind == "i":
            # GH#41873
            if other.min() >= 0:
                # All values non-negative, so the signed side can safely be
                # viewed as unsigned.
                # lookup min as it may be cached
                # TODO: may need itemsize check if we have non-64-bit Indexes
                return self, other.astype(self.dtype)

        elif self._is_multi and not other._is_multi:
            try:
                # "Type[Index]" has no attribute "from_tuples"
                other = type(self).from_tuples(other)  # type: ignore[attr-defined]
            except (TypeError, ValueError):
                # let's instead try with a straight Index
                self = Index(self._values)

        if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we dont need to re-implement on the subclasses
            other, self = other._maybe_promote(self)

        return self, other
6058
6059 @final
6060 def _find_common_type_compat(self, target) -> DtypeObj:
6061 """
6062 Implementation of find_common_type that adjusts for Index-specific
6063 special cases.
6064 """
6065 target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)
6066
6067 # special case: if one dtype is uint64 and the other a signed int, return object
6068 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
6069 # Now it's:
6070 # * float | [u]int -> float
6071 # * uint64 | signed int -> object
6072 # We may change union(float | [u]int) to go to object.
6073 if self.dtype == "uint64" or target_dtype == "uint64":
6074 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
6075 target_dtype
6076 ):
6077 return _dtype_obj
6078
6079 dtype = find_result_type(self._values, target)
6080 dtype = common_dtype_categorical_compat([self, target], dtype)
6081 return dtype
6082
6083 @final
6084 def _should_compare(self, other: Index) -> bool:
6085 """
6086 Check if `self == other` can ever have non-False entries.
6087 """
6088
6089 if (is_bool_dtype(other) and is_any_real_numeric_dtype(self)) or (
6090 is_bool_dtype(self) and is_any_real_numeric_dtype(other)
6091 ):
6092 # GH#16877 Treat boolean labels passed to a numeric index as not
6093 # found. Without this fix False and True would be treated as 0 and 1
6094 # respectively.
6095 return False
6096
6097 other = _unpack_nested_dtype(other)
6098 dtype = other.dtype
6099 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6100
6101 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6102 """
6103 Can we compare values of the given dtype to our own?
6104 """
6105 if self.dtype.kind == "b":
6106 return dtype.kind == "b"
6107 elif is_numeric_dtype(self.dtype):
6108 return is_numeric_dtype(dtype)
6109 # TODO: this was written assuming we only get here with object-dtype,
6110 # which is nom longer correct. Can we specialize for EA?
6111 return True
6112
6113 @final
6114 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6115 """
6116 Group the index labels by a given array of values.
6117
6118 Parameters
6119 ----------
6120 values : array
6121 Values used to determine the groups.
6122
6123 Returns
6124 -------
6125 dict
6126 {group name -> group labels}
6127 """
6128 # TODO: if we are a MultiIndex, we can do better
6129 # that converting to tuples
6130 if isinstance(values, ABCMultiIndex):
6131 values = values._values
6132 values = Categorical(values)
6133 result = values._reverse_indexer()
6134
6135 # map to the label
6136 result = {k: self.take(v) for k, v in result.items()}
6137
6138 return PrettyDict(result)
6139
    def map(self, mapper, na_action=None):
        """
        Map values using an input mapping or function.

        Parameters
        ----------
        mapper : function, dict, or Series
            Mapping correspondence.
        na_action : {None, 'ignore'}
            If 'ignore', propagate NA values, without passing them to the
            mapping correspondence.

        Returns
        -------
        Union[Index, MultiIndex]
            The output of the mapping function applied to the index.
            If the function returns a tuple with more than one element
            a MultiIndex will be returned.
        """
        from pandas.core.indexes.multi import MultiIndex

        new_values = self._map_values(mapper, na_action=na_action)

        # we can return a MultiIndex
        if new_values.size and isinstance(new_values[0], tuple):
            if isinstance(self, MultiIndex):
                names = self.names
            elif self.name:
                # NOTE: truthiness check — a falsy name (e.g. "" or 0) is
                # treated the same as no name here.
                names = [self.name] * len(new_values[0])
            else:
                names = None
            return MultiIndex.from_tuples(new_values, names=names)

        dtype = None
        if not new_values.size:
            # empty: keep the original dtype
            dtype = self.dtype

        # e.g. if we are floating and new_values is all ints, then we
        # don't want to cast back to floating. But if we are UInt64
        # and new_values is all ints, we want to try.
        same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
        if same_dtype:
            new_values = maybe_cast_pointwise_result(
                new_values, self.dtype, same_dtype=same_dtype
            )

        return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6188
6189 # TODO: De-duplicate with map, xref GH#32349
6190 @final
6191 def _transform_index(self, func, *, level=None) -> Index:
6192 """
6193 Apply function to all values found in index.
6194
6195 This includes transforming multiindex entries separately.
6196 Only apply function to one level of the MultiIndex if level is specified.
6197 """
6198 if isinstance(self, ABCMultiIndex):
6199 values = [
6200 self.get_level_values(i).map(func)
6201 if i == level or level is None
6202 else self.get_level_values(i)
6203 for i in range(self.nlevels)
6204 ]
6205 return type(self).from_arrays(values)
6206 else:
6207 items = [func(x) for x in self]
6208 return Index(items, name=self.name, tupleize_cols=False)
6209
    def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
        """
        Return a boolean array where the index values are in `values`.

        Compute boolean array of whether each index value is found in the
        passed set of values. The length of the returned boolean array matches
        the length of the index.

        Parameters
        ----------
        values : set or list-like
            Sought values.
        level : str or int, optional
            Name or position of the index level to use (if the index is a
            `MultiIndex`).

        Returns
        -------
        np.ndarray[bool]
            NumPy array of boolean values.

        See Also
        --------
        Series.isin : Same for Series.
        DataFrame.isin : Same method for DataFrames.

        Notes
        -----
        In the case of `MultiIndex` you must either specify `values` as a
        list-like object containing tuples that are the same length as the
        number of levels, or specify `level`. Otherwise it will raise a
        ``ValueError``.

        If `level` is specified:

        - if it is the name of one *and only one* index level, use that level;
        - otherwise it should be a number indicating level position.

        Examples
        --------
        >>> idx = pd.Index([1,2,3])
        >>> idx
        Index([1, 2, 3], dtype='int64')

        Check whether each index value in a list of values.

        >>> idx.isin([1, 4])
        array([ True, False, False])

        >>> midx = pd.MultiIndex.from_arrays([[1,2,3],
        ...                                  ['red', 'blue', 'green']],
        ...                                  names=('number', 'color'))
        >>> midx
        MultiIndex([(1, 'red'),
                    (2, 'blue'),
                    (3, 'green')],
                   names=['number', 'color'])

        Check whether the strings in the 'color' level of the MultiIndex
        are in a list of colors.

        >>> midx.isin(['red', 'orange', 'yellow'], level='color')
        array([ True, False, False])

        To check across the levels of a MultiIndex, pass a list of tuples:

        >>> midx.isin([(1, 'red'), (3, 'red')])
        array([ True, False, False])

        For a DatetimeIndex, string values in `values` are converted to
        Timestamps.

        >>> dates = ['2000-03-11', '2000-03-12', '2000-03-13']
        >>> dti = pd.to_datetime(dates)
        >>> dti
        DatetimeIndex(['2000-03-11', '2000-03-12', '2000-03-13'],
        dtype='datetime64[ns]', freq=None)

        >>> dti.isin(['2000-03-11'])
        array([ True, False, False])
        """
        if level is not None:
            # For a flat Index, `level` may only refer to the single existing
            # level; this validates it (raising otherwise).
            self._validate_index_level(level)
        # Vectorized membership test against the underlying values.
        return algos.isin(self._values, values)
6294
    def _get_string_slice(self, key: str_t):
        """
        Compute an indexer for a partial-string key.

        The base Index does not support partial string indexing; subclasses
        that do (DatetimeIndex, TimedeltaIndex, PeriodIndex) override this.
        """
        # this is for partial string indexing,
        # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
        raise NotImplementedError
6299
6300 def slice_indexer(
6301 self,
6302 start: Hashable | None = None,
6303 end: Hashable | None = None,
6304 step: int | None = None,
6305 ) -> slice:
6306 """
6307 Compute the slice indexer for input labels and step.
6308
6309 Index needs to be ordered and unique.
6310
6311 Parameters
6312 ----------
6313 start : label, default None
6314 If None, defaults to the beginning.
6315 end : label, default None
6316 If None, defaults to the end.
6317 step : int, default None
6318
6319 Returns
6320 -------
6321 slice
6322
6323 Raises
6324 ------
6325 KeyError : If key does not exist, or key is not unique and index is
6326 not ordered.
6327
6328 Notes
6329 -----
6330 This function assumes that the data is sorted, so use at your own peril
6331
6332 Examples
6333 --------
6334 This is a method on all index types. For example you can do:
6335
6336 >>> idx = pd.Index(list('abcd'))
6337 >>> idx.slice_indexer(start='b', end='c')
6338 slice(1, 3, None)
6339
6340 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6341 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6342 slice(1, 3, None)
6343 """
6344 start_slice, end_slice = self.slice_locs(start, end, step=step)
6345
6346 # return a slice
6347 if not is_scalar(start_slice):
6348 raise AssertionError("Start slice bound is non-scalar")
6349 if not is_scalar(end_slice):
6350 raise AssertionError("End slice bound is non-scalar")
6351
6352 return slice(start_slice, end_slice, step)
6353
6354 def _maybe_cast_indexer(self, key):
6355 """
6356 If we have a float key and are not a floating index, then try to cast
6357 to an int if equivalent.
6358 """
6359 return key
6360
6361 def _maybe_cast_listlike_indexer(self, target) -> Index:
6362 """
6363 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6364 """
6365 return ensure_index(target)
6366
6367 @final
6368 def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
6369 """
6370 If we are positional indexer, validate that we have appropriate
6371 typed bounds must be an integer.
6372 """
6373 assert kind in ["getitem", "iloc"]
6374
6375 if key is not None and not is_integer(key):
6376 self._raise_invalid_indexer(form, key)
6377
6378 def _maybe_cast_slice_bound(self, label, side: str_t):
6379 """
6380 This function should be overloaded in subclasses that allow non-trivial
6381 casting on label-slice bounds, e.g. datetime-like indices allowing
6382 strings containing formatted datetimes.
6383
6384 Parameters
6385 ----------
6386 label : object
6387 side : {'left', 'right'}
6388
6389 Returns
6390 -------
6391 label : object
6392
6393 Notes
6394 -----
6395 Value of `side` parameter should be validated in caller.
6396 """
6397
6398 # We are a plain index here (sub-class override this method if they
6399 # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
6400
6401 if is_numeric_dtype(self.dtype):
6402 return self._maybe_cast_indexer(label)
6403
6404 # reject them, if index does not contain label
6405 if (is_float(label) or is_integer(label)) and label not in self:
6406 self._raise_invalid_indexer("slice", label)
6407
6408 return label
6409
6410 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6411 if self.is_monotonic_increasing:
6412 return self.searchsorted(label, side=side)
6413 elif self.is_monotonic_decreasing:
6414 # np.searchsorted expects ascending sort order, have to reverse
6415 # everything for it to work (element ordering, search side and
6416 # resulting value).
6417 pos = self[::-1].searchsorted(
6418 label, side="right" if side == "left" else "left"
6419 )
6420 return len(self) - pos
6421
6422 raise ValueError("index must be monotonic increasing or decreasing")
6423
    def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
        """
        Calculate slice bound that corresponds to given label.

        Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
        of given label.

        Parameters
        ----------
        label : object
        side : {'left', 'right'}

        Returns
        -------
        int
            Index of label.
        """

        if side not in ("left", "right"):
            raise ValueError(
                "Invalid value for side kwarg, must be either "
                f"'left' or 'right': {side}"
            )

        original_label = label

        # For datetime indices label may be a string that has to be converted
        # to datetime boundary according to its resolution.
        label = self._maybe_cast_slice_bound(label, side)

        # we need to look up the label
        try:
            slc = self.get_loc(label)
        except KeyError as err:
            # Label not present: fall back to a monotonic search position;
            # if the index is not monotonic either, re-raise the lookup error.
            try:
                return self._searchsorted_monotonic(label, side)
            except ValueError:
                # raise the original KeyError
                raise err

        if isinstance(slc, np.ndarray):
            # get_loc may return a boolean array, which
            # is OK as long as they are representable by a slice.
            assert is_bool_dtype(slc.dtype)
            slc = lib.maybe_booleans_to_slice(slc.view("u1"))
            if isinstance(slc, np.ndarray):
                # Still an ndarray -> the matches are non-contiguous, so no
                # single slice bound exists for this label.
                raise KeyError(
                    f"Cannot get {side} slice bound for non-unique "
                    f"label: {repr(original_label)}"
                )

        if isinstance(slc, slice):
            if side == "left":
                return slc.start
            else:
                return slc.stop
        else:
            # Scalar position: the right bound is one past the match.
            if side == "right":
                return slc + 1
            else:
                return slc
6485
    def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
        """
        Compute slice locations for input labels.

        Parameters
        ----------
        start : label, default None
            If None, defaults to the beginning.
        end : label, default None
            If None, defaults to the end.
        step : int, defaults None
            If None, defaults to 1.

        Returns
        -------
        tuple[int, int]

        See Also
        --------
        Index.get_loc : Get location for a single label.

        Notes
        -----
        This method only works if the index is monotonic or unique.

        Examples
        --------
        >>> idx = pd.Index(list('abcd'))
        >>> idx.slice_locs(start='b', end='c')
        (1, 3)
        """
        # None or a non-negative step means a forward slice; reverse slices
        # are handled by swapping the bounds and shifting at the end.
        inc = step is None or step >= 0

        if not inc:
            # If it's a reverse slice, temporarily swap bounds.
            start, end = end, start

        # GH 16785: If start and end happen to be date strings with UTC offsets
        # attempt to parse and check that the offsets are the same
        if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
            try:
                ts_start = Timestamp(start)
                ts_end = Timestamp(end)
            except (ValueError, TypeError):
                # not parseable as timestamps; let downstream lookup handle it
                pass
            else:
                if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
                    raise ValueError("Both dates must have the same UTC offset")

        start_slice = None
        if start is not None:
            start_slice = self.get_slice_bound(start, "left")
        if start_slice is None:
            # open start (or get_slice_bound returned a None slice.start)
            start_slice = 0

        end_slice = None
        if end is not None:
            end_slice = self.get_slice_bound(end, "right")
        if end_slice is None:
            # open end (or get_slice_bound returned a None slice.stop)
            end_slice = len(self)

        if not inc:
            # Bounds at this moment are swapped, swap them back and shift by 1.
            #
            # slice_locs('B', 'A', step=-1): s='B', e='A'
            #
            #             s='A'                 e='B'
            # AFTER SWAP:    |                    |
            #                v ------------------> V
            #           -----------------------------------
            #           | | |A|A|A|A| | | | | |B|B| | | | |
            #           -----------------------------------
            #              ^ <------------------ ^
            # SHOULD BE:   |                    |
            #           end=s-1              start=e-1
            #
            end_slice, start_slice = start_slice - 1, end_slice - 1

            # i == -1 triggers ``len(self) + i`` selection that points to the
            # last element, not before-the-first one, subtracting len(self)
            # compensates that.
            if end_slice == -1:
                end_slice -= len(self)
            if start_slice == -1:
                start_slice -= len(self)

        return start_slice, end_slice
6573
6574 def delete(self: _IndexT, loc) -> _IndexT:
6575 """
6576 Make new Index with passed location(-s) deleted.
6577
6578 Parameters
6579 ----------
6580 loc : int or list of int
6581 Location of item(-s) which will be deleted.
6582 Use a list of locations to delete more than one value at the same time.
6583
6584 Returns
6585 -------
6586 Index
6587 Will be same type as self, except for RangeIndex.
6588
6589 See Also
6590 --------
6591 numpy.delete : Delete any rows and column from NumPy array (ndarray).
6592
6593 Examples
6594 --------
6595 >>> idx = pd.Index(['a', 'b', 'c'])
6596 >>> idx.delete(1)
6597 Index(['a', 'c'], dtype='object')
6598
6599 >>> idx = pd.Index(['a', 'b', 'c'])
6600 >>> idx.delete([0, 2])
6601 Index(['b'], dtype='object')
6602 """
6603 values = self._values
6604 res_values: ArrayLike
6605 if isinstance(values, np.ndarray):
6606 # TODO(__array_function__): special casing will be unnecessary
6607 res_values = np.delete(values, loc)
6608 else:
6609 res_values = values.delete(loc)
6610
6611 # _constructor so RangeIndex-> Index with an int64 dtype
6612 return self._constructor._simple_new(res_values, name=self.name)
6613
    def insert(self, loc: int, item) -> Index:
        """
        Make new Index inserting new item at location.

        Follows Python numpy.insert semantics for negative values.

        Parameters
        ----------
        loc : int
        item : object

        Returns
        -------
        Index
        """
        # unwrap 0-dim ndarrays to their scalar value
        item = lib.item_from_zerodim(item)
        if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
            # normalize NA-likes to this dtype's canonical NA value
            item = self._na_value

        arr = self._values

        try:
            if isinstance(arr, ExtensionArray):
                res_values = arr.insert(loc, item)
                return type(self)._simple_new(res_values, name=self.name)
            else:
                item = self._validate_fill_value(item)
        except (TypeError, ValueError, LossySetitemError):
            # e.g. trying to insert an integer into a DatetimeIndex
            # We cannot keep the same dtype, so cast to the (often object)
            # minimal shared dtype before doing the insert.
            dtype = self._find_common_type_compat(item)
            return self.astype(dtype).insert(loc, item)

        if arr.dtype != object or not isinstance(
            item, (tuple, np.datetime64, np.timedelta64)
        ):
            # with object-dtype we need to worry about numpy incorrectly casting
            # dt64/td64 to integer, also about treating tuples as sequences
            # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
            casted = arr.dtype.type(item)
            new_values = np.insert(arr, loc, casted)

        else:
            # Insert a placeholder, then assign the item directly so numpy
            # never gets a chance to mis-cast it.
            # error: No overload variant of "insert" matches argument types
            # "ndarray[Any, Any]", "int", "None"
            new_values = np.insert(arr, loc, None)  # type: ignore[call-overload]
            # account for the placeholder shifting positions for negative loc
            loc = loc if loc >= 0 else loc - 1
            new_values[loc] = item

        return Index._with_infer(new_values, name=self.name)
6665
6666 def drop(
6667 self,
6668 labels: Index | np.ndarray | Iterable[Hashable],
6669 errors: IgnoreRaise = "raise",
6670 ) -> Index:
6671 """
6672 Make new Index with passed list of labels deleted.
6673
6674 Parameters
6675 ----------
6676 labels : array-like or scalar
6677 errors : {'ignore', 'raise'}, default 'raise'
6678 If 'ignore', suppress error and existing labels are dropped.
6679
6680 Returns
6681 -------
6682 Index
6683 Will be same type as self, except for RangeIndex.
6684
6685 Raises
6686 ------
6687 KeyError
6688 If not all of the labels are found in the selected axis
6689 """
6690 if not isinstance(labels, Index):
6691 # avoid materializing e.g. RangeIndex
6692 arr_dtype = "object" if self.dtype == "object" else None
6693 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
6694
6695 indexer = self.get_indexer_for(labels)
6696 mask = indexer == -1
6697 if mask.any():
6698 if errors != "ignore":
6699 raise KeyError(f"{list(labels[mask])} not found in axis")
6700 indexer = indexer[~mask]
6701 return self.delete(indexer)
6702
    def infer_objects(self, copy: bool = True) -> Index:
        """
        If we have an object dtype, try to infer a non-object dtype.

        Parameters
        ----------
        copy : bool, default True
            Whether to make a copy in cases where no inference occurs.
        """
        if self._is_multi:
            raise NotImplementedError(
                "infer_objects is not implemented for MultiIndex. "
                "Use index.to_frame().infer_objects() instead."
            )
        if self.dtype != object:
            # nothing to infer; honor the copy flag
            return self.copy() if copy else self

        values = self._values
        values = cast("npt.NDArray[np.object_]", values)
        # try to convert object values to datetime/timedelta/period/interval
        res_values = lib.maybe_convert_objects(
            values,
            convert_datetime=True,
            convert_timedelta=True,
            convert_period=True,
            convert_interval=True,
        )
        if copy and res_values is values:
            # no conversion happened but a copy was requested
            return self.copy()
        result = Index(res_values, name=self.name)
        if not copy and res_values is values and self._references is not None:
            # values are shared with self: register the new Index with the
            # same reference tracker so copy-on-write bookkeeping sees it
            result._references = self._references
            result._references.add_index_reference(result)
        return result
6736
6737 # --------------------------------------------------------------------
6738 # Generated Arithmetic, Comparison, and Unary Methods
6739
    def _cmp_method(self, other, op):
        """
        Wrapper used to dispatch comparison operations.
        """
        if self.is_(other):
            # fastpath: comparing an Index with itself has a known answer,
            # except at NA positions (NA != NA)
            if op in {operator.eq, operator.le, operator.ge}:
                arr = np.ones(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    # TODO: should set MultiIndex._can_hold_na = False?
                    arr[self.isna()] = False
                return arr
            elif op is operator.ne:
                arr = np.zeros(len(self), dtype=bool)
                if self._can_hold_na and not isinstance(self, ABCMultiIndex):
                    arr[self.isna()] = True
                return arr

        if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
            self
        ) != len(other):
            raise ValueError("Lengths must match to compare")

        if not isinstance(other, ABCMultiIndex):
            # unwrap Index/Series to the underlying array
            other = extract_array(other, extract_numpy=True)
        else:
            # MultiIndex has no single backing array; compare as object ndarray
            other = np.asarray(other)

        if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
            # e.g. PeriodArray, Categorical
            with np.errstate(all="ignore"):
                result = op(self._values, other)

        elif isinstance(self._values, ExtensionArray):
            # let the ExtensionArray implement the comparison
            result = op(self._values, other)

        elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
            # don't pass MultiIndex
            with np.errstate(all="ignore"):
                result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)

        else:
            with np.errstate(all="ignore"):
                result = ops.comparison_op(self._values, other, op)

        return result
6786
6787 @final
6788 def _logical_method(self, other, op):
6789 res_name = ops.get_op_result_name(self, other)
6790
6791 lvalues = self._values
6792 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
6793
6794 res_values = ops.logical_op(lvalues, rvalues, op)
6795 return self._construct_result(res_values, name=res_name)
6796
6797 @final
6798 def _construct_result(self, result, name):
6799 if isinstance(result, tuple):
6800 return (
6801 Index(result[0], name=name, dtype=result[0].dtype),
6802 Index(result[1], name=name, dtype=result[1].dtype),
6803 )
6804 return Index(result, name=name, dtype=result.dtype)
6805
6806 def _arith_method(self, other, op):
6807 if (
6808 isinstance(other, Index)
6809 and is_object_dtype(other.dtype)
6810 and type(other) is not Index
6811 ):
6812 # We return NotImplemented for object-dtype index *subclasses* so they have
6813 # a chance to implement ops before we unwrap them.
6814 # See https://github.com/pandas-dev/pandas/issues/31109
6815 return NotImplemented
6816
6817 return super()._arith_method(other, op)
6818
6819 @final
6820 def _unary_method(self, op):
6821 result = op(self._values)
6822 return Index(result, name=self.name)
6823
    def __abs__(self) -> Index:
        # elementwise absolute value via the shared unary dispatcher
        return self._unary_method(operator.abs)
6826
    def __neg__(self) -> Index:
        # elementwise negation via the shared unary dispatcher
        return self._unary_method(operator.neg)
6829
    def __pos__(self) -> Index:
        # elementwise unary plus via the shared unary dispatcher
        return self._unary_method(operator.pos)
6832
    def __invert__(self) -> Index:
        # GH#8875
        # elementwise inversion (~) via the shared unary dispatcher
        return self._unary_method(operator.inv)
6836
6837 # --------------------------------------------------------------------
6838 # Reductions
6839
    def any(self, *args, **kwargs):
        """
        Return whether any element is Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.all : Return whether all elements are True.
        Series.all : Return whether all elements are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        >>> index = pd.Index([0, 1, 2])
        >>> index.any()
        True

        >>> index = pd.Index([0, 0, 0])
        >>> index.any()
        False
        """
        nv.validate_any(args, kwargs)
        # raises for dtypes that do not support logical reductions
        self._maybe_disable_logical_methods("any")
        # error: Argument 1 to "any" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.any(self.values)  # type: ignore[arg-type]
6883
    def all(self, *args, **kwargs):
        """
        Return whether all elements are Truthy.

        Parameters
        ----------
        *args
            Required for compatibility with numpy.
        **kwargs
            Required for compatibility with numpy.

        Returns
        -------
        bool or array-like (if axis is specified)
            A single element array-like may be converted to bool.

        See Also
        --------
        Index.any : Return whether any element in an Index is True.
        Series.any : Return whether any element in a Series is True.
        Series.all : Return whether all elements in a Series are True.

        Notes
        -----
        Not a Number (NaN), positive infinity and negative infinity
        evaluate to True because these are not equal to zero.

        Examples
        --------
        True, because nonzero integers are considered True.

        >>> pd.Index([1, 2, 3]).all()
        True

        False, because ``0`` is considered False.

        >>> pd.Index([0, 1, 2]).all()
        False
        """
        nv.validate_all(args, kwargs)
        # raises for dtypes that do not support logical reductions
        self._maybe_disable_logical_methods("all")
        # error: Argument 1 to "all" has incompatible type "ArrayLike"; expected
        # "Union[Union[int, float, complex, str, bytes, generic], Sequence[Union[int,
        # float, complex, str, bytes, generic]], Sequence[Sequence[Any]],
        # _SupportsArray]"
        return np.all(self.values)  # type: ignore[arg-type]
6930
6931 @final
6932 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
6933 """
6934 raise if this Index subclass does not support any or all.
6935 """
6936 if (
6937 isinstance(self, ABCMultiIndex)
6938 or needs_i8_conversion(self.dtype)
6939 or is_interval_dtype(self.dtype)
6940 or is_categorical_dtype(self.dtype)
6941 or is_float_dtype(self.dtype)
6942 ):
6943 # This call will raise
6944 make_invalid_op(opname)(self)
6945
6946 @Appender(IndexOpsMixin.argmin.__doc__)
6947 def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
6948 nv.validate_argmin(args, kwargs)
6949 nv.validate_minmax_axis(axis)
6950
6951 if not self._is_multi and self.hasnans:
6952 # Take advantage of cache
6953 mask = self._isnan
6954 if not skipna or mask.all():
6955 return -1
6956 return super().argmin(skipna=skipna)
6957
6958 @Appender(IndexOpsMixin.argmax.__doc__)
6959 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
6960 nv.validate_argmax(args, kwargs)
6961 nv.validate_minmax_axis(axis)
6962
6963 if not self._is_multi and self.hasnans:
6964 # Take advantage of cache
6965 mask = self._isnan
6966 if not skipna or mask.all():
6967 return -1
6968 return super().argmax(skipna=skipna)
6969
6970 @doc(IndexOpsMixin.min)
6971 def min(self, axis=None, skipna: bool = True, *args, **kwargs):
6972 nv.validate_min(args, kwargs)
6973 nv.validate_minmax_axis(axis)
6974
6975 if not len(self):
6976 return self._na_value
6977
6978 if len(self) and self.is_monotonic_increasing:
6979 # quick check
6980 first = self[0]
6981 if not isna(first):
6982 return first
6983
6984 if not self._is_multi and self.hasnans:
6985 # Take advantage of cache
6986 mask = self._isnan
6987 if not skipna or mask.all():
6988 return self._na_value
6989
6990 if not self._is_multi and not isinstance(self._values, np.ndarray):
6991 return self._values._reduce(name="min", skipna=skipna)
6992
6993 return super().min(skipna=skipna)
6994
6995 @doc(IndexOpsMixin.max)
6996 def max(self, axis=None, skipna: bool = True, *args, **kwargs):
6997 nv.validate_max(args, kwargs)
6998 nv.validate_minmax_axis(axis)
6999
7000 if not len(self):
7001 return self._na_value
7002
7003 if len(self) and self.is_monotonic_increasing:
7004 # quick check
7005 last = self[-1]
7006 if not isna(last):
7007 return last
7008
7009 if not self._is_multi and self.hasnans:
7010 # Take advantage of cache
7011 mask = self._isnan
7012 if not skipna or mask.all():
7013 return self._na_value
7014
7015 if not self._is_multi and not isinstance(self._values, np.ndarray):
7016 return self._values._reduce(name="max", skipna=skipna)
7017
7018 return super().max(skipna=skipna)
7019
7020 # --------------------------------------------------------------------
7021
    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        # An Index is always 1-dimensional, so the shape is just (length,).
        return (len(self),)
7030
7031
def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Many sequences returns a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])

    See Also
    --------
    ensure_index
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) != 1:
        return MultiIndex.from_arrays(sequences, names=names)

    # single sequence -> flat Index, with the single name if provided
    name = names[0] if names is not None else None
    return Index(sequences[0], name=name)
7070
7071
def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        return index_like.copy() if copy else index_like

    if isinstance(index_like, ABCSeries):
        return Index(index_like, name=index_like.name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if not isinstance(index_like, list):
        return Index(index_like, copy=copy)

    if type(index_like) is not list:
        # must check for exactly list here because of strict type
        # check in clean_index_list
        index_like = list(index_like)

    if len(index_like) and lib.is_all_arraylike(index_like):
        # a list of arrays becomes a MultiIndex
        from pandas.core.indexes.multi import MultiIndex

        return MultiIndex.from_arrays(index_like)
    return Index(index_like, copy=copy, tupleize_cols=False)
7129
7130
def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
    """
    try:
        len(seq)
    except TypeError:
        # no __len__ (e.g. a generator): materialize it
        seq = list(seq)
    return seq
7141
7142
def trim_front(strings: list[str]) -> list[str]:
    """
    Trim leading spaces common to all strings.

    Space columns are removed from the front of every string for as long
    as every string is non-empty and starts with a space; an all-space
    string may be consumed entirely.  Returns the input object unchanged
    when nothing is trimmed.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings

    def _trimmable(s: str) -> int:
        # number of leading columns strippable from this string: its count
        # of leading spaces; an all-space (or empty) string caps at its length
        stripped = s.lstrip(" ")
        return len(s) if not stripped else len(s) - len(stripped)

    # Strip exactly as many columns as every string allows, in one pass
    # instead of repeatedly rebuilding the list column by column.
    n_strip = min(_trimmable(s) for s in strings)
    if n_strip == 0:
        return strings
    return [s[n_strip:] for s in strings]
7160
7161
7162def _validate_join_method(method: str) -> None:
7163 if method not in ["left", "right", "inner", "outer"]:
7164 raise ValueError(f"do not recognize join method {method}")
7165
7166
def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.

    Parameters
    ----------
    name : Hashable or None
        Explicitly passed name; used as-is when not None.
    obj : object
        Data the index is built from; its ``.name`` is used only when it
        is an Index or Series.
    cls : type
        Class whose ``__name__`` appears in the error message.

    Returns
    -------
    Hashable

    Raises
    ------
    TypeError
        If the resulting name is not hashable.
    """
    if name is None and isinstance(obj, (Index, ABCSeries)):
        # Note we don't just check for "name" attribute since that would
        # pick up e.g. dtype.name
        name = obj.name

    # GH#29069
    if not is_hashable(name):
        raise TypeError(f"{cls.__name__}.name must be a hashable type")

    return name
7181
7182
def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
    """
    Return common name if all indices agree, otherwise None (level-by-level).

    Parameters
    ----------
    indexes : list of Index objects

    Returns
    -------
    tuple
        The unanimous 'names' found, with None where indexes disagree.
    """
    all_names = (tuple(idx.names) for idx in indexes)
    # group names level-by-level (None-padded for shorter indexes)
    per_level = zip_longest(*all_names)
    return tuple(
        level_names.pop() if len(level_names) == 1 else None
        for level_names in map(set, per_level)
    )
7200
7201
def _unpack_nested_dtype(other: Index) -> Index:
    """
    When checking if our dtype is comparable with another, we need
    to unpack CategoricalDtype to look at its categories.dtype.

    Parameters
    ----------
    other : Index

    Returns
    -------
    Index
    """
    from pandas.core.arrays.arrow import ArrowDtype

    dtype = other.dtype
    if isinstance(dtype, CategoricalDtype):
        # If there is ever a SparseIndex, this could get dispatched
        # here too.
        return dtype.categories
    if isinstance(dtype, ArrowDtype):
        # GH 53617: unwrap dictionary-encoded arrow dtypes to the value type
        import pyarrow as pa

        if pa.types.is_dictionary(dtype.pyarrow_dtype):
            other = other.astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
    return other
7229
7230
def _maybe_try_sort(result, sort):
    # sort=False skips sorting entirely; sort=True raises on incomparable
    # values, while sort=None downgrades the failure to a RuntimeWarning.
    if sort is False:
        return result
    try:
        return algos.safe_sort(result)
    except TypeError as err:
        if sort is True:
            raise
        warnings.warn(
            f"{err}, sort order is undefined for incomparable objects.",
            RuntimeWarning,
            stacklevel=find_stack_level(),
        )
        return result