from __future__ import annotations

from collections import abc
from datetime import datetime
import functools
from itertools import zip_longest
import operator
from typing import (
    TYPE_CHECKING,
    Any,
    Callable,
    ClassVar,
    Literal,
    NoReturn,
    cast,
    final,
    overload,
)
import warnings

import numpy as np

from pandas._config import (
    get_option,
    using_copy_on_write,
    using_pyarrow_string_dtype,
)

from pandas._libs import (
    NaT,
    algos as libalgos,
    index as libindex,
    lib,
    writers,
)
from pandas._libs.internals import BlockValuesRefs
import pandas._libs.join as libjoin
from pandas._libs.lib import (
    is_datetime_array,
    no_default,
)
from pandas._libs.tslibs import (
    IncompatibleFrequency,
    OutOfBoundsDatetime,
    Timestamp,
    tz_compare,
)
from pandas._typing import (
    AnyAll,
    ArrayLike,
    Axes,
    Axis,
    DropKeep,
    DtypeObj,
    F,
    IgnoreRaise,
    IndexLabel,
    JoinHow,
    Level,
    NaPosition,
    ReindexMethod,
    Self,
    Shape,
    npt,
)
from pandas.compat.numpy import function as nv
from pandas.errors import (
    DuplicateLabelError,
    InvalidIndexError,
)
from pandas.util._decorators import (
    Appender,
    cache_readonly,
    deprecate_nonkeyword_arguments,
    doc,
)
from pandas.util._exceptions import (
    find_stack_level,
    rewrite_exception,
)

from pandas.core.dtypes.astype import (
    astype_array,
    astype_is_view,
)
from pandas.core.dtypes.cast import (
    LossySetitemError,
    can_hold_element,
    common_dtype_categorical_compat,
    find_result_type,
    infer_dtype_from,
    maybe_cast_pointwise_result,
    np_can_hold_element,
)
from pandas.core.dtypes.common import (
    ensure_int64,
    ensure_object,
    ensure_platform_int,
    is_any_real_numeric_dtype,
    is_bool_dtype,
    is_ea_or_datetimelike_dtype,
    is_float,
    is_hashable,
    is_integer,
    is_iterator,
    is_list_like,
    is_numeric_dtype,
    is_object_dtype,
    is_scalar,
    is_signed_integer_dtype,
    is_string_dtype,
    needs_i8_conversion,
    pandas_dtype,
    validate_all_hashable,
)
from pandas.core.dtypes.concat import concat_compat
from pandas.core.dtypes.dtypes import (
    ArrowDtype,
    CategoricalDtype,
    DatetimeTZDtype,
    ExtensionDtype,
    IntervalDtype,
    PeriodDtype,
    SparseDtype,
)
from pandas.core.dtypes.generic import (
    ABCCategoricalIndex,
    ABCDataFrame,
    ABCDatetimeIndex,
    ABCIntervalIndex,
    ABCMultiIndex,
    ABCPeriodIndex,
    ABCRangeIndex,
    ABCSeries,
    ABCTimedeltaIndex,
)
from pandas.core.dtypes.inference import is_dict_like
from pandas.core.dtypes.missing import (
    array_equivalent,
    is_valid_na_for_dtype,
    isna,
)

from pandas.core import (
    arraylike,
    nanops,
    ops,
)
from pandas.core.accessor import CachedAccessor
import pandas.core.algorithms as algos
from pandas.core.array_algos.putmask import (
    setitem_datetimelike_compat,
    validate_putmask,
)
from pandas.core.arrays import (
    ArrowExtensionArray,
    BaseMaskedArray,
    Categorical,
    DatetimeArray,
    ExtensionArray,
    TimedeltaArray,
)
from pandas.core.arrays.string_ import (
    StringArray,
    StringDtype,
)
from pandas.core.base import (
    IndexOpsMixin,
    PandasObject,
)
import pandas.core.common as com
from pandas.core.construction import (
    ensure_wrapped_if_datetimelike,
    extract_array,
    sanitize_array,
)
from pandas.core.indexers import (
    disallow_ndim_indexing,
    is_valid_positional_slice,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
from pandas.core.ops.invalid import make_invalid_op
from pandas.core.sorting import (
    ensure_key_mapped,
    get_group_index_sorter,
    nargsort,
)
from pandas.core.strings.accessor import StringMethods

from pandas.io.formats.printing import (
    PrettyDict,
    default_pprint,
    format_object_summary,
    pprint_thing,
)

if TYPE_CHECKING:
    from collections.abc import (
        Hashable,
        Iterable,
        Sequence,
    )

    from pandas import (
        CategoricalIndex,
        DataFrame,
        MultiIndex,
        Series,
    )
    from pandas.core.arrays import (
        IntervalArray,
        PeriodArray,
    )

__all__ = ["Index"]

_unsortable_types = frozenset(("mixed", "mixed-integer"))

_index_doc_kwargs: dict[str, str] = {
    "klass": "Index",
    "inplace": "",
    "target_klass": "Index",
    "raises_section": "",
    "unique": "Index",
    "duplicated": "np.ndarray",
}
_index_shared_docs: dict[str, str] = {}
str_t = str

_dtype_obj = np.dtype("object")

_masked_engines = {
    "Complex128": libindex.MaskedComplex128Engine,
    "Complex64": libindex.MaskedComplex64Engine,
    "Float64": libindex.MaskedFloat64Engine,
    "Float32": libindex.MaskedFloat32Engine,
    "UInt64": libindex.MaskedUInt64Engine,
    "UInt32": libindex.MaskedUInt32Engine,
    "UInt16": libindex.MaskedUInt16Engine,
    "UInt8": libindex.MaskedUInt8Engine,
    "Int64": libindex.MaskedInt64Engine,
    "Int32": libindex.MaskedInt32Engine,
    "Int16": libindex.MaskedInt16Engine,
    "Int8": libindex.MaskedInt8Engine,
    "boolean": libindex.MaskedBoolEngine,
    "double[pyarrow]": libindex.MaskedFloat64Engine,
    "float64[pyarrow]": libindex.MaskedFloat64Engine,
    "float32[pyarrow]": libindex.MaskedFloat32Engine,
    "float[pyarrow]": libindex.MaskedFloat32Engine,
    "uint64[pyarrow]": libindex.MaskedUInt64Engine,
    "uint32[pyarrow]": libindex.MaskedUInt32Engine,
    "uint16[pyarrow]": libindex.MaskedUInt16Engine,
    "uint8[pyarrow]": libindex.MaskedUInt8Engine,
    "int64[pyarrow]": libindex.MaskedInt64Engine,
    "int32[pyarrow]": libindex.MaskedInt32Engine,
    "int16[pyarrow]": libindex.MaskedInt16Engine,
    "int8[pyarrow]": libindex.MaskedInt8Engine,
    "bool[pyarrow]": libindex.MaskedBoolEngine,
}


def _maybe_return_indexers(meth: F) -> F:
    """
    Decorator to simplify 'return_indexers' checks in Index.join.
    """

    @functools.wraps(meth)
    def join(
        self,
        other: Index,
        *,
        how: JoinHow = "left",
        level=None,
        return_indexers: bool = False,
        sort: bool = False,
    ):
        join_index, lidx, ridx = meth(self, other, how=how, level=level, sort=sort)
        if not return_indexers:
            return join_index

        if lidx is not None:
            lidx = ensure_platform_int(lidx)
        if ridx is not None:
            ridx = ensure_platform_int(ridx)
        return join_index, lidx, ridx

    return cast(F, join)


def _new_Index(cls, d):
    """
    This is called upon unpickling; the default unpickling path takes no
    arguments and breaks __new__.
    """
    # required for backward compat, because PI can't be instantiated with
    # ordinals through __new__ (GH#13277)
    if issubclass(cls, ABCPeriodIndex):
        from pandas.core.indexes.period import _new_PeriodIndex

        return _new_PeriodIndex(cls, **d)

    if issubclass(cls, ABCMultiIndex):
        if "labels" in d and "codes" not in d:
            # GH#23752 "labels" kwarg has been replaced with "codes"
            d["codes"] = d.pop("labels")

        # Since this was a valid MultiIndex at pickle-time, we don't need to
        # check validity at un-pickle time.
        d["verify_integrity"] = False

    elif "dtype" not in d and "data" in d:
        # Prevent Index.__new__ from conducting inference;
        # "data" key not in RangeIndex
        d["dtype"] = d["data"].dtype
    return cls.__new__(cls, **d)


class Index(IndexOpsMixin, PandasObject):
    """
    Immutable sequence used for indexing and alignment.

    The basic object storing axis labels for all pandas objects.

    .. versionchanged:: 2.0.0

       Index can hold all numpy numeric dtypes (except float16). Previously only
       int64/uint64/float64 dtypes were accepted.

    Parameters
    ----------
    data : array-like (1-dimensional)
    dtype : str, numpy.dtype, or ExtensionDtype, optional
        Data type for the output Index. If not specified, this will be
        inferred from `data`.
        See the :ref:`user guide <basics.dtypes>` for more usages.
    copy : bool, default False
        Copy input data.
    name : object
        Name to be stored in the index.
    tupleize_cols : bool, default True
        When True, attempt to create a MultiIndex if possible.

    See Also
    --------
    RangeIndex : Index implementing a monotonic integer range.
    CategoricalIndex : Index of :class:`Categorical` s.
    MultiIndex : A multi-level, or hierarchical Index.
    IntervalIndex : An Index of :class:`Interval` s.
    DatetimeIndex : Index of datetime64 data.
    TimedeltaIndex : Index of timedelta64 data.
    PeriodIndex : Index of Period data.

    Notes
    -----
    An Index instance can **only** contain hashable objects.
    An Index instance *cannot* hold numpy float16 dtype.

    Examples
    --------
    >>> pd.Index([1, 2, 3])
    Index([1, 2, 3], dtype='int64')

    >>> pd.Index(list('abc'))
    Index(['a', 'b', 'c'], dtype='object')

    >>> pd.Index([1, 2, 3], dtype="uint8")
    Index([1, 2, 3], dtype='uint8')
    """

    # similar to __array_priority__, positions Index after Series and DataFrame
    # but before ExtensionArray. Should NOT be overridden by subclasses.
    __pandas_priority__ = 2000

    # Cython methods; see github.com/cython/cython/issues/2647
    # for why we need to wrap these instead of making them class attributes
    # Moreover, cython will choose the appropriate-dtyped sub-function
    # given the dtypes of the passed arguments

    @final
    def _left_indexer_unique(self, other: Self) -> npt.NDArray[np.intp]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        # similar but not identical to ov.searchsorted(sv)
        return libjoin.left_join_indexer_unique(sv, ov)

    @final
    def _left_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.left_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _inner_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.inner_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    @final
    def _outer_indexer(
        self, other: Self
    ) -> tuple[ArrayLike, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
        # Caller is responsible for ensuring other.dtype == self.dtype
        sv = self._get_join_target()
        ov = other._get_join_target()
        joined_ndarray, lidx, ridx = libjoin.outer_join_indexer(sv, ov)
        joined = self._from_join_target(joined_ndarray)
        return joined, lidx, ridx

    _typ: str = "index"
    _data: ExtensionArray | np.ndarray
    _data_cls: type[ExtensionArray] | tuple[type[np.ndarray], type[ExtensionArray]] = (
        np.ndarray,
        ExtensionArray,
    )
    _id: object | None = None
    _name: Hashable = None
    # MultiIndex.levels previously allowed setting the index name. We
    # don't allow this anymore, and raise if it happens rather than
    # failing silently.
    _no_setting_name: bool = False
    _comparables: list[str] = ["name"]
    _attributes: list[str] = ["name"]

    @cache_readonly
    def _can_hold_strings(self) -> bool:
        return not is_numeric_dtype(self.dtype)

    _engine_types: dict[np.dtype | ExtensionDtype, type[libindex.IndexEngine]] = {
        np.dtype(np.int8): libindex.Int8Engine,
        np.dtype(np.int16): libindex.Int16Engine,
        np.dtype(np.int32): libindex.Int32Engine,
        np.dtype(np.int64): libindex.Int64Engine,
        np.dtype(np.uint8): libindex.UInt8Engine,
        np.dtype(np.uint16): libindex.UInt16Engine,
        np.dtype(np.uint32): libindex.UInt32Engine,
        np.dtype(np.uint64): libindex.UInt64Engine,
        np.dtype(np.float32): libindex.Float32Engine,
        np.dtype(np.float64): libindex.Float64Engine,
        np.dtype(np.complex64): libindex.Complex64Engine,
        np.dtype(np.complex128): libindex.Complex128Engine,
    }

    @property
    def _engine_type(
        self,
    ) -> type[libindex.IndexEngine | libindex.ExtensionEngine]:
        return self._engine_types.get(self.dtype, libindex.ObjectEngine)

    # whether we support partial string indexing. Overridden
    # in DatetimeIndex and PeriodIndex
    _supports_partial_string_indexing = False

    _accessors = {"str"}

    str = CachedAccessor("str", StringMethods)

    _references = None

    # --------------------------------------------------------------------
    # Constructors

    def __new__(
        cls,
        data=None,
        dtype=None,
        copy: bool = False,
        name=None,
        tupleize_cols: bool = True,
    ) -> Self:
        from pandas.core.indexes.range import RangeIndex

        name = maybe_extract_name(name, data, cls)

        if dtype is not None:
            dtype = pandas_dtype(dtype)

        data_dtype = getattr(data, "dtype", None)

        refs = None
        if not copy and isinstance(data, (ABCSeries, Index)):
            refs = data._references

        is_pandas_object = isinstance(data, (ABCSeries, Index, ExtensionArray))

        # range
        if isinstance(data, (range, RangeIndex)):
            result = RangeIndex(start=data, copy=copy, name=name)
            if dtype is not None:
                return result.astype(dtype, copy=False)
            # error: Incompatible return value type (got "MultiIndex",
            # expected "Self")
            return result  # type: ignore[return-value]

        elif is_ea_or_datetimelike_dtype(dtype):
            # non-EA dtype indexes have special casting logic, so we punt here
            pass

        elif is_ea_or_datetimelike_dtype(data_dtype):
            pass

        elif isinstance(data, (np.ndarray, Index, ABCSeries)):
            if isinstance(data, ABCMultiIndex):
                data = data._values

            if data.dtype.kind not in "iufcbmM":
                # GH#11836 we need to avoid having numpy coerce
                # things that look like ints/floats to ints unless
                # they are actually ints, e.g. '0' and 0.0
                # should not be coerced
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        elif is_scalar(data):
            raise cls._raise_scalar_data_error(data)
        elif hasattr(data, "__array__"):
            return cls(np.asarray(data), dtype=dtype, copy=copy, name=name)
        elif not is_list_like(data) and not isinstance(data, memoryview):
            # 2022-11-16 the memoryview check is only necessary on some CI
            # builds, not clear why
            raise cls._raise_scalar_data_error(data)

        else:
            if tupleize_cols:
                # GH#21470: convert iterable to list before determining if empty
                if is_iterator(data):
                    data = list(data)

                if data and all(isinstance(e, tuple) for e in data):
                    # we must be all tuples, otherwise don't construct
                    # GH#10697
                    from pandas.core.indexes.multi import MultiIndex

                    # error: Incompatible return value type (got "MultiIndex",
                    # expected "Self")
                    return MultiIndex.from_tuples(  # type: ignore[return-value]
                        data, names=name
                    )
            # other iterable of some kind

            if not isinstance(data, (list, tuple)):
                # we allow set/frozenset, which Series/sanitize_array does not, so
                # cast to list here
                data = list(data)
            if len(data) == 0:
                # unlike Series, we default to object dtype:
                data = np.array(data, dtype=object)

            if len(data) and isinstance(data[0], tuple):
                # Ensure we get 1-D array of tuples instead of 2D array.
                data = com.asarray_tuplesafe(data, dtype=_dtype_obj)

        try:
            arr = sanitize_array(data, None, dtype=dtype, copy=copy)
        except ValueError as err:
            if "index must be specified when data is not list-like" in str(err):
                raise cls._raise_scalar_data_error(data) from err
            if "Data must be 1-dimensional" in str(err):
                raise ValueError("Index data must be 1-dimensional") from err
            raise
        arr = ensure_wrapped_if_datetimelike(arr)

        klass = cls._dtype_to_subclass(arr.dtype)

        arr = klass._ensure_array(arr, arr.dtype, copy=False)
        result = klass._simple_new(arr, name, refs=refs)
        if dtype is None and is_pandas_object and data_dtype == np.object_:
            if result.dtype != data_dtype:
                warnings.warn(
                    "Dtype inference on a pandas object "
                    "(Series, Index, ExtensionArray) is deprecated. The Index "
                    "constructor will keep the original dtype in the future. "
                    "Call `infer_objects` on the result to get the old "
                    "behavior.",
                    FutureWarning,
                    stacklevel=2,
                )
        return result  # type: ignore[return-value]

    @classmethod
    def _ensure_array(cls, data, dtype, copy: bool):
        """
        Ensure we have a valid array to pass to _simple_new.
        """
        if data.ndim > 1:
            # GH#13601, GH#20285, GH#27125
            raise ValueError("Index data must be 1-dimensional")
        elif dtype == np.float16:
            # float16 not supported (no indexing engine)
            raise NotImplementedError("float16 indexes are not supported")

        if copy:
            # asarray_tuplesafe does not always copy underlying data,
            # so need to make sure that this happens
            data = data.copy()
        return data

    @final
    @classmethod
    def _dtype_to_subclass(cls, dtype: DtypeObj):
        # Delay import for perf. https://github.com/pandas-dev/pandas/pull/31423

        if isinstance(dtype, ExtensionDtype):
            return dtype.index_class

        if dtype.kind == "M":
            from pandas import DatetimeIndex

            return DatetimeIndex

        elif dtype.kind == "m":
            from pandas import TimedeltaIndex

            return TimedeltaIndex

        elif dtype.kind == "O":
            # NB: assuming away MultiIndex
            return Index

        elif issubclass(dtype.type, str) or is_numeric_dtype(dtype):
            return Index

        raise NotImplementedError(dtype)

    # NOTE for new Index creation:

    # - _simple_new: It returns new Index with the same type as the caller.
    #   All metadata (such as name) must be provided by the caller.
    #   Using _shallow_copy is recommended because it fills in this metadata
    #   when not otherwise specified.

    # - _shallow_copy: It returns new Index with the same type (using
    #   _simple_new), but fills in the caller's metadata when not otherwise
    #   specified. Passed kwargs will overwrite the corresponding metadata.

    # See each method's docstring.
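
    # A minimal comment-only sketch of the distinction (illustrative, not
    # executed): given ``idx = pd.Index([1, 2, 3], name="x")`` and ``values``
    # already of a compatible array type,
    #
    #     type(idx)._simple_new(values)   # name must be passed explicitly
    #     idx._shallow_copy(values)       # fills in idx.name (-> "x") and
    #                                     # forwards idx._references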

    @classmethod
    def _simple_new(
        cls, values: ArrayLike, name: Hashable | None = None, refs=None
    ) -> Self:
        """
        We require that we have a dtype compatible for the values. If we are
        passed non-dtype-compatible values, coerce using the constructor.

        Must be careful not to recurse.
        """
        assert isinstance(values, cls._data_cls), type(values)

        result = object.__new__(cls)
        result._data = values
        result._name = name
        result._cache = {}
        result._reset_identity()
        if refs is not None:
            result._references = refs
        else:
            result._references = BlockValuesRefs()
        result._references.add_index_reference(result)

        return result

    @classmethod
    def _with_infer(cls, *args, **kwargs):
        """
        Constructor that uses the 1.0.x behavior inferring numeric dtypes
        for ndarray[object] inputs.
        """
        result = cls(*args, **kwargs)

        if result.dtype == _dtype_obj and not result._is_multi:
            # error: Argument 1 to "maybe_convert_objects" has incompatible type
            # "Union[ExtensionArray, ndarray[Any, Any]]"; expected
            # "ndarray[Any, Any]"
            values = lib.maybe_convert_objects(result._values)  # type: ignore[arg-type]
            if values.dtype.kind in "iufb":
                return Index(values, name=result.name)

        return result

    @cache_readonly
    def _constructor(self) -> type[Self]:
        return type(self)

    @final
    def _maybe_check_unique(self) -> None:
        """
        Check that an Index has no duplicates.

        This is typically only called via
        `NDFrame.flags.allows_duplicate_labels.setter` when it's set to
        True (duplicates aren't allowed).

        Raises
        ------
        DuplicateLabelError
            When the index is not unique.
        """
        if not self.is_unique:
            msg = """Index has duplicates."""
            duplicates = self._format_duplicate_message()
            msg += f"\n{duplicates}"

            raise DuplicateLabelError(msg)

    @final
    def _format_duplicate_message(self) -> DataFrame:
        """
        Construct the DataFrame for a DuplicateLabelError.

        This returns a DataFrame indicating the labels and positions
        of duplicates in an index. This should only be called when it's
        already known that duplicates are present.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'a'])
        >>> idx._format_duplicate_message()
            positions
        label
        a        [0, 2]
        """
        from pandas import Series

        duplicates = self[self.duplicated(keep="first")].unique()
        assert len(duplicates)

        out = (
            Series(np.arange(len(self)), copy=False)
            .groupby(self, observed=False)
            .agg(list)[duplicates]
        )
        if self._is_multi:
            # test_format_duplicate_labels_message_multi
            # error: "Type[Index]" has no attribute "from_tuples"  [attr-defined]
            out.index = type(self).from_tuples(out.index)  # type: ignore[attr-defined]

        if self.nlevels == 1:
            out = out.rename_axis("label")
        return out.to_frame(name="positions")

    # --------------------------------------------------------------------
    # Index Internals Methods

    def _shallow_copy(self, values, name: Hashable = no_default) -> Self:
        """
        Create a new Index with the same class as the caller, without copying
        the data. Use the caller's object attributes, with passed-in attributes
        taking precedence.

        *this is an internal non-public method*

        Parameters
        ----------
        values : the values to create the new Index, optional
        name : Label, defaults to self.name
        """
        name = self._name if name is no_default else name

        return self._simple_new(values, name=name, refs=self._references)

    def _view(self) -> Self:
        """
        fastpath to make a shallow copy, i.e. new object with same data.
        """
        result = self._simple_new(self._values, name=self._name, refs=self._references)

        result._cache = self._cache
        return result

    @final
    def _rename(self, name: Hashable) -> Self:
        """
        fastpath for rename if new name is already validated.
        """
        result = self._view()
        result._name = name
        return result

    @final
    def is_(self, other) -> bool:
        """
        More flexible, faster check like ``is``, but that works through views.

        Note: this is *not* the same as ``Index.identical()``, which checks
        that metadata is also the same.

        Parameters
        ----------
        other : object
            Other object to compare against.

        Returns
        -------
        bool
            True if both have the same underlying data, False otherwise.

        See Also
        --------
        Index.identical : Works like ``Index.is_`` but also checks metadata.

        Examples
        --------
        >>> idx1 = pd.Index(['1', '2', '3'])
        >>> idx1.is_(idx1.view())
        True

        >>> idx1.is_(idx1.copy())
        False
        """
        if self is other:
            return True
        elif not hasattr(other, "_id"):
            return False
        elif self._id is None or other._id is None:
            return False
        else:
            return self._id is other._id

    @final
    def _reset_identity(self) -> None:
        """
        Initialize or reset the ``_id`` attribute with a new object.
        """
        self._id = object()

    @final
    def _cleanup(self) -> None:
        self._engine.clear_mapping()

    @cache_readonly
    def _engine(
        self,
    ) -> libindex.IndexEngine | libindex.ExtensionEngine | libindex.MaskedIndexEngine:
        # For base class (object dtype) we get ObjectEngine
        target_values = self._get_engine_target()

        if isinstance(self._values, ArrowExtensionArray) and self.dtype.kind in "Mm":
            import pyarrow as pa

            pa_type = self._values._pa_array.type
            if pa.types.is_timestamp(pa_type):
                target_values = self._values._to_datetimearray()
                return libindex.DatetimeEngine(target_values._ndarray)
            elif pa.types.is_duration(pa_type):
                target_values = self._values._to_timedeltaarray()
                return libindex.TimedeltaEngine(target_values._ndarray)

        if isinstance(target_values, ExtensionArray):
            if isinstance(target_values, (BaseMaskedArray, ArrowExtensionArray)):
                try:
                    return _masked_engines[target_values.dtype.name](target_values)
                except KeyError:
                    # Not supported yet e.g. decimal
                    pass
            elif self._engine_type is libindex.ObjectEngine:
                return libindex.ExtensionEngine(target_values)

        target_values = cast(np.ndarray, target_values)
        # to avoid a reference cycle, bind `target_values` to a local variable, so
        # `self` is not passed into the lambda.
        if target_values.dtype == bool:
            return libindex.BoolEngine(target_values)
        elif target_values.dtype == np.complex64:
            return libindex.Complex64Engine(target_values)
        elif target_values.dtype == np.complex128:
            return libindex.Complex128Engine(target_values)
        elif needs_i8_conversion(self.dtype):
            # We need to keep M8/m8 dtype when initializing the Engine,
            # but don't want to change _get_engine_target bc it is used
            # elsewhere
            # error: Item "ExtensionArray" of "Union[ExtensionArray,
            # ndarray[Any, Any]]" has no attribute "_ndarray"  [union-attr]
            target_values = self._data._ndarray  # type: ignore[union-attr]

        # error: Argument 1 to "ExtensionEngine" has incompatible type
        # "ndarray[Any, Any]"; expected "ExtensionArray"
        return self._engine_type(target_values)  # type: ignore[arg-type]

    @final
    @cache_readonly
    def _dir_additions_for_owner(self) -> set[str_t]:
        """
        Add the string-like labels to the owner dataframe/series dir output.

        If this is a MultiIndex, its first-level values are used.
        """
        return {
            c
            for c in self.unique(level=0)[: get_option("display.max_dir_items")]
            if isinstance(c, str) and c.isidentifier()
        }

    # --------------------------------------------------------------------
    # Array-Like Methods

    # ndarray compat
    def __len__(self) -> int:
        """
        Return the length of the Index.
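
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> len(idx)
        3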
        """
        return len(self._data)

    def __array__(self, dtype=None, copy=None) -> np.ndarray:
        """
        The array interface, return my values.
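
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> np.asarray(idx)
        array([1, 2, 3])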
        """
        return np.asarray(self._data, dtype=dtype)

    def __array_ufunc__(self, ufunc: np.ufunc, method: str_t, *inputs, **kwargs):
        if any(isinstance(other, (ABCSeries, ABCDataFrame)) for other in inputs):
            return NotImplemented

        result = arraylike.maybe_dispatch_ufunc_to_dunder_op(
            self, ufunc, method, *inputs, **kwargs
        )
        if result is not NotImplemented:
            return result

        if "out" in kwargs:
            # e.g. test_dti_isub_tdi
            return arraylike.dispatch_ufunc_with_out(
                self, ufunc, method, *inputs, **kwargs
            )

        if method == "reduce":
            result = arraylike.dispatch_reduction_ufunc(
                self, ufunc, method, *inputs, **kwargs
            )
            if result is not NotImplemented:
                return result

        new_inputs = [x if x is not self else x._values for x in inputs]
        result = getattr(ufunc, method)(*new_inputs, **kwargs)
        if ufunc.nout == 2:
            # i.e. np.divmod, np.modf, np.frexp
            return tuple(self.__array_wrap__(x) for x in result)
        elif method == "reduce":
            result = lib.item_from_zerodim(result)
            return result

        if result.dtype == np.float16:
            result = result.astype(np.float32)

        return self.__array_wrap__(result)

    @final
    def __array_wrap__(self, result, context=None, return_scalar=False):
        """
        Gets called after a ufunc and other functions, e.g. np.split.
        """
        result = lib.item_from_zerodim(result)
        if (not isinstance(result, Index) and is_bool_dtype(result.dtype)) or np.ndim(
            result
        ) > 1:
            # exclude Index to avoid warning from is_bool_dtype deprecation;
            # in the Index case it doesn't matter which path we go down.
            # reached in plotting tests with e.g. np.nonzero(index)
            return result

        return Index(result, name=self.name)

    @cache_readonly
    def dtype(self) -> DtypeObj:
        """
        Return the dtype object of the underlying data.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.dtype
        dtype('int64')
        """
        return self._data.dtype

    @final
    def ravel(self, order: str_t = "C") -> Self:
        """
        Return a view on self.

        Returns
        -------
        Index

        See Also
        --------
        numpy.ndarray.ravel : Return a flattened array.

        Examples
        --------
        >>> s = pd.Series([1, 2, 3], index=['a', 'b', 'c'])
        >>> s.index.ravel()
        Index(['a', 'b', 'c'], dtype='object')
        """
        return self[:]

    def view(self, cls=None):
        # we need to see if we are subclassing an
        # index type here
        if cls is not None and not hasattr(cls, "_typ"):
            dtype = cls
            if isinstance(cls, str):
                dtype = pandas_dtype(cls)

            if needs_i8_conversion(dtype):
                idx_cls = self._dtype_to_subclass(dtype)
                arr = self.array.view(dtype)
                if isinstance(arr, ExtensionArray):
                    # here we exclude non-supported dt64/td64 dtypes
                    return idx_cls._simple_new(
                        arr, name=self.name, refs=self._references
                    )
                return arr

            result = self._data.view(cls)
        else:
            if cls is not None:
                warnings.warn(
                    # GH#55709
                    f"Passing a type in {type(self).__name__}.view is deprecated "
                    "and will raise in a future version. "
                    "Call view without any argument to retain the old behavior.",
                    FutureWarning,
                    stacklevel=find_stack_level(),
                )

            result = self._view()
        if isinstance(result, Index):
            result._id = self._id
        return result

    def astype(self, dtype, copy: bool = True):
        """
        Create an Index with values cast to the given dtype.

        The class of a new Index is determined by dtype. When conversion is
        impossible, a TypeError exception is raised.

        Parameters
        ----------
        dtype : numpy dtype or pandas type
            Note that any signed integer `dtype` is treated as ``'int64'``,
            and any unsigned integer `dtype` is treated as ``'uint64'``,
            regardless of the size.
        copy : bool, default True
            By default, astype always returns a newly allocated object.
            If copy is set to False and internal requirements on dtype are
            satisfied, the original data is used to create a new Index
            or the original Index is returned.

        Returns
        -------
        Index
            Index with values cast to specified dtype.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.astype('float')
        Index([1.0, 2.0, 3.0], dtype='float64')
        """
        if dtype is not None:
            dtype = pandas_dtype(dtype)

        if self.dtype == dtype:
            # Ensure that self.astype(self.dtype) is self
            return self.copy() if copy else self

        values = self._data
        if isinstance(values, ExtensionArray):
            with rewrite_exception(type(values).__name__, type(self).__name__):
                new_values = values.astype(dtype, copy=copy)

        elif isinstance(dtype, ExtensionDtype):
            cls = dtype.construct_array_type()
            # Note: for RangeIndex and CategoricalDtype self vs self._values
            # behaves differently here.
            new_values = cls._from_sequence(self, dtype=dtype, copy=copy)

        else:
            # GH#13149 specifically use astype_array instead of astype
            new_values = astype_array(values, dtype=dtype, copy=copy)

        # pass copy=False because any copying will be done in the astype above
        result = Index(new_values, name=self.name, dtype=new_values.dtype, copy=False)
        if (
            not copy
            and self._references is not None
            and astype_is_view(self.dtype, dtype)
        ):
            result._references = self._references
            result._references.add_index_reference(result)
        return result

    _index_shared_docs[
        "take"
    ] = """
        Return a new %(klass)s of the values selected by the indices.

        For internal compatibility with numpy arrays.

        Parameters
        ----------
        indices : array-like
            Indices to be taken.
        axis : int, optional
            The axis over which to select values, always 0.
        allow_fill : bool, default True
        fill_value : scalar, default None
            If allow_fill=True and fill_value is not None, indices specified by
            -1 are regarded as NA. If Index doesn't hold NA, raise ValueError.

        Returns
        -------
        Index
            An index formed of elements at the given indices. Will be the same
            type as self, except for RangeIndex.

        See Also
        --------
        numpy.ndarray.take: Return an array formed from the
            elements of a at the given indices.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx.take([2, 2, 1, 2])
        Index(['c', 'c', 'b', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs["take"] % _index_doc_kwargs)
    def take(
        self,
        indices,
        axis: Axis = 0,
        allow_fill: bool = True,
        fill_value=None,
        **kwargs,
    ) -> Self:
        if kwargs:
            nv.validate_take((), kwargs)
        if is_scalar(indices):
            raise TypeError("Expected indices to be array-like")
        indices = ensure_platform_int(indices)
        allow_fill = self._maybe_disallow_fill(allow_fill, fill_value, indices)

        # Note: we discard fill_value and use self._na_value, only relevant
        # in the case where allow_fill is True and fill_value is not None
        values = self._values
        if isinstance(values, np.ndarray):
            taken = algos.take(
                values, indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        else:
            # algos.take passes 'axis' keyword which not all EAs accept
            taken = values.take(
                indices, allow_fill=allow_fill, fill_value=self._na_value
            )
        return self._constructor._simple_new(taken, name=self.name)

    @final
    def _maybe_disallow_fill(self, allow_fill: bool, fill_value, indices) -> bool:
        """
        We only use pandas-style take when allow_fill is True _and_
        fill_value is not None.
        """
        if allow_fill and fill_value is not None:
            # only fill if we are passing a non-None fill_value
            if self._can_hold_na:
                if (indices < -1).any():
                    raise ValueError(
                        "When allow_fill=True and fill_value is not None, "
                        "all indices must be >= -1"
                    )
            else:
                cls_name = type(self).__name__
                raise ValueError(
                    f"Unable to fill values because {cls_name} cannot contain NA"
                )
        else:
            allow_fill = False
        return allow_fill

    _index_shared_docs[
        "repeat"
    ] = """
        Repeat elements of a %(klass)s.

        Returns a new %(klass)s where each element of the current %(klass)s
        is repeated consecutively a given number of times.

        Parameters
        ----------
        repeats : int or array of ints
            The number of repetitions for each element. This should be a
            non-negative integer. Repeating 0 times will return an empty
            %(klass)s.
        axis : None
            Must be ``None``. Has no effect but is accepted for compatibility
            with numpy.

        Returns
        -------
        %(klass)s
            Newly created %(klass)s with repeated elements.

        See Also
        --------
        Series.repeat : Equivalent function for Series.
        numpy.repeat : Similar method for :class:`numpy.ndarray`.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx
        Index(['a', 'b', 'c'], dtype='object')
        >>> idx.repeat(2)
        Index(['a', 'a', 'b', 'b', 'c', 'c'], dtype='object')
        >>> idx.repeat([1, 2, 3])
        Index(['a', 'b', 'b', 'c', 'c', 'c'], dtype='object')
        """

    @Appender(_index_shared_docs["repeat"] % _index_doc_kwargs)
    def repeat(self, repeats, axis: None = None) -> Self:
        repeats = ensure_platform_int(repeats)
        nv.validate_repeat((), {"axis": axis})
        res_values = self._values.repeat(repeats)

        # _constructor so RangeIndex-> Index with an int64 dtype
        return self._constructor._simple_new(res_values, name=self.name)

    # --------------------------------------------------------------------
    # Copying Methods

    def copy(
        self,
        name: Hashable | None = None,
        deep: bool = False,
    ) -> Self:
        """
        Make a copy of this object.

        Name is set on the new object.

        Parameters
        ----------
        name : Label, optional
            Set name for new object.
        deep : bool, default False

        Returns
        -------
        Index
            Index refers to new object which is a copy of this object.

        Notes
        -----
        In most cases, there should be no functional difference from using
        ``deep``, but if ``deep`` is passed it will attempt to deepcopy.

        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> new_idx = idx.copy()
        >>> idx is new_idx
        False
        """

        name = self._validate_names(name=name, deep=deep)[0]
        if deep:
            new_data = self._data.copy()
            new_index = type(self)._simple_new(new_data, name=name)
        else:
            new_index = self._rename(name=name)
        return new_index

    @final
    def __copy__(self, **kwargs) -> Self:
        return self.copy(**kwargs)

    @final
    def __deepcopy__(self, memo=None) -> Self:
        """
        Parameters
        ----------
        memo, default None
            Standard signature. Unused
        """
        return self.copy(deep=True)

    # --------------------------------------------------------------------
    # Rendering Methods

    @final
    def __repr__(self) -> str_t:
        """
        Return a string representation for this object.
        """
        klass_name = type(self).__name__
        data = self._format_data()
        attrs = self._format_attrs()
        attrs_str = [f"{k}={v}" for k, v in attrs]
        prepr = ", ".join(attrs_str)

        return f"{klass_name}({data}{prepr})"

    @property
    def _formatter_func(self):
        """
        Return the formatter function.
        """
        return default_pprint

    @final
    def _format_data(self, name=None) -> str_t:
        """
        Return the formatted data as a unicode string.
        """
        # do we want to justify (only do so for non-objects)
        is_justify = True

        if self.inferred_type == "string":
            is_justify = False
        elif isinstance(self.dtype, CategoricalDtype):
            self = cast("CategoricalIndex", self)
            if is_object_dtype(self.categories.dtype):
                is_justify = False
        elif isinstance(self, ABCRangeIndex):
            # We will do the relevant formatting via attrs
            return ""

        return format_object_summary(
            self,
            self._formatter_func,
            is_justify=is_justify,
            name=name,
            line_break_each_value=self._is_multi,
        )

    def _format_attrs(self) -> list[tuple[str_t, str_t | int | bool | None]]:
        """
        Return a list of tuples of the (attr, formatted_value).
        """
        attrs: list[tuple[str_t, str_t | int | bool | None]] = []

        if not self._is_multi:
            attrs.append(("dtype", f"'{self.dtype}'"))

        if self.name is not None:
            attrs.append(("name", default_pprint(self.name)))
        elif self._is_multi and any(x is not None for x in self.names):
            attrs.append(("names", default_pprint(self.names)))

        max_seq_items = get_option("display.max_seq_items") or len(self)
        if len(self) > max_seq_items:
            attrs.append(("length", len(self)))
        return attrs

    @final
    def _get_level_names(self) -> Hashable | Sequence[Hashable]:
        """
        Return a name or list of names with None replaced by the level number.
        """
        if self._is_multi:
            return [
                level if name is None else name for level, name in enumerate(self.names)
            ]
        else:
            return 0 if self.name is None else self.name

    @final
    def _mpl_repr(self) -> np.ndarray:
        # how to represent ourselves to matplotlib
        if isinstance(self.dtype, np.dtype) and self.dtype.kind != "M":
            return cast(np.ndarray, self.values)
        return self.astype(object, copy=False)._values

    def format(
        self,
        name: bool = False,
        formatter: Callable | None = None,
        na_rep: str_t = "NaN",
    ) -> list[str_t]:
        """
        Render a string representation of the Index.
        """
        warnings.warn(
            # GH#55413
            f"{type(self).__name__}.format is deprecated and will be removed "
            "in a future version. Convert using index.astype(str) or "
            "index.map(formatter) instead.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        header = []
        if name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=na_rep)

    _default_na_rep = "NaN"

    @final
    def _format_flat(
        self,
        *,
        include_name: bool,
        formatter: Callable | None = None,
    ) -> list[str_t]:
        """
        Render a string representation of the Index.
        """
        header = []
        if include_name:
            header.append(
                pprint_thing(self.name, escape_chars=("\t", "\r", "\n"))
                if self.name is not None
                else ""
            )

        if formatter is not None:
            return header + list(self.map(formatter))

        return self._format_with_header(header=header, na_rep=self._default_na_rep)

    def _format_with_header(self, *, header: list[str_t], na_rep: str_t) -> list[str_t]:
        from pandas.io.formats.format import format_array

        values = self._values

        if (
            is_object_dtype(values.dtype)
            or is_string_dtype(values.dtype)
            or isinstance(self.dtype, (IntervalDtype, CategoricalDtype))
        ):
            # TODO: why do we need different justify for these cases?
            justify = "all"
        else:
            justify = "left"
        # passing leading_space=False breaks test_format_missing,
        # test_index_repr_in_frame_with_nan, but would otherwise make
        # trim_front unnecessary
        formatted = format_array(values, None, justify=justify)
        result = trim_front(formatted)
        return header + result

    def _get_values_for_csv(
        self,
        *,
        na_rep: str_t = "",
        decimal: str_t = ".",
        float_format=None,
        date_format=None,
        quoting=None,
    ) -> npt.NDArray[np.object_]:
        return get_values_for_csv(
            self._values,
            na_rep=na_rep,
            decimal=decimal,
            float_format=float_format,
            date_format=date_format,
            quoting=quoting,
        )

    def _summary(self, name=None) -> str_t:
        """
        Return a summarized representation.

        Parameters
        ----------
        name : str
            Name to use in the summary representation.

        Returns
        -------
        str
            String with a summarized representation of the index.
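
        Examples
        --------
        >>> idx = pd.Index(['a', 'b', 'c'])
        >>> idx._summary()
        'Index: 3 entries, a to c'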
        """
        if len(self) > 0:
            head = self[0]
            if hasattr(head, "format") and not isinstance(head, str):
                head = head.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                head = self._formatter_func(head).replace("'", "")
            tail = self[-1]
            if hasattr(tail, "format") and not isinstance(tail, str):
                tail = tail.format()
            elif needs_i8_conversion(self.dtype):
                # e.g. Timedelta, display as values, not quoted
                tail = self._formatter_func(tail).replace("'", "")

            index_summary = f", {head} to {tail}"
        else:
            index_summary = ""

        if name is None:
            name = type(self).__name__
        return f"{name}: {len(self)} entries{index_summary}"

    # --------------------------------------------------------------------
    # Conversion Methods

    def to_flat_index(self) -> Self:
        """
        Identity method.

        This is implemented for compatibility with subclass implementations
        when chaining.

        Returns
        -------
        pd.Index
            Caller.

        See Also
        --------
        MultiIndex.to_flat_index : Subclass implementation.
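
        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx.to_flat_index() is idx
        True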
        """
        return self

    @final
    def to_series(self, index=None, name: Hashable | None = None) -> Series:
        """
        Create a Series with both index and values equal to the index keys.

        Useful with map for returning an indexer based on an index.

        Parameters
        ----------
        index : Index, optional
            Index of resulting Series. If None, defaults to original index.
        name : str, optional
            Name of resulting Series. If None, defaults to name of original
            index.

        Returns
        -------
        Series
            The dtype will be based on the type of the Index values.

        See Also
        --------
        Index.to_frame : Convert an Index to a DataFrame.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')

        By default, the original index and original name are reused.

        >>> idx.to_series()
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: animal, dtype: object

        To enforce a new index, specify new labels to ``index``:

        >>> idx.to_series(index=[0, 1, 2])
        0     Ant
        1    Bear
        2     Cow
        Name: animal, dtype: object

        To override the name of the resulting column, specify ``name``:

        >>> idx.to_series(name='zoo')
        animal
        Ant      Ant
        Bear    Bear
        Cow      Cow
        Name: zoo, dtype: object
        """
        from pandas import Series

        if index is None:
            index = self._view()
        if name is None:
            name = self.name

        return Series(self._values.copy(), index=index, name=name)

    def to_frame(
        self, index: bool = True, name: Hashable = lib.no_default
    ) -> DataFrame:
        """
        Create a DataFrame with a column containing the Index.

        Parameters
        ----------
        index : bool, default True
            Set the index of the returned DataFrame as the original Index.

        name : object, defaults to index.name
            The passed name should substitute for the index name (if it has
            one).

        Returns
        -------
        DataFrame
            DataFrame containing the original Index data.

        See Also
        --------
        Index.to_series : Convert an Index to a Series.
        Series.to_frame : Convert Series to DataFrame.

        Examples
        --------
        >>> idx = pd.Index(['Ant', 'Bear', 'Cow'], name='animal')
        >>> idx.to_frame()
               animal
        animal
        Ant       Ant
        Bear     Bear
        Cow       Cow

        By default, the original Index is reused. To enforce a new Index:

        >>> idx.to_frame(index=False)
          animal
        0    Ant
        1   Bear
        2    Cow

        To override the name of the resulting column, specify `name`:

        >>> idx.to_frame(index=False, name='zoo')
            zoo
        0   Ant
        1  Bear
        2   Cow
        """
        from pandas import DataFrame

        if name is lib.no_default:
            name = self._get_level_names()
        result = DataFrame({name: self}, copy=not using_copy_on_write())

        if index:
            result.index = self
        return result

    # --------------------------------------------------------------------
    # Name-Centric Methods

    @property
    def name(self) -> Hashable:
        """
        Return Index or MultiIndex name.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3], name='x')
        >>> idx
        Index([1, 2, 3], dtype='int64', name='x')
        >>> idx.name
        'x'
        """
        return self._name

    @name.setter
    def name(self, value: Hashable) -> None:
        if self._no_setting_name:
            # Used in MultiIndex.levels to avoid silently ignoring name updates.
            raise RuntimeError(
                "Cannot set name on a level of a MultiIndex. Use "
                "'MultiIndex.set_names' instead."
            )
        maybe_extract_name(value, None, type(self))
        self._name = value

    @final
    def _validate_names(
        self, name=None, names=None, deep: bool = False
    ) -> list[Hashable]:
        """
        Handles the quirks of having a singular 'name' parameter for general
        Index and plural 'names' parameter for MultiIndex.
        """
        from copy import deepcopy

        if names is not None and name is not None:
            raise TypeError("Can only provide one of `names` and `name`")
        if names is None and name is None:
            new_names = deepcopy(self.names) if deep else self.names
        elif names is not None:
            if not is_list_like(names):
                raise TypeError("Must pass list-like as `names`.")
            new_names = names
        elif not is_list_like(name):
            new_names = [name]
        else:
            new_names = name

        if len(new_names) != len(self.names):
            raise ValueError(
                f"Length of new names must be {len(self.names)}, got {len(new_names)}"
            )

        # All items in 'new_names' need to be hashable
        validate_all_hashable(*new_names, error_name=f"{type(self).__name__}.name")

        return new_names

    def _get_default_index_names(
        self, names: Hashable | Sequence[Hashable] | None = None, default=None
    ) -> list[Hashable]:
        """
        Get names of index.

        Parameters
        ----------
        names : int, str or 1-dimensional list, default None
            Index names to set.
        default : str
            Default name of index.

        Raises
        ------
        ValueError
            If names is not a str or 1-dimensional list.
        """
        from pandas.core.indexes.multi import MultiIndex

        if names is not None:
            if isinstance(names, (int, str)):
                names = [names]

        if not isinstance(names, list) and names is not None:
            raise ValueError("Index names must be str or 1-dimensional list")

        if not names:
            if isinstance(self, MultiIndex):
                names = com.fill_missing_names(self.names)
            else:
                names = [default] if self.name is None else [self.name]

        return names

    def _get_names(self) -> FrozenList:
        return FrozenList((self.name,))

    def _set_names(self, values, *, level=None) -> None:
        """
        Set new names on index. Each name has to be a hashable type.

        Parameters
        ----------
        values : str or sequence
            name(s) to set
        level : int, level name, or sequence of int/level names (default None)
            If the index is a MultiIndex (hierarchical), level(s) to set (None
            for all levels). Otherwise level must be None.

        Raises
        ------
        TypeError
            If any name is not hashable.
        """
        if not is_list_like(values):
            raise ValueError("Names must be a list-like")
        if len(values) != 1:
            raise ValueError(f"Length of new names must be 1, got {len(values)}")

        # GH#20527
        # All items in 'name' need to be hashable:
        validate_all_hashable(*values, error_name=f"{type(self).__name__}.name")

        self._name = values[0]

    names = property(fset=_set_names, fget=_get_names)

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: Literal[True]) -> None:
        ...

    @overload
    def set_names(self, names, *, level=..., inplace: bool = ...) -> Self | None:
        ...

    def set_names(self, names, *, level=None, inplace: bool = False) -> Self | None:
        """
        Set Index or MultiIndex name.

        Able to set new names partially and by level.

        Parameters
        ----------
        names : label or list of label or dict-like for MultiIndex
            Name(s) to set.

            .. versionchanged:: 1.3.0

        level : int, label or list of int or label, optional
            If the index is a MultiIndex and names is not dict-like, level(s) to set
            (None for all levels). Otherwise level must be None.

            .. versionchanged:: 1.3.0

        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.rename : Able to set new names without level.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3, 4])
        >>> idx
        Index([1, 2, 3, 4], dtype='int64')
        >>> idx.set_names('quarter')
        Index([1, 2, 3, 4], dtype='int64', name='quarter')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   )
        >>> idx = idx.set_names(['kind', 'year'])
        >>> idx.set_names('species', level=0)
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])

        When renaming levels with a dict, levels cannot be passed.

        >>> idx.set_names({'kind': 'snake'})
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['snake', 'year'])
        """
        if level is not None and not isinstance(self, ABCMultiIndex):
            raise ValueError("Level must be None for non-MultiIndex")

        if level is not None and not is_list_like(level) and is_list_like(names):
            raise TypeError("Names must be a string when a single level is provided.")

        if not is_list_like(names) and level is None and self.nlevels > 1:
            raise TypeError("Must pass list-like as `names`.")

        if is_dict_like(names) and not isinstance(self, ABCMultiIndex):
            raise TypeError("Can only pass dict-like as `names` for MultiIndex.")

        if is_dict_like(names) and level is not None:
            raise TypeError("Can not pass level for dictlike `names`.")

        if isinstance(self, ABCMultiIndex) and is_dict_like(names) and level is None:
            # Transform dict to list of new names and corresponding levels
            level, names_adjusted = [], []
            for i, name in enumerate(self.names):
                if name in names.keys():
                    level.append(i)
                    names_adjusted.append(names[name])
            names = names_adjusted

        if not is_list_like(names):
            names = [names]
        if level is not None and not is_list_like(level):
            level = [level]

        if inplace:
            idx = self
        else:
            idx = self._view()

        idx._set_names(names, level=level)
        if not inplace:
            return idx
        return None

    @overload
    def rename(self, name, *, inplace: Literal[False] = ...) -> Self:
        ...

    @overload
    def rename(self, name, *, inplace: Literal[True]) -> None:
        ...

    @deprecate_nonkeyword_arguments(
        version="3.0", allowed_args=["self", "name"], name="rename"
    )
    def rename(self, name, inplace: bool = False) -> Self | None:
        """
        Alter Index or MultiIndex name.

        Able to set new names without level. Defaults to returning new index.
        Length of names must match number of levels in MultiIndex.

        Parameters
        ----------
        name : label or list of labels
            Name(s) to set.
        inplace : bool, default False
            Modifies the object directly, instead of creating a new Index or
            MultiIndex.

        Returns
        -------
        Index or None
            The same type as the caller or None if ``inplace=True``.

        See Also
        --------
        Index.set_names : Able to set new names partially and by level.

        Examples
        --------
        >>> idx = pd.Index(['A', 'C', 'A', 'B'], name='score')
        >>> idx.rename('grade')
        Index(['A', 'C', 'A', 'B'], dtype='object', name='grade')

        >>> idx = pd.MultiIndex.from_product([['python', 'cobra'],
        ...                                   [2018, 2019]],
        ...                                  names=['kind', 'year'])
        >>> idx
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['kind', 'year'])
        >>> idx.rename(['species', 'year'])
        MultiIndex([('python', 2018),
                    ('python', 2019),
                    ( 'cobra', 2018),
                    ( 'cobra', 2019)],
                   names=['species', 'year'])
        >>> idx.rename('species')
        Traceback (most recent call last):
        TypeError: Must pass list-like as `names`.
        """
        return self.set_names([name], inplace=inplace)

1976 # --------------------------------------------------------------------
1977 # Level-Centric Methods
1978
1979 @property
1980 def nlevels(self) -> int:
1981 """
1982 Number of levels.
1983 """
1984 return 1
1985
1986 def _sort_levels_monotonic(self) -> Self:
1987 """
1988 Compat with MultiIndex.
1989 """
1990 return self
1991
1992 @final
1993 def _validate_index_level(self, level) -> None:
1994 """
1995 Validate index level.
1996
1997 For single-level Index getting level number is a no-op, but some
1998 verification must be done like in MultiIndex.
1999
2000 """
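        # Illustrative behavior (hypothetical inputs): level=0 or level=-1
        # pass silently; any other int raises IndexError; a non-int level
        # raises KeyError unless it equals self.name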
2001 if isinstance(level, int):
2002 if level < 0 and level != -1:
2003 raise IndexError(
2004 "Too many levels: Index has only 1 level, "
2005 f"{level} is not a valid level number"
2006 )
2007 if level > 0:
2008 raise IndexError(
2009 f"Too many levels: Index has only 1 level, not {level + 1}"
2010 )
2011 elif level != self.name:
2012 raise KeyError(
2013 f"Requested level ({level}) does not match index name ({self.name})"
2014 )
2015
2016 def _get_level_number(self, level) -> int:
2017 self._validate_index_level(level)
2018 return 0
2019
2020 def sortlevel(
2021 self,
2022 level=None,
2023 ascending: bool | list[bool] = True,
2024 sort_remaining=None,
2025 na_position: NaPosition = "first",
2026 ):
2027 """
2028 For internal compatibility with the Index API.
2029
        Sort the Index. This is for compat with MultiIndex.
2031
2032 Parameters
2033 ----------
2034 ascending : bool, default True
2035 False to sort in descending order
        na_position : {'first', 'last'}, default 'first'
2037 Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
2038 the end.
2039
2040 .. versionadded:: 2.1.0
2041
        ``level`` and ``sort_remaining`` are compat parameters and are ignored.
2043
2044 Returns
2045 -------
2046 Index
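
        Examples
        --------
        A minimal illustration on a flat Index (the compat parameters are
        ignored here):

        >>> index = pd.Index([1, 3, 2])
        >>> index.sortlevel()
        (Index([1, 2, 3], dtype='int64'), array([0, 2, 1]))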
2047 """
2048 if not isinstance(ascending, (list, bool)):
2049 raise TypeError(
                "ascending must be a single bool value or "
                "a list of bool values of length 1"
2052 )
2053
2054 if isinstance(ascending, list):
2055 if len(ascending) != 1:
2056 raise TypeError("ascending must be a list of bool values of length 1")
2057 ascending = ascending[0]
2058
2059 if not isinstance(ascending, bool):
2060 raise TypeError("ascending must be a bool value")
2061
2062 return self.sort_values(
2063 return_indexer=True, ascending=ascending, na_position=na_position
2064 )
2065
2066 def _get_level_values(self, level) -> Index:
2067 """
2068 Return an Index of values for requested level.
2069
2070 This is primarily useful to get an individual level of values from a
2071 MultiIndex, but is provided on Index as well for compatibility.
2072
2073 Parameters
2074 ----------
2075 level : int or str
2076 It is either the integer position or the name of the level.
2077
2078 Returns
2079 -------
2080 Index
2081 Calling object, as there is only one level in the Index.
2082
2083 See Also
2084 --------
2085 MultiIndex.get_level_values : Get values for a level of a MultiIndex.
2086
2087 Notes
2088 -----
2089 For Index, level should be 0, since there are no multiple levels.
2090
2091 Examples
2092 --------
2093 >>> idx = pd.Index(list('abc'))
2094 >>> idx
2095 Index(['a', 'b', 'c'], dtype='object')
2096
2097 Get level values by supplying `level` as integer:
2098
2099 >>> idx.get_level_values(0)
2100 Index(['a', 'b', 'c'], dtype='object')
2101 """
2102 self._validate_index_level(level)
2103 return self
2104
2105 get_level_values = _get_level_values
2106
2107 @final
2108 def droplevel(self, level: IndexLabel = 0):
2109 """
2110 Return index with requested level(s) removed.
2111
2112 If resulting index has only 1 level left, the result will be
2113 of Index type, not MultiIndex. The original index is not modified inplace.
2114
2115 Parameters
2116 ----------
2117 level : int, str, or list-like, default 0
            If a string is given, must be the name of a level.
2119 If list-like, elements must be names or indexes of levels.
2120
2121 Returns
2122 -------
2123 Index or MultiIndex
2124
2125 Examples
2126 --------
2127 >>> mi = pd.MultiIndex.from_arrays(
2128 ... [[1, 2], [3, 4], [5, 6]], names=['x', 'y', 'z'])
2129 >>> mi
2130 MultiIndex([(1, 3, 5),
2131 (2, 4, 6)],
2132 names=['x', 'y', 'z'])
2133
2134 >>> mi.droplevel()
2135 MultiIndex([(3, 5),
2136 (4, 6)],
2137 names=['y', 'z'])
2138
2139 >>> mi.droplevel(2)
2140 MultiIndex([(1, 3),
2141 (2, 4)],
2142 names=['x', 'y'])
2143
2144 >>> mi.droplevel('z')
2145 MultiIndex([(1, 3),
2146 (2, 4)],
2147 names=['x', 'y'])
2148
2149 >>> mi.droplevel(['x', 'y'])
2150 Index([5, 6], dtype='int64', name='z')
2151 """
2152 if not isinstance(level, (tuple, list)):
2153 level = [level]
2154
2155 levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2156
2157 return self._drop_level_numbers(levnums)
2158
2159 @final
2160 def _drop_level_numbers(self, levnums: list[int]):
2161 """
2162 Drop MultiIndex levels by level _number_, not name.
2163 """
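        # Illustrative sketch (hypothetical values): for
        # mi = pd.MultiIndex.from_arrays([[1, 2], ["a", "b"]]),
        # mi._drop_level_numbers([0]) returns Index(["a", "b"], dtype="object")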
2164
2165 if not levnums and not isinstance(self, ABCMultiIndex):
2166 return self
2167 if len(levnums) >= self.nlevels:
2168 raise ValueError(
2169 f"Cannot remove {len(levnums)} levels from an index with "
2170 f"{self.nlevels} levels: at least one level must be left."
2171 )
2172 # The two checks above guarantee that here self is a MultiIndex
2173 self = cast("MultiIndex", self)
2174
2175 new_levels = list(self.levels)
2176 new_codes = list(self.codes)
2177 new_names = list(self.names)
2178
2179 for i in levnums:
2180 new_levels.pop(i)
2181 new_codes.pop(i)
2182 new_names.pop(i)
2183
2184 if len(new_levels) == 1:
2185 lev = new_levels[0]
2186
2187 if len(lev) == 0:
2188 # If lev is empty, lev.take will fail GH#42055
2189 if len(new_codes[0]) == 0:
2190 # GH#45230 preserve RangeIndex here
2191 # see test_reset_index_empty_rangeindex
2192 result = lev[:0]
2193 else:
2194 res_values = algos.take(lev._values, new_codes[0], allow_fill=True)
2195 # _constructor instead of type(lev) for RangeIndex compat GH#35230
2196 result = lev._constructor._simple_new(res_values, name=new_names[0])
2197 else:
2198 # set nan if needed
2199 mask = new_codes[0] == -1
2200 result = new_levels[0].take(new_codes[0])
2201 if mask.any():
2202 result = result.putmask(mask, np.nan)
2203
2204 result._name = new_names[0]
2205
2206 return result
2207 else:
2208 from pandas.core.indexes.multi import MultiIndex
2209
2210 return MultiIndex(
2211 levels=new_levels,
2212 codes=new_codes,
2213 names=new_names,
2214 verify_integrity=False,
2215 )
2216
2217 # --------------------------------------------------------------------
2218 # Introspection Methods
2219
2220 @cache_readonly
2221 @final
2222 def _can_hold_na(self) -> bool:
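        # e.g. int64/uint64/bool -> False; float64, object, and datetime64
        # -> True; extension dtypes defer to dtype._can_hold_na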
2223 if isinstance(self.dtype, ExtensionDtype):
2224 return self.dtype._can_hold_na
2225 if self.dtype.kind in "iub":
2226 return False
2227 return True
2228
2229 @property
2230 def is_monotonic_increasing(self) -> bool:
2231 """
2232 Return a boolean if the values are equal or increasing.
2233
2234 Returns
2235 -------
2236 bool
2237
2238 See Also
2239 --------
2240 Index.is_monotonic_decreasing : Check if the values are equal or decreasing.
2241
2242 Examples
2243 --------
2244 >>> pd.Index([1, 2, 3]).is_monotonic_increasing
2245 True
2246 >>> pd.Index([1, 2, 2]).is_monotonic_increasing
2247 True
2248 >>> pd.Index([1, 3, 2]).is_monotonic_increasing
2249 False
2250 """
2251 return self._engine.is_monotonic_increasing
2252
2253 @property
2254 def is_monotonic_decreasing(self) -> bool:
2255 """
2256 Return a boolean if the values are equal or decreasing.
2257
2258 Returns
2259 -------
2260 bool
2261
2262 See Also
2263 --------
2264 Index.is_monotonic_increasing : Check if the values are equal or increasing.
2265
2266 Examples
2267 --------
2268 >>> pd.Index([3, 2, 1]).is_monotonic_decreasing
2269 True
2270 >>> pd.Index([3, 2, 2]).is_monotonic_decreasing
2271 True
2272 >>> pd.Index([3, 1, 2]).is_monotonic_decreasing
2273 False
2274 """
2275 return self._engine.is_monotonic_decreasing
2276
2277 @final
2278 @property
2279 def _is_strictly_monotonic_increasing(self) -> bool:
2280 """
2281 Return if the index is strictly monotonic increasing
2282 (only increasing) values.
2283
2284 Examples
2285 --------
2286 >>> Index([1, 2, 3])._is_strictly_monotonic_increasing
2287 True
2288 >>> Index([1, 2, 2])._is_strictly_monotonic_increasing
2289 False
2290 >>> Index([1, 3, 2])._is_strictly_monotonic_increasing
2291 False
2292 """
2293 return self.is_unique and self.is_monotonic_increasing
2294
2295 @final
2296 @property
2297 def _is_strictly_monotonic_decreasing(self) -> bool:
2298 """
2299 Return if the index is strictly monotonic decreasing
2300 (only decreasing) values.
2301
2302 Examples
2303 --------
2304 >>> Index([3, 2, 1])._is_strictly_monotonic_decreasing
2305 True
2306 >>> Index([3, 2, 2])._is_strictly_monotonic_decreasing
2307 False
2308 >>> Index([3, 1, 2])._is_strictly_monotonic_decreasing
2309 False
2310 """
2311 return self.is_unique and self.is_monotonic_decreasing
2312
2313 @cache_readonly
2314 def is_unique(self) -> bool:
2315 """
2316 Return if the index has unique values.
2317
2318 Returns
2319 -------
2320 bool
2321
2322 See Also
2323 --------
2324 Index.has_duplicates : Inverse method that checks if it has duplicate values.
2325
2326 Examples
2327 --------
2328 >>> idx = pd.Index([1, 5, 7, 7])
2329 >>> idx.is_unique
2330 False
2331
2332 >>> idx = pd.Index([1, 5, 7])
2333 >>> idx.is_unique
2334 True
2335
2336 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2337 ... "Watermelon"]).astype("category")
2338 >>> idx.is_unique
2339 False
2340
2341 >>> idx = pd.Index(["Orange", "Apple",
2342 ... "Watermelon"]).astype("category")
2343 >>> idx.is_unique
2344 True
2345 """
2346 return self._engine.is_unique
2347
2348 @final
2349 @property
2350 def has_duplicates(self) -> bool:
2351 """
2352 Check if the Index has duplicate values.
2353
2354 Returns
2355 -------
2356 bool
2357 Whether or not the Index has duplicate values.
2358
2359 See Also
2360 --------
2361 Index.is_unique : Inverse method that checks if it has unique values.
2362
2363 Examples
2364 --------
2365 >>> idx = pd.Index([1, 5, 7, 7])
2366 >>> idx.has_duplicates
2367 True
2368
2369 >>> idx = pd.Index([1, 5, 7])
2370 >>> idx.has_duplicates
2371 False
2372
2373 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2374 ... "Watermelon"]).astype("category")
2375 >>> idx.has_duplicates
2376 True
2377
2378 >>> idx = pd.Index(["Orange", "Apple",
2379 ... "Watermelon"]).astype("category")
2380 >>> idx.has_duplicates
2381 False
2382 """
2383 return not self.is_unique
2384
2385 @final
2386 def is_boolean(self) -> bool:
2387 """
2388 Check if the Index only consists of booleans.
2389
2390 .. deprecated:: 2.0.0
2391 Use `pandas.api.types.is_bool_dtype` instead.
2392
2393 Returns
2394 -------
2395 bool
2396 Whether or not the Index only consists of booleans.
2397
2398 See Also
2399 --------
2400 is_integer : Check if the Index only consists of integers (deprecated).
2401 is_floating : Check if the Index is a floating type (deprecated).
2402 is_numeric : Check if the Index only consists of numeric data (deprecated).
2403 is_object : Check if the Index is of the object dtype (deprecated).
        is_categorical : Check if the Index holds categorical data (deprecated).
2405 is_interval : Check if the Index holds Interval objects (deprecated).
2406
2407 Examples
2408 --------
2409 >>> idx = pd.Index([True, False, True])
2410 >>> idx.is_boolean() # doctest: +SKIP
2411 True
2412
2413 >>> idx = pd.Index(["True", "False", "True"])
2414 >>> idx.is_boolean() # doctest: +SKIP
2415 False
2416
2417 >>> idx = pd.Index([True, False, "True"])
2418 >>> idx.is_boolean() # doctest: +SKIP
2419 False
2420 """
2421 warnings.warn(
2422 f"{type(self).__name__}.is_boolean is deprecated. "
            "Use pandas.api.types.is_bool_dtype instead.",
2424 FutureWarning,
2425 stacklevel=find_stack_level(),
2426 )
2427 return self.inferred_type in ["boolean"]
2428
2429 @final
2430 def is_integer(self) -> bool:
2431 """
2432 Check if the Index only consists of integers.
2433
2434 .. deprecated:: 2.0.0
2435 Use `pandas.api.types.is_integer_dtype` instead.
2436
2437 Returns
2438 -------
2439 bool
2440 Whether or not the Index only consists of integers.
2441
2442 See Also
2443 --------
2444 is_boolean : Check if the Index only consists of booleans (deprecated).
2445 is_floating : Check if the Index is a floating type (deprecated).
2446 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2448 is_categorical : Check if the Index holds categorical data (deprecated).
2449 is_interval : Check if the Index holds Interval objects (deprecated).
2450
2451 Examples
2452 --------
2453 >>> idx = pd.Index([1, 2, 3, 4])
2454 >>> idx.is_integer() # doctest: +SKIP
2455 True
2456
2457 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2458 >>> idx.is_integer() # doctest: +SKIP
2459 False
2460
2461 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2462 >>> idx.is_integer() # doctest: +SKIP
2463 False
2464 """
2465 warnings.warn(
2466 f"{type(self).__name__}.is_integer is deprecated. "
2467 "Use pandas.api.types.is_integer_dtype instead.",
2468 FutureWarning,
2469 stacklevel=find_stack_level(),
2470 )
2471 return self.inferred_type in ["integer"]
2472
2473 @final
2474 def is_floating(self) -> bool:
2475 """
2476 Check if the Index is a floating type.
2477
2478 .. deprecated:: 2.0.0
            Use `pandas.api.types.is_float_dtype` instead.
2480
2481 The Index may consist of only floats, NaNs, or a mix of floats,
2482 integers, or NaNs.
2483
2484 Returns
2485 -------
2486 bool
            Whether or not the Index only consists of floats, NaNs, or
            a mix of floats, integers, or NaNs.
2489
2490 See Also
2491 --------
2492 is_boolean : Check if the Index only consists of booleans (deprecated).
2493 is_integer : Check if the Index only consists of integers (deprecated).
2494 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2496 is_categorical : Check if the Index holds categorical data (deprecated).
2497 is_interval : Check if the Index holds Interval objects (deprecated).
2498
2499 Examples
2500 --------
2501 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2502 >>> idx.is_floating() # doctest: +SKIP
2503 True
2504
2505 >>> idx = pd.Index([1.0, 2.0, np.nan, 4.0])
2506 >>> idx.is_floating() # doctest: +SKIP
2507 True
2508
2509 >>> idx = pd.Index([1, 2, 3, 4, np.nan])
2510 >>> idx.is_floating() # doctest: +SKIP
2511 True
2512
2513 >>> idx = pd.Index([1, 2, 3, 4])
2514 >>> idx.is_floating() # doctest: +SKIP
2515 False
2516 """
2517 warnings.warn(
2518 f"{type(self).__name__}.is_floating is deprecated. "
2519 "Use pandas.api.types.is_float_dtype instead.",
2520 FutureWarning,
2521 stacklevel=find_stack_level(),
2522 )
2523 return self.inferred_type in ["floating", "mixed-integer-float", "integer-na"]
2524
2525 @final
2526 def is_numeric(self) -> bool:
2527 """
2528 Check if the Index only consists of numeric data.
2529
2530 .. deprecated:: 2.0.0
            Use `pandas.api.types.is_any_real_numeric_dtype` instead.
2532
2533 Returns
2534 -------
2535 bool
2536 Whether or not the Index only consists of numeric data.
2537
2538 See Also
2539 --------
2540 is_boolean : Check if the Index only consists of booleans (deprecated).
2541 is_integer : Check if the Index only consists of integers (deprecated).
2542 is_floating : Check if the Index is a floating type (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2544 is_categorical : Check if the Index holds categorical data (deprecated).
2545 is_interval : Check if the Index holds Interval objects (deprecated).
2546
2547 Examples
2548 --------
2549 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2550 >>> idx.is_numeric() # doctest: +SKIP
2551 True
2552
2553 >>> idx = pd.Index([1, 2, 3, 4.0])
2554 >>> idx.is_numeric() # doctest: +SKIP
2555 True
2556
2557 >>> idx = pd.Index([1, 2, 3, 4])
2558 >>> idx.is_numeric() # doctest: +SKIP
2559 True
2560
2561 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan])
2562 >>> idx.is_numeric() # doctest: +SKIP
2563 True
2564
2565 >>> idx = pd.Index([1, 2, 3, 4.0, np.nan, "Apple"])
2566 >>> idx.is_numeric() # doctest: +SKIP
2567 False
2568 """
2569 warnings.warn(
2570 f"{type(self).__name__}.is_numeric is deprecated. "
            "Use pandas.api.types.is_any_real_numeric_dtype instead.",
2572 FutureWarning,
2573 stacklevel=find_stack_level(),
2574 )
2575 return self.inferred_type in ["integer", "floating"]
2576
2577 @final
2578 def is_object(self) -> bool:
2579 """
2580 Check if the Index is of the object dtype.
2581
2582 .. deprecated:: 2.0.0
2583 Use `pandas.api.types.is_object_dtype` instead.
2584
2585 Returns
2586 -------
2587 bool
2588 Whether or not the Index is of the object dtype.
2589
2590 See Also
2591 --------
2592 is_boolean : Check if the Index only consists of booleans (deprecated).
2593 is_integer : Check if the Index only consists of integers (deprecated).
2594 is_floating : Check if the Index is a floating type (deprecated).
2595 is_numeric : Check if the Index only consists of numeric data (deprecated).
2596 is_categorical : Check if the Index holds categorical data (deprecated).
2597 is_interval : Check if the Index holds Interval objects (deprecated).
2598
2599 Examples
2600 --------
2601 >>> idx = pd.Index(["Apple", "Mango", "Watermelon"])
2602 >>> idx.is_object() # doctest: +SKIP
2603 True
2604
2605 >>> idx = pd.Index(["Apple", "Mango", 2.0])
2606 >>> idx.is_object() # doctest: +SKIP
2607 True
2608
2609 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2610 ... "Watermelon"]).astype("category")
2611 >>> idx.is_object() # doctest: +SKIP
2612 False
2613
2614 >>> idx = pd.Index([1.0, 2.0, 3.0, 4.0])
2615 >>> idx.is_object() # doctest: +SKIP
2616 False
2617 """
2618 warnings.warn(
            f"{type(self).__name__}.is_object is deprecated. "
            "Use pandas.api.types.is_object_dtype instead.",
2621 FutureWarning,
2622 stacklevel=find_stack_level(),
2623 )
2624 return is_object_dtype(self.dtype)
2625
2626 @final
2627 def is_categorical(self) -> bool:
2628 """
2629 Check if the Index holds categorical data.
2630
2631 .. deprecated:: 2.0.0
2632 Use `isinstance(index.dtype, pd.CategoricalDtype)` instead.
2633
2634 Returns
2635 -------
2636 bool
2637 True if the Index is categorical.
2638
2639 See Also
2640 --------
2641 CategoricalIndex : Index for categorical data.
2642 is_boolean : Check if the Index only consists of booleans (deprecated).
2643 is_integer : Check if the Index only consists of integers (deprecated).
2644 is_floating : Check if the Index is a floating type (deprecated).
2645 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2647 is_interval : Check if the Index holds Interval objects (deprecated).
2648
2649 Examples
2650 --------
2651 >>> idx = pd.Index(["Watermelon", "Orange", "Apple",
2652 ... "Watermelon"]).astype("category")
2653 >>> idx.is_categorical() # doctest: +SKIP
2654 True
2655
2656 >>> idx = pd.Index([1, 3, 5, 7])
2657 >>> idx.is_categorical() # doctest: +SKIP
2658 False
2659
2660 >>> s = pd.Series(["Peter", "Victor", "Elisabeth", "Mar"])
2661 >>> s
2662 0 Peter
2663 1 Victor
2664 2 Elisabeth
2665 3 Mar
2666 dtype: object
2667 >>> s.index.is_categorical() # doctest: +SKIP
2668 False
2669 """
2670 warnings.warn(
            f"{type(self).__name__}.is_categorical is deprecated. "
            "Use pandas.api.types.is_categorical_dtype instead.",
2673 FutureWarning,
2674 stacklevel=find_stack_level(),
2675 )
2676
2677 return self.inferred_type in ["categorical"]
2678
2679 @final
2680 def is_interval(self) -> bool:
2681 """
2682 Check if the Index holds Interval objects.
2683
2684 .. deprecated:: 2.0.0
2685 Use `isinstance(index.dtype, pd.IntervalDtype)` instead.
2686
2687 Returns
2688 -------
2689 bool
2690 Whether or not the Index holds Interval objects.
2691
2692 See Also
2693 --------
2694 IntervalIndex : Index for Interval objects.
2695 is_boolean : Check if the Index only consists of booleans (deprecated).
2696 is_integer : Check if the Index only consists of integers (deprecated).
2697 is_floating : Check if the Index is a floating type (deprecated).
2698 is_numeric : Check if the Index only consists of numeric data (deprecated).
        is_object : Check if the Index is of the object dtype (deprecated).
2700 is_categorical : Check if the Index holds categorical data (deprecated).
2701
2702 Examples
2703 --------
2704 >>> idx = pd.Index([pd.Interval(left=0, right=5),
2705 ... pd.Interval(left=5, right=10)])
2706 >>> idx.is_interval() # doctest: +SKIP
2707 True
2708
2709 >>> idx = pd.Index([1, 3, 5, 7])
2710 >>> idx.is_interval() # doctest: +SKIP
2711 False
2712 """
2713 warnings.warn(
            f"{type(self).__name__}.is_interval is deprecated. "
            "Use pandas.api.types.is_interval_dtype instead.",
2716 FutureWarning,
2717 stacklevel=find_stack_level(),
2718 )
2719 return self.inferred_type in ["interval"]
2720
2721 @final
2722 def _holds_integer(self) -> bool:
2723 """
2724 Whether the type is an integer type.
2725 """
2726 return self.inferred_type in ["integer", "mixed-integer"]
2727
2728 @final
2729 def holds_integer(self) -> bool:
2730 """
2731 Whether the type is an integer type.
2732
2733 .. deprecated:: 2.0.0
2734 Use `pandas.api.types.infer_dtype` instead
2735 """
2736 warnings.warn(
2737 f"{type(self).__name__}.holds_integer is deprecated. "
2738 "Use pandas.api.types.infer_dtype instead.",
2739 FutureWarning,
2740 stacklevel=find_stack_level(),
2741 )
2742 return self._holds_integer()
2743
2744 @cache_readonly
2745 def inferred_type(self) -> str_t:
2746 """
2747 Return a string of the type inferred from the values.
2748
2749 Examples
2750 --------
2751 >>> idx = pd.Index([1, 2, 3])
2752 >>> idx
2753 Index([1, 2, 3], dtype='int64')
2754 >>> idx.inferred_type
2755 'integer'
2756 """
2757 return lib.infer_dtype(self._values, skipna=False)
2758
2759 @cache_readonly
2760 @final
2761 def _is_all_dates(self) -> bool:
2762 """
2763 Whether or not the index values only consist of dates.
2764 """
2765 if needs_i8_conversion(self.dtype):
2766 return True
2767 elif self.dtype != _dtype_obj:
2768 # TODO(ExtensionIndex): 3rd party EA might override?
2769 # Note: this includes IntervalIndex, even when the left/right
2770 # contain datetime-like objects.
2771 return False
2772 elif self._is_multi:
2773 return False
2774 return is_datetime_array(ensure_object(self._values))
2775
2776 @final
2777 @cache_readonly
2778 def _is_multi(self) -> bool:
2779 """
2780 Cached check equivalent to isinstance(self, MultiIndex)
2781 """
2782 return isinstance(self, ABCMultiIndex)
2783
2784 # --------------------------------------------------------------------
2785 # Pickle Methods
2786
2787 def __reduce__(self):
2788 d = {"data": self._data, "name": self.name}
2789 return _new_Index, (type(self), d), None
2790
2791 # --------------------------------------------------------------------
2792 # Null Handling Methods
2793
2794 @cache_readonly
2795 def _na_value(self):
2796 """The expected NA value to use with this index."""
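        # e.g. datetime64/timedelta64 -> NaT; other numpy dtypes -> np.nan;
        # extension dtypes -> dtype.na_value (pd.NA for masked arrays)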
2797 dtype = self.dtype
2798 if isinstance(dtype, np.dtype):
2799 if dtype.kind in "mM":
2800 return NaT
2801 return np.nan
2802 return dtype.na_value
2803
2804 @cache_readonly
2805 def _isnan(self) -> npt.NDArray[np.bool_]:
2806 """
2807 Return if each value is NaN.
2808 """
2809 if self._can_hold_na:
2810 return isna(self)
2811 else:
            # shouldn't reach this condition; callers check hasnans beforehand
2813 values = np.empty(len(self), dtype=np.bool_)
2814 values.fill(False)
2815 return values
2816
2817 @cache_readonly
2818 def hasnans(self) -> bool:
2819 """
2820 Return True if there are any NaNs.
2821
2822 Enables various performance speedups.
2823
2824 Returns
2825 -------
2826 bool
2827
2828 Examples
2829 --------
2830 >>> s = pd.Series([1, 2, 3], index=['a', 'b', None])
2831 >>> s
2832 a 1
2833 b 2
2834 None 3
2835 dtype: int64
2836 >>> s.index.hasnans
2837 True
2838 """
2839 if self._can_hold_na:
2840 return bool(self._isnan.any())
2841 else:
2842 return False
2843
2844 @final
2845 def isna(self) -> npt.NDArray[np.bool_]:
2846 """
2847 Detect missing values.
2848
2849 Return a boolean same-sized object indicating if the values are NA.
2850 NA values, such as ``None``, :attr:`numpy.NaN` or :attr:`pd.NaT`, get
2851 mapped to ``True`` values.
        Everything else gets mapped to ``False`` values. Characters such as
        empty strings ``''`` or :attr:`numpy.inf` are not considered NA values.
2854
2855 Returns
2856 -------
2857 numpy.ndarray[bool]
2858 A boolean array of whether my values are NA.
2859
2860 See Also
2861 --------
2862 Index.notna : Boolean inverse of isna.
2863 Index.dropna : Omit entries with missing values.
2864 isna : Top-level isna.
2865 Series.isna : Detect missing values in Series object.
2866
2867 Examples
2868 --------
2869 Show which entries in a pandas.Index are NA. The result is an
2870 array.
2871
2872 >>> idx = pd.Index([5.2, 6.0, np.nan])
2873 >>> idx
2874 Index([5.2, 6.0, nan], dtype='float64')
2875 >>> idx.isna()
2876 array([False, False, True])
2877
2878 Empty strings are not considered NA values. None is considered an NA
2879 value.
2880
2881 >>> idx = pd.Index(['black', '', 'red', None])
2882 >>> idx
2883 Index(['black', '', 'red', None], dtype='object')
2884 >>> idx.isna()
2885 array([False, False, False, True])
2886
2887 For datetimes, `NaT` (Not a Time) is considered as an NA value.
2888
2889 >>> idx = pd.DatetimeIndex([pd.Timestamp('1940-04-25'),
2890 ... pd.Timestamp(''), None, pd.NaT])
2891 >>> idx
2892 DatetimeIndex(['1940-04-25', 'NaT', 'NaT', 'NaT'],
2893 dtype='datetime64[ns]', freq=None)
2894 >>> idx.isna()
2895 array([False, True, True, True])
2896 """
2897 return self._isnan
2898
2899 isnull = isna
2900
2901 @final
2902 def notna(self) -> npt.NDArray[np.bool_]:
2903 """
2904 Detect existing (non-missing) values.
2905
2906 Return a boolean same-sized object indicating if the values are not NA.
2907 Non-missing values get mapped to ``True``. Characters such as empty
2908 strings ``''`` or :attr:`numpy.inf` are not considered NA values.
2909 NA values, such as None or :attr:`numpy.NaN`, get mapped to ``False``
2910 values.
2911
2912 Returns
2913 -------
2914 numpy.ndarray[bool]
2915 Boolean array to indicate which entries are not NA.
2916
2917 See Also
2918 --------
2919 Index.notnull : Alias of notna.
2920 Index.isna: Inverse of notna.
2921 notna : Top-level notna.
2922
2923 Examples
2924 --------
2925 Show which entries in an Index are not NA. The result is an
2926 array.
2927
2928 >>> idx = pd.Index([5.2, 6.0, np.nan])
2929 >>> idx
2930 Index([5.2, 6.0, nan], dtype='float64')
2931 >>> idx.notna()
2932 array([ True, True, False])
2933
        Empty strings are not considered NA values. None is considered an NA
        value.
2936
2937 >>> idx = pd.Index(['black', '', 'red', None])
2938 >>> idx
2939 Index(['black', '', 'red', None], dtype='object')
2940 >>> idx.notna()
2941 array([ True, True, True, False])
2942 """
2943 return ~self.isna()
2944
2945 notnull = notna
2946
2947 def fillna(self, value=None, downcast=lib.no_default):
2948 """
2949 Fill NA/NaN values with the specified value.
2950
2951 Parameters
2952 ----------
2953 value : scalar
2954 Scalar value to use to fill holes (e.g. 0).
            This value cannot be a list-like.
        downcast : dict, default None
2957 A dict of item->dtype of what to downcast if possible,
2958 or the string 'infer' which will try to downcast to an appropriate
2959 equal type (e.g. float64 to int64 if possible).
2960
2961 .. deprecated:: 2.1.0
2962
2963 Returns
2964 -------
2965 Index
2966
2967 See Also
2968 --------
2969 DataFrame.fillna : Fill NaN values of a DataFrame.
2970 Series.fillna : Fill NaN Values of a Series.
2971
2972 Examples
2973 --------
2974 >>> idx = pd.Index([np.nan, np.nan, 3])
2975 >>> idx.fillna(0)
2976 Index([0.0, 0.0, 3.0], dtype='float64')
2977 """
2978 if not is_scalar(value):
2979 raise TypeError(f"'value' must be a scalar, passed: {type(value).__name__}")
2980 if downcast is not lib.no_default:
2981 warnings.warn(
2982 f"The 'downcast' keyword in {type(self).__name__}.fillna is "
2983 "deprecated and will be removed in a future version. "
2984 "It was previously silently ignored.",
2985 FutureWarning,
2986 stacklevel=find_stack_level(),
2987 )
2988 else:
2989 downcast = None
2990
2991 if self.hasnans:
2992 result = self.putmask(self._isnan, value)
2993 if downcast is None:
2994 # no need to care metadata other than name
2995 # because it can't have freq if it has NaTs
2996 # _with_infer needed for test_fillna_categorical
2997 return Index._with_infer(result, name=self.name)
2998 raise NotImplementedError(
2999 f"{type(self).__name__}.fillna does not support 'downcast' "
3000 "argument values other than 'None'."
3001 )
3002 return self._view()
3003
3004 def dropna(self, how: AnyAll = "any") -> Self:
3005 """
3006 Return Index without NA/NaN values.
3007
3008 Parameters
3009 ----------
3010 how : {'any', 'all'}, default 'any'
3011 If the Index is a MultiIndex, drop the value when any or all levels
3012 are NaN.
3013
3014 Returns
3015 -------
3016 Index
3017
3018 Examples
3019 --------
3020 >>> idx = pd.Index([1, np.nan, 3])
3021 >>> idx.dropna()
3022 Index([1.0, 3.0], dtype='float64')
3023 """
3024 if how not in ("any", "all"):
3025 raise ValueError(f"invalid how option: {how}")
3026
3027 if self.hasnans:
3028 res_values = self._values[~self._isnan]
3029 return type(self)._simple_new(res_values, name=self.name)
3030 return self._view()
3031
3032 # --------------------------------------------------------------------
3033 # Uniqueness Methods
3034
3035 def unique(self, level: Hashable | None = None) -> Self:
3036 """
3037 Return unique values in the index.
3038
        Unique values are returned in order of appearance; this does NOT sort.
3040
3041 Parameters
3042 ----------
3043 level : int or hashable, optional
3044 Only return values from specified level (for MultiIndex).
3045 If int, gets the level by integer position, else by level name.
3046
3047 Returns
3048 -------
3049 Index
3050
3051 See Also
3052 --------
        unique : Return unique values from an array-like.
3054 Series.unique : Return unique values of Series object.
3055
3056 Examples
3057 --------
3058 >>> idx = pd.Index([1, 1, 2, 3, 3])
3059 >>> idx.unique()
3060 Index([1, 2, 3], dtype='int64')
3061 """
3062 if level is not None:
3063 self._validate_index_level(level)
3064
3065 if self.is_unique:
3066 return self._view()
3067
3068 result = super().unique()
3069 return self._shallow_copy(result)
3070
3071 def drop_duplicates(self, *, keep: DropKeep = "first") -> Self:
3072 """
3073 Return Index with duplicate values removed.
3074
3075 Parameters
3076 ----------
3077 keep : {'first', 'last', ``False``}, default 'first'
3078 - 'first' : Drop duplicates except for the first occurrence.
3079 - 'last' : Drop duplicates except for the last occurrence.
3080 - ``False`` : Drop all duplicates.
3081
3082 Returns
3083 -------
3084 Index
3085
3086 See Also
3087 --------
3088 Series.drop_duplicates : Equivalent method on Series.
3089 DataFrame.drop_duplicates : Equivalent method on DataFrame.
3090 Index.duplicated : Related method on Index, indicating duplicate
3091 Index values.
3092
3093 Examples
3094 --------
        Generate a pandas.Index with duplicate values.
3096
3097 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama', 'hippo'])
3098
3099 The `keep` parameter controls which duplicate values are removed.
3100 The value 'first' keeps the first occurrence for each
3101 set of duplicated entries. The default value of keep is 'first'.
3102
3103 >>> idx.drop_duplicates(keep='first')
3104 Index(['lama', 'cow', 'beetle', 'hippo'], dtype='object')
3105
3106 The value 'last' keeps the last occurrence for each set of duplicated
3107 entries.
3108
3109 >>> idx.drop_duplicates(keep='last')
3110 Index(['cow', 'beetle', 'lama', 'hippo'], dtype='object')
3111
3112 The value ``False`` discards all sets of duplicated entries.
3113
3114 >>> idx.drop_duplicates(keep=False)
3115 Index(['cow', 'beetle', 'hippo'], dtype='object')
3116 """
3117 if self.is_unique:
3118 return self._view()
3119
3120 return super().drop_duplicates(keep=keep)
3121
3122 def duplicated(self, keep: DropKeep = "first") -> npt.NDArray[np.bool_]:
3123 """
3124 Indicate duplicate index values.
3125
3126 Duplicated values are indicated as ``True`` values in the resulting
3127 array. Either all duplicates, all except the first, or all except the
3128 last occurrence of duplicates can be indicated.
3129
3130 Parameters
3131 ----------
3132 keep : {'first', 'last', False}, default 'first'
            Which occurrences in a set of duplicates, if any, to leave unmarked.
3134
3135 - 'first' : Mark duplicates as ``True`` except for the first
3136 occurrence.
3137 - 'last' : Mark duplicates as ``True`` except for the last
3138 occurrence.
3139 - ``False`` : Mark all duplicates as ``True``.
3140
3141 Returns
3142 -------
3143 np.ndarray[bool]
3144
3145 See Also
3146 --------
3147 Series.duplicated : Equivalent method on pandas.Series.
3148 DataFrame.duplicated : Equivalent method on pandas.DataFrame.
3149 Index.drop_duplicates : Remove duplicate values from Index.
3150
3151 Examples
3152 --------
3153 By default, for each set of duplicated values, the first occurrence is
3154 set to False and all others to True:
3155
3156 >>> idx = pd.Index(['lama', 'cow', 'lama', 'beetle', 'lama'])
3157 >>> idx.duplicated()
3158 array([False, False, True, False, True])
3159
3160 which is equivalent to
3161
3162 >>> idx.duplicated(keep='first')
3163 array([False, False, True, False, True])
3164
        By using 'last', the last occurrence of each set of duplicated values
        is set to False and all others to True:
3167
3168 >>> idx.duplicated(keep='last')
3169 array([ True, False, True, False, False])
3170
        By setting keep to ``False``, all duplicates are True:
3172
3173 >>> idx.duplicated(keep=False)
3174 array([ True, False, True, False, True])
3175 """
3176 if self.is_unique:
3177 # fastpath available bc we are immutable
3178 return np.zeros(len(self), dtype=bool)
3179 return self._duplicated(keep=keep)
3180
3181 # --------------------------------------------------------------------
3182 # Arithmetic & Logical Methods
3183
3184 def __iadd__(self, other):
3185 # alias for __add__
3186 return self + other
3187
3188 @final
3189 def __nonzero__(self) -> NoReturn:
3190 raise ValueError(
3191 f"The truth value of a {type(self).__name__} is ambiguous. "
3192 "Use a.empty, a.bool(), a.item(), a.any() or a.all()."
3193 )
3194
3195 __bool__ = __nonzero__
3196
3197 # --------------------------------------------------------------------
3198 # Set Operation Methods
3199
3200 def _get_reconciled_name_object(self, other):
3201 """
3202 If the result of a set operation will be self,
3203 return self, unless the name changes, in which
3204 case make a shallow copy of self.
3205 """
3206 name = get_op_result_name(self, other)
3207 if self.name is not name:
3208 return self.rename(name)
3209 return self
3210
3211 @final
3212 def _validate_sort_keyword(self, sort):
3213 if sort not in [None, False, True]:
3214 raise ValueError(
3215 "The 'sort' keyword only takes the values of "
3216 f"None, True, or False; {sort} was passed."
3217 )
3218
3219 @final
3220 def _dti_setop_align_tzs(self, other: Index, setop: str_t) -> tuple[Index, Index]:
3221 """
3222 With mismatched timezones, cast both to UTC.
3223 """
        # Caller is responsible for checking
        # `self.dtype != other.dtype`
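        # Illustrative sketch: a tz="US/Eastern" operand and a tz="Asia/Tokyo"
        # operand are both converted to UTC so the setop runs on one common
        # dtype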
3226 if (
3227 isinstance(self, ABCDatetimeIndex)
3228 and isinstance(other, ABCDatetimeIndex)
3229 and self.tz is not None
3230 and other.tz is not None
3231 ):
3232 # GH#39328, GH#45357
3233 left = self.tz_convert("UTC")
3234 right = other.tz_convert("UTC")
3235 return left, right
3236 return self, other
3237
3238 @final
3239 def union(self, other, sort=None):
3240 """
3241 Form the union of two Index objects.
3242
3243 If the Index objects are incompatible, both Index objects will be
3244 cast to dtype('object') first.
3245
3246 Parameters
3247 ----------
3248 other : Index or array-like
3249 sort : bool or None, default None
3250 Whether to sort the resulting Index.
3251
3252 * None : Sort the result, except when
3253
3254 1. `self` and `other` are equal.
3255 2. `self` or `other` has length 0.
3256 3. Some values in `self` or `other` cannot be compared.
3257 A RuntimeWarning is issued in this case.
3258
3259 * False : do not sort the result.
3260 * True : Sort the result (which may raise TypeError).
3261
3262 Returns
3263 -------
3264 Index
3265
3266 Examples
3267 --------
3268 Union matching dtypes
3269
3270 >>> idx1 = pd.Index([1, 2, 3, 4])
3271 >>> idx2 = pd.Index([3, 4, 5, 6])
3272 >>> idx1.union(idx2)
3273 Index([1, 2, 3, 4, 5, 6], dtype='int64')
3274
3275 Union mismatched dtypes
3276
3277 >>> idx1 = pd.Index(['a', 'b', 'c', 'd'])
3278 >>> idx2 = pd.Index([1, 2, 3, 4])
3279 >>> idx1.union(idx2)
3280 Index(['a', 'b', 'c', 'd', 1, 2, 3, 4], dtype='object')
3281
3282 MultiIndex case
3283
3284 >>> idx1 = pd.MultiIndex.from_arrays(
3285 ... [[1, 1, 2, 2], ["Red", "Blue", "Red", "Blue"]]
3286 ... )
3287 >>> idx1
3288 MultiIndex([(1, 'Red'),
3289 (1, 'Blue'),
3290 (2, 'Red'),
3291 (2, 'Blue')],
3292 )
3293 >>> idx2 = pd.MultiIndex.from_arrays(
3294 ... [[3, 3, 2, 2], ["Red", "Green", "Red", "Green"]]
3295 ... )
3296 >>> idx2
3297 MultiIndex([(3, 'Red'),
3298 (3, 'Green'),
3299 (2, 'Red'),
3300 (2, 'Green')],
3301 )
3302 >>> idx1.union(idx2)
3303 MultiIndex([(1, 'Blue'),
3304 (1, 'Red'),
3305 (2, 'Blue'),
3306 (2, 'Green'),
3307 (2, 'Red'),
3308 (3, 'Green'),
3309 (3, 'Red')],
3310 )
3311 >>> idx1.union(idx2, sort=False)
3312 MultiIndex([(1, 'Red'),
3313 (1, 'Blue'),
3314 (2, 'Red'),
3315 (2, 'Blue'),
3316 (3, 'Red'),
3317 (3, 'Green'),
3318 (2, 'Green')],
3319 )
3320 """
3321 self._validate_sort_keyword(sort)
3322 self._assert_can_do_setop(other)
3323 other, result_name = self._convert_can_do_setop(other)
3324
3325 if self.dtype != other.dtype:
3326 if (
3327 isinstance(self, ABCMultiIndex)
3328 and not is_object_dtype(_unpack_nested_dtype(other))
3329 and len(other) > 0
3330 ):
3331 raise NotImplementedError(
3332 "Can only union MultiIndex with MultiIndex or Index of tuples, "
3333 "try mi.to_flat_index().union(other) instead."
3334 )
3335 self, other = self._dti_setop_align_tzs(other, "union")
3336
3337 dtype = self._find_common_type_compat(other)
3338 left = self.astype(dtype, copy=False)
3339 right = other.astype(dtype, copy=False)
3340 return left.union(right, sort=sort)
3341
3342 elif not len(other) or self.equals(other):
3343 # NB: whether this (and the `if not len(self)` check below) come before
3344 # or after the dtype equality check above affects the returned dtype
3345 result = self._get_reconciled_name_object(other)
3346 if sort is True:
3347 return result.sort_values()
3348 return result
3349
3350 elif not len(self):
3351 result = other._get_reconciled_name_object(self)
3352 if sort is True:
3353 return result.sort_values()
3354 return result
3355
3356 result = self._union(other, sort=sort)
3357
3358 return self._wrap_setop_result(other, result)
3359
3360 def _union(self, other: Index, sort: bool | None):
3361 """
        Specific union logic should go here. In subclasses, union behavior
        should be overridden here rather than in `self.union`.
3364
3365 Parameters
3366 ----------
3367 other : Index or array-like
        sort : bool or None
3369 Whether to sort the resulting index.
3370
3371 * True : sort the result
3372 * False : do not sort the result.
3373 * None : sort the result, except when `self` and `other` are equal
3374 or when the values cannot be compared.
3375
3376 Returns
3377 -------
3378 Index
3379 """
3380 lvals = self._values
3381 rvals = other._values
3382
3383 if (
3384 sort in (None, True)
3385 and self.is_monotonic_increasing
3386 and other.is_monotonic_increasing
3387 and not (self.has_duplicates and other.has_duplicates)
3388 and self._can_use_libjoin
3389 and other._can_use_libjoin
3390 ):
3391 # Both are monotonic and at least one is unique, so can use outer join
3392 # (actually don't need either unique, but without this restriction
3393 # test_union_same_value_duplicated_in_both fails)
3394 try:
3395 return self._outer_indexer(other)[0]
3396 except (TypeError, IncompatibleFrequency):
3397 # incomparable objects; should only be for object dtype
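                # e.g. lvals=[1, "a"] and rvals=["a", 2] (hypothetical,
                # unorderable mix) give value_list=[1, "a", 2] below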
3398 value_list = list(lvals)
3399
3400 # worth making this faster? a very unusual case
3401 value_set = set(lvals)
3402 value_list.extend([x for x in rvals if x not in value_set])
3403 # If objects are unorderable, we must have object dtype.
3404 return np.array(value_list, dtype=object)
3405
3406 elif not other.is_unique:
3407 # other has duplicates
3408 result_dups = algos.union_with_duplicates(self, other)
3409 return _maybe_try_sort(result_dups, sort)
3410
3411 # The rest of this method is analogous to Index._intersection_via_get_indexer
3412
3413 # Self may have duplicates; other already checked as unique
3414 # find indexes of things in "other" that are not in "self"
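        # Illustrative values: self=[1, 2, 3], other=[2, 3, 4] give
        # indexer=[1, 2, -1], so missing=[2] and other.take(missing)=[4]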
3415 if self._index_as_unique:
3416 indexer = self.get_indexer(other)
3417 missing = (indexer == -1).nonzero()[0]
3418 else:
3419 missing = algos.unique1d(self.get_indexer_non_unique(other)[1])
3420
3421 result: Index | MultiIndex | ArrayLike
3422 if self._is_multi:
3423 # Preserve MultiIndex to avoid losing dtypes
3424 result = self.append(other.take(missing))
3425
3426 else:
3427 if len(missing) > 0:
3428 other_diff = rvals.take(missing)
3429 result = concat_compat((lvals, other_diff))
3430 else:
3431 result = lvals
3432
3433 if not self.is_monotonic_increasing or not other.is_monotonic_increasing:
3434 # if both are monotonic then result should already be sorted
3435 result = _maybe_try_sort(result, sort)
3436
3437 return result
3438
3439 @final
3440 def _wrap_setop_result(self, other: Index, result) -> Index:
3441 name = get_op_result_name(self, other)
3442 if isinstance(result, Index):
3443 if result.name != name:
3444 result = result.rename(name)
3445 else:
3446 result = self._shallow_copy(result, name=name)
3447 return result
3448
3449 @final
3450 def intersection(self, other, sort: bool = False):
3451 # default sort keyword is different here from other setops intentionally
3452 # done in GH#25063
3453 """
3454 Form the intersection of two Index objects.
3455
3456 This returns a new Index with elements common to the index and `other`.
3457
3458 Parameters
3459 ----------
3460 other : Index or array-like
3461 sort : True, False or None, default False
3462 Whether to sort the resulting index.
3463
3464 * None : sort the result, except when `self` and `other` are equal
3465 or when the values cannot be compared.
3466 * False : do not sort the result.
3467 * True : Sort the result (which may raise TypeError).
3468
3469 Returns
3470 -------
3471 Index
3472
3473 Examples
3474 --------
3475 >>> idx1 = pd.Index([1, 2, 3, 4])
3476 >>> idx2 = pd.Index([3, 4, 5, 6])
3477 >>> idx1.intersection(idx2)
3478 Index([3, 4], dtype='int64')
3479 """
3480 self._validate_sort_keyword(sort)
3481 self._assert_can_do_setop(other)
3482 other, result_name = self._convert_can_do_setop(other)
3483
3484 if self.dtype != other.dtype:
3485 self, other = self._dti_setop_align_tzs(other, "intersection")
3486
3487 if self.equals(other):
3488 if not self.is_unique:
3489 result = self.unique()._get_reconciled_name_object(other)
3490 else:
3491 result = self._get_reconciled_name_object(other)
3492 if sort is True:
3493 result = result.sort_values()
3494 return result
3495
3496 if len(self) == 0 or len(other) == 0:
            # fastpath; we need to be careful to preserve commutativity
3498
3499 if self._is_multi or other._is_multi:
3500 # _convert_can_do_setop ensures that we have both or neither
3501 # We retain self.levels
3502 return self[:0].rename(result_name)
3503
3504 dtype = self._find_common_type_compat(other)
3505 if self.dtype == dtype:
3506 # Slicing allows us to retain DTI/TDI.freq, RangeIndex
3507
3508 # Note: self[:0] vs other[:0] affects
3509 # 1) which index's `freq` we get in DTI/TDI cases
3510 # This may be a historical artifact, i.e. no documented
3511 # reason for this choice.
3512 # 2) The `step` we get in RangeIndex cases
3513 if len(self) == 0:
3514 return self[:0].rename(result_name)
3515 else:
3516 return other[:0].rename(result_name)
3517
3518 return Index([], dtype=dtype, name=result_name)
3519
3520 elif not self._should_compare(other):
3521 # We can infer that the intersection is empty.
3522 if isinstance(self, ABCMultiIndex):
3523 return self[:0].rename(result_name)
3524 return Index([], name=result_name)
3525
3526 elif self.dtype != other.dtype:
3527 dtype = self._find_common_type_compat(other)
3528 this = self.astype(dtype, copy=False)
3529 other = other.astype(dtype, copy=False)
3530 return this.intersection(other, sort=sort)
3531
3532 result = self._intersection(other, sort=sort)
3533 return self._wrap_intersection_result(other, result)
3534
3535 def _intersection(self, other: Index, sort: bool = False):
3536 """
3537 intersection specialized to the case with matching dtypes.
3538 """
3539 if (
3540 self.is_monotonic_increasing
3541 and other.is_monotonic_increasing
3542 and self._can_use_libjoin
3543 and other._can_use_libjoin
3544 ):
3545 try:
3546 res_indexer, indexer, _ = self._inner_indexer(other)
3547 except TypeError:
3548 # non-comparable; should only be for object dtype
3549 pass
3550 else:
3551 # TODO: algos.unique1d should preserve DTA/TDA
3552 if is_numeric_dtype(self.dtype):
3553 # This is faster, because Index.unique() checks for uniqueness
3554 # before calculating the unique values.
3555 res = algos.unique1d(res_indexer)
3556 else:
3557 result = self.take(indexer)
3558 res = result.drop_duplicates()
3559 return ensure_wrapped_if_datetimelike(res)
3560
3561 res_values = self._intersection_via_get_indexer(other, sort=sort)
3562 res_values = _maybe_try_sort(res_values, sort)
3563 return res_values
3564
3565 def _wrap_intersection_result(self, other, result):
3566 # We will override for MultiIndex to handle empty results
3567 return self._wrap_setop_result(other, result)
3568
3569 @final
3570 def _intersection_via_get_indexer(
3571 self, other: Index | MultiIndex, sort
3572 ) -> ArrayLike | MultiIndex:
3573 """
3574 Find the intersection of two Indexes using get_indexer.
3575
3576 Returns
3577 -------
3578 np.ndarray or ExtensionArray or MultiIndex
3579 The returned array will be unique.
3580 """
3581 left_unique = self.unique()
3582 right_unique = other.unique()
3583
3584 # even though we are unique, we need get_indexer_for for IntervalIndex
3585 indexer = left_unique.get_indexer_for(right_unique)
3586
3587 mask = indexer != -1
3588
3589 taker = indexer.take(mask.nonzero()[0])
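        # Illustrative values: left_unique=['a', 'b', 'c'] and
        # right_unique=['b', 'c', 'd'] give indexer=[1, 2, -1], taker=[1, 2]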
3590 if sort is False:
3591 # sort bc we want the elements in the same order they are in self
3592 # unnecessary in the case with sort=None bc we will sort later
3593 taker = np.sort(taker)
3594
3595 result: MultiIndex | ExtensionArray | np.ndarray
3596 if isinstance(left_unique, ABCMultiIndex):
3597 result = left_unique.take(taker)
3598 else:
3599 result = left_unique.take(taker)._values
3600 return result
3601
3602 @final
3603 def difference(self, other, sort=None):
3604 """
3605 Return a new Index with elements of index not in `other`.
3606
3607 This is the set difference of two Index objects.
3608
3609 Parameters
3610 ----------
3611 other : Index or array-like
3612 sort : bool or None, default None
3613 Whether to sort the resulting index. By default, the
3614 values are attempted to be sorted, but any TypeError from
3615 incomparable elements is caught by pandas.
3616
3617 * None : Attempt to sort the result, but catch any TypeErrors
3618 from comparing incomparable elements.
3619 * False : Do not sort the result.
3620 * True : Sort the result (which may raise TypeError).
3621
3622 Returns
3623 -------
3624 Index
3625
3626 Examples
3627 --------
3628 >>> idx1 = pd.Index([2, 1, 3, 4])
3629 >>> idx2 = pd.Index([3, 4, 5, 6])
3630 >>> idx1.difference(idx2)
3631 Index([1, 2], dtype='int64')
3632 >>> idx1.difference(idx2, sort=False)
3633 Index([2, 1], dtype='int64')
3634 """
3635 self._validate_sort_keyword(sort)
3636 self._assert_can_do_setop(other)
3637 other, result_name = self._convert_can_do_setop(other)
3638
3639 # Note: we do NOT call _dti_setop_align_tzs here, as there
3640 # is no requirement that .difference be commutative, so it does
3641 # not cast to object.
3642
3643 if self.equals(other):
3644 # Note: we do not (yet) sort even if sort=None GH#24959
3645 return self[:0].rename(result_name)
3646
3647 if len(other) == 0:
3648 # Note: we do not (yet) sort even if sort=None GH#24959
3649 result = self.unique().rename(result_name)
3650 if sort is True:
3651 return result.sort_values()
3652 return result
3653
3654 if not self._should_compare(other):
3655 # Nothing matches -> difference is everything
3656 result = self.unique().rename(result_name)
3657 if sort is True:
3658 return result.sort_values()
3659 return result
3660
3661 result = self._difference(other, sort=sort)
3662 return self._wrap_difference_result(other, result)
3663
3664 def _difference(self, other, sort):
3665 # overridden by RangeIndex
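        # Illustrative values: this=[2, 1, 3, 4], other=[3, 4, 5, 6] give
        # other.get_indexer_for(this)=[-1, -1, 0, 1], so the_diff=[2, 1]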
3666 this = self
3667 if isinstance(self, ABCCategoricalIndex) and self.hasnans and other.hasnans:
3668 this = this.dropna()
3669 other = other.unique()
3670 the_diff = this[other.get_indexer_for(this) == -1]
3671 the_diff = the_diff if this.is_unique else the_diff.unique()
3672 the_diff = _maybe_try_sort(the_diff, sort)
3673 return the_diff
3674
3675 def _wrap_difference_result(self, other, result):
3676 # We will override for MultiIndex to handle empty results
3677 return self._wrap_setop_result(other, result)
3678
3679 def symmetric_difference(self, other, result_name=None, sort=None):
3680 """
3681 Compute the symmetric difference of two Index objects.
3682
3683 Parameters
3684 ----------
3685 other : Index or array-like
3686 result_name : str
3687 sort : bool or None, default None
3688 Whether to sort the resulting index. By default, the
3689 values are attempted to be sorted, but any TypeError from
3690 incomparable elements is caught by pandas.
3691
3692 * None : Attempt to sort the result, but catch any TypeErrors
3693 from comparing incomparable elements.
3694 * False : Do not sort the result.
3695 * True : Sort the result (which may raise TypeError).
3696
3697 Returns
3698 -------
3699 Index
3700
3701 Notes
3702 -----
3703 ``symmetric_difference`` contains elements that appear in either
3704 ``idx1`` or ``idx2`` but not both. Equivalent to the Index created by
3705 ``idx1.difference(idx2) | idx2.difference(idx1)`` with duplicates
3706 dropped.
3707
3708 Examples
3709 --------
3710 >>> idx1 = pd.Index([1, 2, 3, 4])
3711 >>> idx2 = pd.Index([2, 3, 4, 5])
3712 >>> idx1.symmetric_difference(idx2)
3713 Index([1, 5], dtype='int64')
3714 """
3715 self._validate_sort_keyword(sort)
3716 self._assert_can_do_setop(other)
3717 other, result_name_update = self._convert_can_do_setop(other)
3718 if result_name is None:
3719 result_name = result_name_update
3720
3721 if self.dtype != other.dtype:
3722 self, other = self._dti_setop_align_tzs(other, "symmetric_difference")
3723
3724 if not self._should_compare(other):
3725 return self.union(other, sort=sort).rename(result_name)
3726
3727 elif self.dtype != other.dtype:
3728 dtype = self._find_common_type_compat(other)
3729 this = self.astype(dtype, copy=False)
3730 that = other.astype(dtype, copy=False)
3731 return this.symmetric_difference(that, sort=sort).rename(result_name)
3732
3733 this = self.unique()
3734 other = other.unique()
3735 indexer = this.get_indexer_for(other)
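        # Illustrative values: this=[1, 2, 3, 4], other=[2, 3, 4, 5] give
        # indexer=[1, 2, 3, -1]; below, left_diff=[1] and right_diff=[5]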
3736
3737 # {this} minus {other}
3738 common_indexer = indexer.take((indexer != -1).nonzero()[0])
3739 left_indexer = np.setdiff1d(
3740 np.arange(this.size), common_indexer, assume_unique=True
3741 )
3742 left_diff = this.take(left_indexer)
3743
3744 # {other} minus {this}
3745 right_indexer = (indexer == -1).nonzero()[0]
3746 right_diff = other.take(right_indexer)
3747
3748 res_values = left_diff.append(right_diff)
3749 result = _maybe_try_sort(res_values, sort)
3750
3751 if not self._is_multi:
3752 return Index(result, name=result_name, dtype=res_values.dtype)
3753 else:
3754 left_diff = cast("MultiIndex", left_diff)
3755 if len(result) == 0:
3756 # result might be an Index, if other was an Index
3757 return left_diff.remove_unused_levels().set_names(result_name)
3758 return result.set_names(result_name)
3759
3760 @final
3761 def _assert_can_do_setop(self, other) -> bool:
3762 if not is_list_like(other):
3763 raise TypeError("Input must be Index or array-like")
3764 return True
3765
3766 def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
3767 if not isinstance(other, Index):
3768 other = Index(other, name=self.name)
3769 result_name = self.name
3770 else:
3771 result_name = get_op_result_name(self, other)
3772 return other, result_name
3773
3774 # --------------------------------------------------------------------
3775 # Indexing Methods
3776
3777 def get_loc(self, key):
3778 """
3779 Get integer location, slice or boolean mask for requested label.
3780
3781 Parameters
3782 ----------
3783 key : label
3784
3785 Returns
3786 -------
3787 int if unique index, slice if monotonic index, else mask
3788
3789 Examples
3790 --------
3791 >>> unique_index = pd.Index(list('abc'))
3792 >>> unique_index.get_loc('b')
3793 1
3794
3795 >>> monotonic_index = pd.Index(list('abbc'))
3796 >>> monotonic_index.get_loc('b')
3797 slice(1, 3, None)
3798
3799 >>> non_monotonic_index = pd.Index(list('abcb'))
3800 >>> non_monotonic_index.get_loc('b')
3801 array([False, True, False, True])
3802 """
3803 casted_key = self._maybe_cast_indexer(key)
3804 try:
3805 return self._engine.get_loc(casted_key)
3806 except KeyError as err:
3807 if isinstance(casted_key, slice) or (
3808 isinstance(casted_key, abc.Iterable)
3809 and any(isinstance(x, slice) for x in casted_key)
3810 ):
3811 raise InvalidIndexError(key)
3812 raise KeyError(key) from err
3813 except TypeError:
3814 # If we have a listlike key, _check_indexing_error will raise
3815 # InvalidIndexError. Otherwise we fall through and re-raise
3816 # the TypeError.
3817 self._check_indexing_error(key)
3818 raise
3819
3820 @final
3821 def get_indexer(
3822 self,
3823 target,
3824 method: ReindexMethod | None = None,
3825 limit: int | None = None,
3826 tolerance=None,
3827 ) -> npt.NDArray[np.intp]:
3828 """
3829 Compute indexer and mask for new index given the current index.
3830
3831 The indexer should be then used as an input to ndarray.take to align the
3832 current data to the new index.
3833
3834 Parameters
3835 ----------
3836 target : Index
3837 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
3838 * default: exact matches only.
3839 * pad / ffill: find the PREVIOUS index value if no exact match.
3840 * backfill / bfill: use NEXT index value if no exact match
3841 * nearest: use the NEAREST index value if no exact match. Tied
3842 distances are broken by preferring the larger index value.
3843 limit : int, optional
3844 Maximum number of consecutive labels in ``target`` to match for
3845 inexact matches.
3846 tolerance : optional
3847 Maximum distance between original and new labels for inexact
3848 matches. The values of the index at the matching locations must
3849 satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
3850
3851 Tolerance may be a scalar value, which applies the same tolerance
3852 to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, and Series; it
            must be the same size as the index, and its dtype must exactly
            match the index's type.
3856
3857 Returns
3858 -------
3859 np.ndarray[np.intp]
3860 Integers from 0 to n - 1 indicating that the index at these
3861 positions matches the corresponding target values. Missing values
3862 in the target are marked by -1.
3863
3864 Notes
3865 -----
3866 Returns -1 for unmatched values, for further explanation see the
3867 example below.
3868
3869 Examples
3870 --------
3871 >>> index = pd.Index(['c', 'a', 'b'])
3872 >>> index.get_indexer(['a', 'b', 'x'])
3873 array([ 1, 2, -1])
3874
3875 Notice that the return value is an array of locations in ``index``
3876 and ``x`` is marked by -1, as it is not in ``index``.
3877 """
3878 method = clean_reindex_fill_method(method)
3879 orig_target = target
3880 target = self._maybe_cast_listlike_indexer(target)
3881
3882 self._check_indexing_method(method, limit, tolerance)
3883
3884 if not self._index_as_unique:
3885 raise InvalidIndexError(self._requires_unique_msg)
3886
3887 if len(target) == 0:
3888 return np.array([], dtype=np.intp)
3889
3890 if not self._should_compare(target) and not self._should_partial_index(target):
            # IntervalIndex gets special treatment because numeric scalars
            # can be matched to Interval scalars
3893 return self._get_indexer_non_comparable(target, method=method, unique=True)
3894
3895 if isinstance(self.dtype, CategoricalDtype):
3896 # _maybe_cast_listlike_indexer ensures target has our dtype
3897 # (could improve perf by doing _should_compare check earlier?)
3898 assert self.dtype == target.dtype
3899
3900 indexer = self._engine.get_indexer(target.codes)
3901 if self.hasnans and target.hasnans:
3902 # After _maybe_cast_listlike_indexer, target elements which do not
3903 # belong to some category are changed to NaNs
3904 # Mask to track actual NaN values compared to inserted NaN values
3905 # GH#45361
3906 target_nans = isna(orig_target)
3907 loc = self.get_loc(np.nan)
3908 mask = target.isna()
3909 indexer[target_nans] = loc
3910 indexer[mask & ~target_nans] = -1
3911 return indexer
3912
3913 if isinstance(target.dtype, CategoricalDtype):
3914 # potential fastpath
3915 # get an indexer for unique categories then propagate to codes via take_nd
3916 # get_indexer instead of _get_indexer needed for MultiIndex cases
3917 # e.g. test_append_different_columns_types
3918 categories_indexer = self.get_indexer(target.categories)
3919
3920 indexer = algos.take_nd(categories_indexer, target.codes, fill_value=-1)
3921
3922 if (not self._is_multi and self.hasnans) and target.hasnans:
3923 # Exclude MultiIndex because hasnans raises NotImplementedError
3924 # we should only get here if we are unique, so loc is an integer
3925 # GH#41934
3926 loc = self.get_loc(np.nan)
3927 mask = target.isna()
3928 indexer[mask] = loc
3929
3930 return ensure_platform_int(indexer)
3931
3932 pself, ptarget = self._maybe_downcast_for_indexing(target)
3933 if pself is not self or ptarget is not target:
3934 return pself.get_indexer(
3935 ptarget, method=method, limit=limit, tolerance=tolerance
3936 )
3937
3938 if self.dtype == target.dtype and self.equals(target):
3939 # Only call equals if we have same dtype to avoid inference/casting
3940 return np.arange(len(target), dtype=np.intp)
3941
3942 if self.dtype != target.dtype and not self._should_partial_index(target):
3943 # _should_partial_index e.g. IntervalIndex with numeric scalars
3944 # that can be matched to Interval scalars.
3945 dtype = self._find_common_type_compat(target)
3946
3947 this = self.astype(dtype, copy=False)
3948 target = target.astype(dtype, copy=False)
3949 return this._get_indexer(
3950 target, method=method, limit=limit, tolerance=tolerance
3951 )
3952
3953 return self._get_indexer(target, method, limit, tolerance)
3954
3955 def _get_indexer(
3956 self,
3957 target: Index,
3958 method: str_t | None = None,
3959 limit: int | None = None,
3960 tolerance=None,
3961 ) -> npt.NDArray[np.intp]:
3962 if tolerance is not None:
3963 tolerance = self._convert_tolerance(tolerance, target)
3964
3965 if method in ["pad", "backfill"]:
3966 indexer = self._get_fill_indexer(target, method, limit, tolerance)
3967 elif method == "nearest":
3968 indexer = self._get_nearest_indexer(target, limit, tolerance)
3969 else:
3970 if target._is_multi and self._is_multi:
3971 engine = self._engine
3972 # error: Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]"
3973 # has no attribute "_extract_level_codes"
3974 tgt_values = engine._extract_level_codes( # type: ignore[union-attr]
3975 target
3976 )
3977 else:
3978 tgt_values = target._get_engine_target()
3979
3980 indexer = self._engine.get_indexer(tgt_values)
3981
3982 return ensure_platform_int(indexer)
3983
3984 @final
3985 def _should_partial_index(self, target: Index) -> bool:
3986 """
3987 Should we attempt partial-matching indexing?
3988 """
3989 if isinstance(self.dtype, IntervalDtype):
3990 if isinstance(target.dtype, IntervalDtype):
3991 return False
3992 # "Index" has no attribute "left"
3993 return self.left._should_compare(target) # type: ignore[attr-defined]
3994 return False
3995
3996 @final
3997 def _check_indexing_method(
3998 self,
3999 method: str_t | None,
4000 limit: int | None = None,
4001 tolerance=None,
4002 ) -> None:
4003 """
4004 Raise if we have a get_indexer `method` that is not supported or valid.
4005 """
4006 if method not in [None, "bfill", "backfill", "pad", "ffill", "nearest"]:
4007 # in practice the clean_reindex_fill_method call would raise
4008 # before we get here
4009 raise ValueError("Invalid fill method") # pragma: no cover
4010
4011 if self._is_multi:
4012 if method == "nearest":
4013 raise NotImplementedError(
4014 "method='nearest' not implemented yet "
4015 "for MultiIndex; see GitHub issue 9365"
4016 )
4017 if method in ("pad", "backfill"):
4018 if tolerance is not None:
4019 raise NotImplementedError(
4020 "tolerance not implemented yet for MultiIndex"
4021 )
4022
4023 if isinstance(self.dtype, (IntervalDtype, CategoricalDtype)):
4024 # GH#37871 for now this is only for IntervalIndex and CategoricalIndex
4025 if method is not None:
4026 raise NotImplementedError(
4027 f"method {method} not yet implemented for {type(self).__name__}"
4028 )
4029
4030 if method is None:
4031 if tolerance is not None:
4032 raise ValueError(
4033 "tolerance argument only valid if doing pad, "
4034 "backfill or nearest reindexing"
4035 )
4036 if limit is not None:
4037 raise ValueError(
4038 "limit argument only valid if doing pad, "
4039 "backfill or nearest reindexing"
4040 )
4041
4042 def _convert_tolerance(self, tolerance, target: np.ndarray | Index) -> np.ndarray:
4043 # override this method on subclasses
4044 tolerance = np.asarray(tolerance)
4045 if target.size != tolerance.size and tolerance.size > 1:
4046 raise ValueError("list-like tolerance size must match target index size")
4047 elif is_numeric_dtype(self) and not np.issubdtype(tolerance.dtype, np.number):
4048 if tolerance.ndim > 0:
4049 raise ValueError(
4050 f"tolerance argument for {type(self).__name__} with dtype "
4051 f"{self.dtype} must contain numeric elements if it is list type"
4052 )
4053
4054 raise ValueError(
4055 f"tolerance argument for {type(self).__name__} with dtype {self.dtype} "
4056 f"must be numeric if it is a scalar: {repr(tolerance)}"
4057 )
4058 return tolerance
4059
4060 @final
4061 def _get_fill_indexer(
4062 self, target: Index, method: str_t, limit: int | None = None, tolerance=None
4063 ) -> npt.NDArray[np.intp]:
4064 if self._is_multi:
4065 if not (self.is_monotonic_increasing or self.is_monotonic_decreasing):
4066 raise ValueError("index must be monotonic increasing or decreasing")
4067 encoded = self.append(target)._engine.values # type: ignore[union-attr]
4068 self_encoded = Index(encoded[: len(self)])
4069 target_encoded = Index(encoded[len(self) :])
4070 return self_encoded._get_fill_indexer(
4071 target_encoded, method, limit, tolerance
4072 )
4073
4074 if self.is_monotonic_increasing and target.is_monotonic_increasing:
4075 target_values = target._get_engine_target()
4076 own_values = self._get_engine_target()
4077 if not isinstance(target_values, np.ndarray) or not isinstance(
4078 own_values, np.ndarray
4079 ):
4080 raise NotImplementedError
4081
4082 if method == "pad":
4083 indexer = libalgos.pad(own_values, target_values, limit=limit)
4084 else:
4085 # i.e. "backfill"
4086 indexer = libalgos.backfill(own_values, target_values, limit=limit)
4087 else:
4088 indexer = self._get_fill_indexer_searchsorted(target, method, limit)
4089 if tolerance is not None and len(self):
4090 indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
4091 return indexer
4092
4093 @final
4094 def _get_fill_indexer_searchsorted(
4095 self, target: Index, method: str_t, limit: int | None = None
4096 ) -> npt.NDArray[np.intp]:
4097 """
4098 Fallback pad/backfill get_indexer that works for monotonic decreasing
4099 indexes and non-monotonic targets.
4100 """
4101 if limit is not None:
4102 raise ValueError(
4103 f"limit argument for {repr(method)} method only well-defined "
4104 "if index and target are monotonic"
4105 )
4106
4107 side: Literal["left", "right"] = "left" if method == "pad" else "right"
4108
4109 # find exact matches first (this simplifies the algorithm)
4110 indexer = self.get_indexer(target)
4111 nonexact = indexer == -1
4112 indexer[nonexact] = self._searchsorted_monotonic(target[nonexact], side)
4113 if side == "left":
4114 # searchsorted returns "indices into a sorted array such that,
4115 # if the corresponding elements in v were inserted before the
4116 # indices, the order of a would be preserved".
4117 # Thus, we need to subtract 1 to find values to the left.
4118 indexer[nonexact] -= 1
            # This also maps not-found values (values of 0 from
            # np.searchsorted) to -1, which conveniently is also our
            # sentinel for missing values.
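            # Illustrative example: with self = [10, 20, 30] and method="pad",
            #  a target value of 5 gives searchsorted 0, minus 1 -> -1 (no
            #  previous label), while 25 gives 2, minus 1 -> 1 (pad from 20).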
4122 else:
4123 # Mark indices to the right of the largest value as not found
4124 indexer[indexer == len(self)] = -1
4125 return indexer
4126
4127 @final
4128 def _get_nearest_indexer(
4129 self, target: Index, limit: int | None, tolerance
4130 ) -> npt.NDArray[np.intp]:
4131 """
4132 Get the indexer for the nearest index labels; requires an index with
4133 values that can be subtracted from each other (e.g., not strings or
4134 tuples).
4135 """
4136 if not len(self):
4137 return self._get_fill_indexer(target, "pad")
4138
4139 left_indexer = self.get_indexer(target, "pad", limit=limit)
4140 right_indexer = self.get_indexer(target, "backfill", limit=limit)
4141
4142 left_distances = self._difference_compat(target, left_indexer)
4143 right_distances = self._difference_compat(target, right_indexer)
4144
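        # Tie-breaking: on a monotonic increasing index, operator.lt sends
        #  equal distances to the backfill (right) side, i.e. the larger
        #  index value, matching the documented behavior of method="nearest".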
4145 op = operator.lt if self.is_monotonic_increasing else operator.le
4146 indexer = np.where(
4147 # error: Argument 1&2 has incompatible type "Union[ExtensionArray,
4148 # ndarray[Any, Any]]"; expected "Union[SupportsDunderLE,
4149 # SupportsDunderGE, SupportsDunderGT, SupportsDunderLT]"
4150 op(left_distances, right_distances) # type: ignore[arg-type]
4151 | (right_indexer == -1),
4152 left_indexer,
4153 right_indexer,
4154 )
4155 if tolerance is not None:
4156 indexer = self._filter_indexer_tolerance(target, indexer, tolerance)
4157 return indexer
4158
4159 @final
4160 def _filter_indexer_tolerance(
4161 self,
4162 target: Index,
4163 indexer: npt.NDArray[np.intp],
4164 tolerance,
4165 ) -> npt.NDArray[np.intp]:
4166 distance = self._difference_compat(target, indexer)
4167
4168 return np.where(distance <= tolerance, indexer, -1)
4169
4170 @final
4171 def _difference_compat(
4172 self, target: Index, indexer: npt.NDArray[np.intp]
4173 ) -> ArrayLike:
4174 # Compatibility for PeriodArray, for which __sub__ returns an ndarray[object]
4175 # of DateOffset objects, which do not support __abs__ (and would be slow
4176 # if they did)
4177
4178 if isinstance(self.dtype, PeriodDtype):
4179 # Note: we only get here with matching dtypes
4180 own_values = cast("PeriodArray", self._data)._ndarray
4181 target_values = cast("PeriodArray", target._data)._ndarray
4182 diff = own_values[indexer] - target_values
4183 else:
4184 # error: Unsupported left operand type for - ("ExtensionArray")
4185 diff = self._values[indexer] - target._values # type: ignore[operator]
4186 return abs(diff)
4187
4188 # --------------------------------------------------------------------
4189 # Indexer Conversion Methods
4190
4191 @final
4192 def _validate_positional_slice(self, key: slice) -> None:
4193 """
4194 For positional indexing, a slice must have either int or None
4195 for each of start, stop, and step.
4196 """
4197 self._validate_indexer("positional", key.start, "iloc")
4198 self._validate_indexer("positional", key.stop, "iloc")
4199 self._validate_indexer("positional", key.step, "iloc")
4200
4201 def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
4202 """
4203 Convert a slice indexer.
4204
        By definition, these are labels unless the slice is found to be
        positional. Floats are not allowed as the start, stop, or step of
        the slice.

        Parameters
        ----------
        key : slice
            The slice to convert.
        kind : {'loc', 'getitem'}
        """
4213
4214 # potentially cast the bounds to integers
4215 start, stop, step = key.start, key.stop, key.step
4216
4217 # figure out if this is a positional indexer
4218 is_index_slice = is_valid_positional_slice(key)
4219
4220 # TODO(GH#50617): once Series.__[gs]etitem__ is removed we should be able
4221 # to simplify this.
4222 if lib.is_np_dtype(self.dtype, "f"):
4223 # We always treat __getitem__ slicing as label-based
4224 # translate to locations
4225 if kind == "getitem" and is_index_slice and not start == stop and step != 0:
                # exclude step=0 from the warning because it will raise anyway
                # exclude start == stop since the result will be empty either
                #  way, or the slice is [:] / [::-1] (start and stop both
                #  None), which won't change under positional treatment
4230 warnings.warn(
4231 # GH#49612
4232 "The behavior of obj[i:j] with a float-dtype index is "
4233 "deprecated. In a future version, this will be treated as "
4234 "positional instead of label-based. For label-based slicing, "
4235 "use obj.loc[i:j] instead",
4236 FutureWarning,
4237 stacklevel=find_stack_level(),
4238 )
4239 return self.slice_indexer(start, stop, step)
4240
4241 if kind == "getitem":
4242 # called from the getitem slicers, validate that we are in fact integers
4243 if is_index_slice:
4244 # In this case the _validate_indexer checks below are redundant
4245 return key
4246 elif self.dtype.kind in "iu":
4247 # Note: these checks are redundant if we know is_index_slice
4248 self._validate_indexer("slice", key.start, "getitem")
4249 self._validate_indexer("slice", key.stop, "getitem")
4250 self._validate_indexer("slice", key.step, "getitem")
4251 return key
4252
4253 # convert the slice to an indexer here; checking that the user didn't
4254 # pass a positional slice to loc
4255 is_positional = is_index_slice and self._should_fallback_to_positional
4256
4257 # if we are mixed and have integers
4258 if is_positional:
4259 try:
4260 # Validate start & stop
4261 if start is not None:
4262 self.get_loc(start)
4263 if stop is not None:
4264 self.get_loc(stop)
4265 is_positional = False
4266 except KeyError:
4267 pass
4268
4269 if com.is_null_slice(key):
4270 # It doesn't matter if we are positional or label based
4271 indexer = key
4272 elif is_positional:
4273 if kind == "loc":
4274 # GH#16121, GH#24612, GH#31810
4275 raise TypeError(
                    "Slicing a positional slice with .loc is not allowed. "
                    "Use .loc with labels or .iloc with positions instead.",
4278 )
4279 indexer = key
4280 else:
4281 indexer = self.slice_indexer(start, stop, step)
4282
4283 return indexer
4284
4285 @final
4286 def _raise_invalid_indexer(
4287 self,
4288 form: Literal["slice", "positional"],
4289 key,
4290 reraise: lib.NoDefault | None | Exception = lib.no_default,
4291 ) -> None:
4292 """
4293 Raise consistent invalid indexer message.
4294 """
4295 msg = (
4296 f"cannot do {form} indexing on {type(self).__name__} with these "
4297 f"indexers [{key}] of type {type(key).__name__}"
4298 )
4299 if reraise is not lib.no_default:
4300 raise TypeError(msg) from reraise
4301 raise TypeError(msg)
4302
4303 # --------------------------------------------------------------------
4304 # Reindex Methods
4305
4306 @final
4307 def _validate_can_reindex(self, indexer: np.ndarray) -> None:
4308 """
4309 Check if we are allowing reindexing with this particular indexer.
4310
4311 Parameters
4312 ----------
4313 indexer : an integer ndarray
4314
4315 Raises
4316 ------
        ValueError
            If the axis has duplicate labels.
4318 """
4319 # trying to reindex on an axis with duplicates
4320 if not self._index_as_unique and len(indexer):
4321 raise ValueError("cannot reindex on an axis with duplicate labels")
4322
4323 def reindex(
4324 self,
4325 target,
4326 method: ReindexMethod | None = None,
4327 level=None,
4328 limit: int | None = None,
4329 tolerance: float | None = None,
4330 ) -> tuple[Index, npt.NDArray[np.intp] | None]:
4331 """
4332 Create index with target's values.
4333
4334 Parameters
4335 ----------
4336 target : an iterable
4337 method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
4338 * default: exact matches only.
4339 * pad / ffill: find the PREVIOUS index value if no exact match.
            * backfill / bfill: use NEXT index value if no exact match.
4341 * nearest: use the NEAREST index value if no exact match. Tied
4342 distances are broken by preferring the larger index value.
4343 level : int, optional
4344 Level of multiindex.
4345 limit : int, optional
4346 Maximum number of consecutive labels in ``target`` to match for
4347 inexact matches.
4348 tolerance : int or float, optional
4349 Maximum distance between original and new labels for inexact
4350 matches. The values of the index at the matching locations must
4351 satisfy the equation ``abs(index[indexer] - target) <= tolerance``.
4352
4353 Tolerance may be a scalar value, which applies the same tolerance
4354 to all values, or list-like, which applies variable tolerance per
            element. List-like includes list, tuple, array, and Series; it
            must be the same size as the index, and its dtype must exactly
            match the index's dtype.
4358
4359 Returns
4360 -------
4361 new_index : pd.Index
4362 Resulting index.
4363 indexer : np.ndarray[np.intp] or None
4364 Indices of output values in original index.
4365
4366 Raises
4367 ------
4368 TypeError
4369 If ``method`` passed along with ``level``.
4370 ValueError
            If non-unique multi-index.
4372 ValueError
4373 If non-unique index and ``method`` or ``limit`` passed.
4374
4375 See Also
4376 --------
4377 Series.reindex : Conform Series to new index with optional filling logic.
4378 DataFrame.reindex : Conform DataFrame to new index with optional filling logic.
4379
4380 Examples
4381 --------
4382 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
4383 >>> idx
4384 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
4385 >>> idx.reindex(['car', 'bike'])
4386 (Index(['car', 'bike'], dtype='object'), array([0, 1]))
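
        Labels that are not present in the index are assigned ``-1`` in the
        returned indexer:

        >>> idx.reindex(['car', 'plane'])
        (Index(['car', 'plane'], dtype='object'), array([ 0, -1]))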
4387 """
4388 # GH6552: preserve names when reindexing to non-named target
4389 # (i.e. neither Index nor Series).
4390 preserve_names = not hasattr(target, "name")
4391
4392 # GH7774: preserve dtype/tz if target is empty and not an Index.
4393 target = ensure_has_len(target) # target may be an iterator
4394
4395 if not isinstance(target, Index) and len(target) == 0:
4396 if level is not None and self._is_multi:
4397 # "Index" has no attribute "levels"; maybe "nlevels"?
4398 idx = self.levels[level] # type: ignore[attr-defined]
4399 else:
4400 idx = self
4401 target = idx[:0]
4402 else:
4403 target = ensure_index(target)
4404
4405 if level is not None and (
4406 isinstance(self, ABCMultiIndex) or isinstance(target, ABCMultiIndex)
4407 ):
4408 if method is not None:
4409 raise TypeError("Fill method not supported if level passed")
4410
4411 # TODO: tests where passing `keep_order=not self._is_multi`
4412 # makes a difference for non-MultiIndex case
4413 target, indexer, _ = self._join_level(
4414 target, level, how="right", keep_order=not self._is_multi
4415 )
4416
4417 else:
4418 if self.equals(target):
4419 indexer = None
4420 else:
4421 if self._index_as_unique:
4422 indexer = self.get_indexer(
4423 target, method=method, limit=limit, tolerance=tolerance
4424 )
4425 elif self._is_multi:
4426 raise ValueError("cannot handle a non-unique multi-index!")
4427 elif not self.is_unique:
4428 # GH#42568
4429 raise ValueError("cannot reindex on an axis with duplicate labels")
4430 else:
4431 indexer, _ = self.get_indexer_non_unique(target)
4432
4433 target = self._wrap_reindex_result(target, indexer, preserve_names)
4434 return target, indexer
4435
4436 def _wrap_reindex_result(self, target, indexer, preserve_names: bool):
4437 target = self._maybe_preserve_names(target, preserve_names)
4438 return target
4439
4440 def _maybe_preserve_names(self, target: Index, preserve_names: bool):
4441 if preserve_names and target.nlevels == 1 and target.name != self.name:
4442 target = target.copy(deep=False)
4443 target.name = self.name
4444 return target
4445
4446 @final
4447 def _reindex_non_unique(
4448 self, target: Index
4449 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp] | None]:
4450 """
        Create a new index with target's values (move/add/delete values as
        necessary); for use with a non-unique Index and a possibly
        non-unique target.
4453
4454 Parameters
4455 ----------
4456 target : an iterable
4457
4458 Returns
4459 -------
4460 new_index : pd.Index
4461 Resulting index.
4462 indexer : np.ndarray[np.intp]
4463 Indices of output values in original index.
4464 new_indexer : np.ndarray[np.intp] or None
4466 """
4467 target = ensure_index(target)
4468 if len(target) == 0:
4469 # GH#13691
4470 return self[:0], np.array([], dtype=np.intp), None
4471
4472 indexer, missing = self.get_indexer_non_unique(target)
4473 check = indexer != -1
4474 new_labels: Index | np.ndarray = self.take(indexer[check])
4475 new_indexer = None
4476
4477 if len(missing):
4478 length = np.arange(len(indexer), dtype=np.intp)
4479
4480 missing = ensure_platform_int(missing)
4481 missing_labels = target.take(missing)
4482 missing_indexer = length[~check]
4483 cur_labels = self.take(indexer[check]).values
4484 cur_indexer = length[check]
4485
4486 # Index constructor below will do inference
4487 new_labels = np.empty((len(indexer),), dtype=object)
4488 new_labels[cur_indexer] = cur_labels
4489 new_labels[missing_indexer] = missing_labels
4490
4491 # GH#38906
4492 if not len(self):
4493 new_indexer = np.arange(0, dtype=np.intp)
4494
4495 # a unique indexer
4496 elif target.is_unique:
4497 # see GH5553, make sure we use the right indexer
4498 new_indexer = np.arange(len(indexer), dtype=np.intp)
4499 new_indexer[cur_indexer] = np.arange(len(cur_labels))
4500 new_indexer[missing_indexer] = -1
4501
4502 # we have a non_unique selector, need to use the original
4503 # indexer here
4504 else:
4505 # need to retake to have the same size as the indexer
4506 indexer[~check] = -1
4507
4508 # reset the new indexer to account for the new size
4509 new_indexer = np.arange(len(self.take(indexer)), dtype=np.intp)
4510 new_indexer[~check] = -1
4511
4512 if not isinstance(self, ABCMultiIndex):
4513 new_index = Index(new_labels, name=self.name)
4514 else:
4515 new_index = type(self).from_tuples(new_labels, names=self.names)
4516 return new_index, indexer, new_indexer
4517
4518 # --------------------------------------------------------------------
4519 # Join Methods
4520
4521 @overload
4522 def join(
4523 self,
4524 other: Index,
4525 *,
4526 how: JoinHow = ...,
4527 level: Level = ...,
4528 return_indexers: Literal[True],
4529 sort: bool = ...,
4530 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4531 ...
4532
4533 @overload
4534 def join(
4535 self,
4536 other: Index,
4537 *,
4538 how: JoinHow = ...,
4539 level: Level = ...,
4540 return_indexers: Literal[False] = ...,
4541 sort: bool = ...,
4542 ) -> Index:
4543 ...
4544
4545 @overload
4546 def join(
4547 self,
4548 other: Index,
4549 *,
4550 how: JoinHow = ...,
4551 level: Level = ...,
4552 return_indexers: bool = ...,
4553 sort: bool = ...,
4554 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4555 ...
4556
4557 @final
4558 @_maybe_return_indexers
4559 def join(
4560 self,
4561 other: Index,
4562 *,
4563 how: JoinHow = "left",
4564 level: Level | None = None,
4565 return_indexers: bool = False,
4566 sort: bool = False,
4567 ) -> Index | tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4568 """
4569 Compute join_index and indexers to conform data structures to the new index.
4570
4571 Parameters
4572 ----------
4573 other : Index
4574 how : {'left', 'right', 'inner', 'outer'}
4575 level : int or level name, default None
4576 return_indexers : bool, default False
4577 sort : bool, default False
4578 Sort the join keys lexicographically in the result Index. If False,
4579 the order of the join keys depends on the join type (how keyword).
4580
4581 Returns
4582 -------
4583 join_index, (left_indexer, right_indexer)
4584
4585 Examples
4586 --------
4587 >>> idx1 = pd.Index([1, 2, 3])
4588 >>> idx2 = pd.Index([4, 5, 6])
4589 >>> idx1.join(idx2, how='outer')
4590 Index([1, 2, 3, 4, 5, 6], dtype='int64')
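
        An inner join keeps only the labels present in both indexes; with
        ``return_indexers=True``, the matching positions in each input are
        returned as well:

        >>> idx1 = pd.Index([1, 2, 3, 4])
        >>> idx2 = pd.Index([3, 4, 5, 6])
        >>> idx1.join(idx2, how='inner')
        Index([3, 4], dtype='int64')
        >>> idx1.join(idx2, how='inner', return_indexers=True)
        (Index([3, 4], dtype='int64'), array([2, 3]), array([0, 1]))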
4591 """
4592 other = ensure_index(other)
4593 sort = sort or how == "outer"
4594
4595 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
4596 if (self.tz is None) ^ (other.tz is None):
4597 # Raise instead of casting to object below.
4598 raise TypeError("Cannot join tz-naive with tz-aware DatetimeIndex")
4599
4600 if not self._is_multi and not other._is_multi:
4601 # We have specific handling for MultiIndex below
4602 pself, pother = self._maybe_downcast_for_indexing(other)
4603 if pself is not self or pother is not other:
4604 return pself.join(
4605 pother, how=how, level=level, return_indexers=True, sort=sort
4606 )
4607
4608 # try to figure out the join level
4609 # GH3662
4610 if level is None and (self._is_multi or other._is_multi):
4611 # have the same levels/names so a simple join
4612 if self.names == other.names:
4613 pass
4614 else:
4615 return self._join_multi(other, how=how)
4616
4617 # join on the level
4618 if level is not None and (self._is_multi or other._is_multi):
4619 return self._join_level(other, level, how=how)
4620
4621 if len(self) == 0 or len(other) == 0:
4622 try:
4623 return self._join_empty(other, how, sort)
4624 except TypeError:
4625 # object dtype; non-comparable objects
4626 pass
4627
4628 if self.dtype != other.dtype:
4629 dtype = self._find_common_type_compat(other)
4630 this = self.astype(dtype, copy=False)
4631 other = other.astype(dtype, copy=False)
4632 return this.join(other, how=how, return_indexers=True)
4633 elif (
4634 isinstance(self, ABCCategoricalIndex)
4635 and isinstance(other, ABCCategoricalIndex)
4636 and not self.ordered
4637 and not self.categories.equals(other.categories)
4638 ):
4639 # dtypes are "equal" but categories are in different order
4640 other = Index(other._values.reorder_categories(self.categories))
4641
4642 _validate_join_method(how)
4643
4644 if (
4645 self.is_monotonic_increasing
4646 and other.is_monotonic_increasing
4647 and self._can_use_libjoin
4648 and other._can_use_libjoin
4649 and (self.is_unique or other.is_unique)
4650 ):
4651 try:
4652 return self._join_monotonic(other, how=how)
4653 except TypeError:
4654 # object dtype; non-comparable objects
4655 pass
4656 elif not self.is_unique or not other.is_unique:
4657 return self._join_non_unique(other, how=how, sort=sort)
4658
4659 return self._join_via_get_indexer(other, how, sort)
4660
4661 @final
4662 def _join_empty(
4663 self, other: Index, how: JoinHow, sort: bool
4664 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4665 assert len(self) == 0 or len(other) == 0
4666 _validate_join_method(how)
4667
4668 lidx: np.ndarray | None
4669 ridx: np.ndarray | None
4670
4671 if len(other):
4672 how = cast(JoinHow, {"left": "right", "right": "left"}.get(how, how))
4673 join_index, ridx, lidx = other._join_empty(self, how, sort)
4674 elif how in ["left", "outer"]:
4675 if sort and not self.is_monotonic_increasing:
4676 lidx = self.argsort()
4677 join_index = self.take(lidx)
4678 else:
4679 lidx = None
4680 join_index = self._view()
4681 ridx = np.broadcast_to(np.intp(-1), len(join_index))
4682 else:
4683 join_index = other._view()
4684 lidx = np.array([], dtype=np.intp)
4685 ridx = None
4686 return join_index, lidx, ridx
4687
4688 @final
4689 def _join_via_get_indexer(
4690 self, other: Index, how: JoinHow, sort: bool
4691 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4692 # Fallback if we do not have any fastpaths available based on
4693 # uniqueness/monotonicity
4694
4695 # Note: at this point we have checked matching dtypes
4696
4697 if how == "left":
4698 join_index = self.sort_values() if sort else self
4699 elif how == "right":
4700 join_index = other.sort_values() if sort else other
4701 elif how == "inner":
4702 join_index = self.intersection(other, sort=sort)
4703 elif how == "outer":
4704 try:
4705 join_index = self.union(other, sort=sort)
4706 except TypeError:
4707 join_index = self.union(other)
4708 try:
4709 join_index = _maybe_try_sort(join_index, sort)
4710 except TypeError:
4711 pass
4712
4713 if join_index is self:
4714 lindexer = None
4715 else:
4716 lindexer = self.get_indexer_for(join_index)
4717 if join_index is other:
4718 rindexer = None
4719 else:
4720 rindexer = other.get_indexer_for(join_index)
4721 return join_index, lindexer, rindexer
4722
4723 @final
4724 def _join_multi(self, other: Index, how: JoinHow):
4725 from pandas.core.indexes.multi import MultiIndex
4726 from pandas.core.reshape.merge import restore_dropped_levels_multijoin
4727
4728 # figure out join names
4729 self_names_list = list(com.not_none(*self.names))
4730 other_names_list = list(com.not_none(*other.names))
4731 self_names_order = self_names_list.index
4732 other_names_order = other_names_list.index
4733 self_names = set(self_names_list)
4734 other_names = set(other_names_list)
4735 overlap = self_names & other_names
4736
4737 # need at least 1 in common
4738 if not overlap:
4739 raise ValueError("cannot join with no overlapping index names")
4740
4741 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
4742 # Drop the non-matching levels from left and right respectively
4743 ldrop_names = sorted(self_names - overlap, key=self_names_order)
4744 rdrop_names = sorted(other_names - overlap, key=other_names_order)
4745
4746 # if only the order differs
4747 if not len(ldrop_names + rdrop_names):
4748 self_jnlevels = self
4749 other_jnlevels = other.reorder_levels(self.names)
4750 else:
4751 self_jnlevels = self.droplevel(ldrop_names)
4752 other_jnlevels = other.droplevel(rdrop_names)
4753
4754 # Join left and right
            # Joining on identically-leveled MultiIndexes is supported
4756 join_idx, lidx, ridx = self_jnlevels.join(
4757 other_jnlevels, how=how, return_indexers=True
4758 )
4759
4760 # Restore the dropped levels
4761 # Returned index level order is
4762 # common levels, ldrop_names, rdrop_names
4763 dropped_names = ldrop_names + rdrop_names
4764
4765 # error: Argument 5/6 to "restore_dropped_levels_multijoin" has
4766 # incompatible type "Optional[ndarray[Any, dtype[signedinteger[Any
4767 # ]]]]"; expected "ndarray[Any, dtype[signedinteger[Any]]]"
4768 levels, codes, names = restore_dropped_levels_multijoin(
4769 self,
4770 other,
4771 dropped_names,
4772 join_idx,
4773 lidx, # type: ignore[arg-type]
4774 ridx, # type: ignore[arg-type]
4775 )
4776
4777 # Re-create the multi-index
4778 multi_join_idx = MultiIndex(
4779 levels=levels, codes=codes, names=names, verify_integrity=False
4780 )
4781
4782 multi_join_idx = multi_join_idx.remove_unused_levels()
4783
4784 # maintain the order of the index levels
4785 if how == "right":
4786 level_order = other_names_list + ldrop_names
4787 else:
4788 level_order = self_names_list + rdrop_names
4789 multi_join_idx = multi_join_idx.reorder_levels(level_order)
4790
4791 return multi_join_idx, lidx, ridx
4792
4793 jl = next(iter(overlap))
4794
        # Case where only one index is a MultiIndex:
        #  make the indexes into MultiIndexes that match
4797 flip_order = False
4798 if isinstance(self, MultiIndex):
4799 self, other = other, self
4800 flip_order = True
4801 # flip if join method is right or left
4802 flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
4803 how = flip.get(how, how)
4804
4805 level = other.names.index(jl)
4806 result = self._join_level(other, level, how=how)
4807
4808 if flip_order:
4809 return result[0], result[2], result[1]
4810 return result
4811
4812 @final
4813 def _join_non_unique(
4814 self, other: Index, how: JoinHow = "left", sort: bool = False
4815 ) -> tuple[Index, npt.NDArray[np.intp], npt.NDArray[np.intp]]:
4816 from pandas.core.reshape.merge import get_join_indexers_non_unique
4817
4818 # We only get here if dtypes match
4819 assert self.dtype == other.dtype
4820
4821 left_idx, right_idx = get_join_indexers_non_unique(
4822 self._values, other._values, how=how, sort=sort
4823 )
4824 mask = left_idx == -1
4825
4826 join_idx = self.take(left_idx)
4827 right = other.take(right_idx)
4828 join_index = join_idx.putmask(mask, right)
4829 if isinstance(join_index, ABCMultiIndex) and how == "outer":
4830 # test_join_index_levels
4831 join_index = join_index._sort_levels_monotonic()
4832 return join_index, left_idx, right_idx
4833
4834 @final
4835 def _join_level(
4836 self, other: Index, level, how: JoinHow = "left", keep_order: bool = True
4837 ) -> tuple[MultiIndex, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4838 """
4839 The join method *only* affects the level of the resulting
4840 MultiIndex. Otherwise it just exactly aligns the Index data to the
4841 labels of the level in the MultiIndex.
4842
        If ``keep_order == True``, the order of the data indexed by the
        MultiIndex will not be changed; otherwise, it will tie out
        with ``other``.
4846 """
4847 from pandas.core.indexes.multi import MultiIndex
4848
4849 def _get_leaf_sorter(labels: list[np.ndarray]) -> npt.NDArray[np.intp]:
4850 """
            Returns sorter for the innermost level while preserving the
4852 order of higher levels.
4853
4854 Parameters
4855 ----------
4856 labels : list[np.ndarray]
4857 Each ndarray has signed integer dtype, not necessarily identical.
4858
4859 Returns
4860 -------
4861 np.ndarray[np.intp]
4862 """
4863 if labels[0].size == 0:
4864 return np.empty(0, dtype=np.intp)
4865
4866 if len(labels) == 1:
4867 return get_group_index_sorter(ensure_platform_int(labels[0]))
4868
            # find indexers marking the beginning of each run of equal
            #  labels w.r.t. all but the last level
4871 tic = labels[0][:-1] != labels[0][1:]
4872 for lab in labels[1:-1]:
4873 tic |= lab[:-1] != lab[1:]
4874
4875 starts = np.hstack(([True], tic, [True])).nonzero()[0]
4876 lab = ensure_int64(labels[-1])
4877 return lib.get_level_sorter(lab, ensure_platform_int(starts))
4878
4879 if isinstance(self, MultiIndex) and isinstance(other, MultiIndex):
4880 raise TypeError("Join on level between two MultiIndex objects is ambiguous")
4881
4882 left, right = self, other
4883
4884 flip_order = not isinstance(self, MultiIndex)
4885 if flip_order:
4886 left, right = right, left
4887 flip: dict[JoinHow, JoinHow] = {"right": "left", "left": "right"}
4888 how = flip.get(how, how)
4889
4890 assert isinstance(left, MultiIndex)
4891
4892 level = left._get_level_number(level)
4893 old_level = left.levels[level]
4894
4895 if not right.is_unique:
4896 raise NotImplementedError(
4897 "Index._join_level on non-unique index is not implemented"
4898 )
4899
4900 new_level, left_lev_indexer, right_lev_indexer = old_level.join(
4901 right, how=how, return_indexers=True
4902 )
4903
4904 if left_lev_indexer is None:
4905 if keep_order or len(left) == 0:
4906 left_indexer = None
4907 join_index = left
4908 else: # sort the leaves
4909 left_indexer = _get_leaf_sorter(left.codes[: level + 1])
4910 join_index = left[left_indexer]
4911
4912 else:
4913 left_lev_indexer = ensure_platform_int(left_lev_indexer)
4914 rev_indexer = lib.get_reverse_indexer(left_lev_indexer, len(old_level))
4915 old_codes = left.codes[level]
4916
4917 taker = old_codes[old_codes != -1]
4918 new_lev_codes = rev_indexer.take(taker)
4919
4920 new_codes = list(left.codes)
4921 new_codes[level] = new_lev_codes
4922
4923 new_levels = list(left.levels)
4924 new_levels[level] = new_level
4925
            if keep_order:  # just drop missing values, preserving the order
4927 left_indexer = np.arange(len(left), dtype=np.intp)
4928 left_indexer = cast(np.ndarray, left_indexer)
4929 mask = new_lev_codes != -1
4930 if not mask.all():
4931 new_codes = [lab[mask] for lab in new_codes]
4932 left_indexer = left_indexer[mask]
4933
4934 else: # tie out the order with other
                if level == 0:  # outermost level, take the fast route
4936 max_new_lev = 0 if len(new_lev_codes) == 0 else new_lev_codes.max()
4937 ngroups = 1 + max_new_lev
4938 left_indexer, counts = libalgos.groupsort_indexer(
4939 new_lev_codes, ngroups
4940 )
4941
4942 # missing values are placed first; drop them!
4943 left_indexer = left_indexer[counts[0] :]
4944 new_codes = [lab[left_indexer] for lab in new_codes]
4945
4946 else: # sort the leaves
4947 mask = new_lev_codes != -1
4948 mask_all = mask.all()
4949 if not mask_all:
4950 new_codes = [lab[mask] for lab in new_codes]
4951
4952 left_indexer = _get_leaf_sorter(new_codes[: level + 1])
4953 new_codes = [lab[left_indexer] for lab in new_codes]
4954
4955 # left_indexers are w.r.t masked frame.
4956 # reverse to original frame!
4957 if not mask_all:
4958 left_indexer = mask.nonzero()[0][left_indexer]
4959
4960 join_index = MultiIndex(
4961 levels=new_levels,
4962 codes=new_codes,
4963 names=left.names,
4964 verify_integrity=False,
4965 )
4966
4967 if right_lev_indexer is not None:
4968 right_indexer = right_lev_indexer.take(join_index.codes[level])
4969 else:
4970 right_indexer = join_index.codes[level]
4971
4972 if flip_order:
4973 left_indexer, right_indexer = right_indexer, left_indexer
4974
4975 left_indexer = (
4976 None if left_indexer is None else ensure_platform_int(left_indexer)
4977 )
4978 right_indexer = (
4979 None if right_indexer is None else ensure_platform_int(right_indexer)
4980 )
4981 return join_index, left_indexer, right_indexer
4982
4983 @final
4984 def _join_monotonic(
4985 self, other: Index, how: JoinHow = "left"
4986 ) -> tuple[Index, npt.NDArray[np.intp] | None, npt.NDArray[np.intp] | None]:
4987 # We only get here with matching dtypes and both monotonic increasing
4988 assert other.dtype == self.dtype
4989 assert self._can_use_libjoin and other._can_use_libjoin
4990
4991 if self.equals(other):
4992 # This is a convenient place for this check, but its correctness
4993 # does not depend on monotonicity, so it could go earlier
4994 # in the calling method.
4995 ret_index = other if how == "right" else self
4996 return ret_index, None, None
4997
4998 ridx: npt.NDArray[np.intp] | None
4999 lidx: npt.NDArray[np.intp] | None
5000
5001 if self.is_unique and other.is_unique:
5002 # We can perform much better than the general case
5003 if how == "left":
5004 join_index = self
5005 lidx = None
5006 ridx = self._left_indexer_unique(other)
5007 elif how == "right":
5008 join_index = other
5009 lidx = other._left_indexer_unique(self)
5010 ridx = None
5011 elif how == "inner":
5012 join_array, lidx, ridx = self._inner_indexer(other)
5013 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5014 elif how == "outer":
5015 join_array, lidx, ridx = self._outer_indexer(other)
5016 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5017 else:
5018 if how == "left":
5019 join_array, lidx, ridx = self._left_indexer(other)
5020 elif how == "right":
5021 join_array, ridx, lidx = other._left_indexer(self)
5022 elif how == "inner":
5023 join_array, lidx, ridx = self._inner_indexer(other)
5024 elif how == "outer":
5025 join_array, lidx, ridx = self._outer_indexer(other)
5026
5027 assert lidx is not None
5028 assert ridx is not None
5029
5030 join_index = self._wrap_joined_index(join_array, other, lidx, ridx)
5031
5032 lidx = None if lidx is None else ensure_platform_int(lidx)
5033 ridx = None if ridx is None else ensure_platform_int(ridx)
5034 return join_index, lidx, ridx
5035
5036 def _wrap_joined_index(
5037 self,
5038 joined: ArrayLike,
5039 other: Self,
5040 lidx: npt.NDArray[np.intp],
5041 ridx: npt.NDArray[np.intp],
5042 ) -> Self:
5043 assert other.dtype == self.dtype
5044
5045 if isinstance(self, ABCMultiIndex):
5046 name = self.names if self.names == other.names else None
5047 # error: Incompatible return value type (got "MultiIndex",
5048 # expected "Self")
5049 mask = lidx == -1
5050 join_idx = self.take(lidx)
5051 right = cast("MultiIndex", other.take(ridx))
5052 join_index = join_idx.putmask(mask, right)._sort_levels_monotonic()
5053 return join_index.set_names(name) # type: ignore[return-value]
5054 else:
5055 name = get_op_result_name(self, other)
5056 return self._constructor._with_infer(joined, name=name, dtype=self.dtype)
5057
5058 @final
5059 @cache_readonly
5060 def _can_use_libjoin(self) -> bool:
5061 """
5062 Whether we can use the fastpaths implemented in _libs.join.
5063
5064 This is driven by whether (in monotonic increasing cases that are
5065 guaranteed not to have NAs) we can convert to a np.ndarray without
5066 making a copy. If we cannot, this negates the performance benefit
5067 of using libjoin.
5068 """
5069 if type(self) is Index:
            # excludes most EAs, but includes masked and pyarrow arrays; we
            # get here with monotonic values only, meaning no NAs
5072 return (
5073 isinstance(self.dtype, np.dtype)
5074 or isinstance(self._values, (ArrowExtensionArray, BaseMaskedArray))
5075 or self.dtype == "string[python]"
5076 )
5077 # Exclude index types where the conversion to numpy converts to object dtype,
5078 # which negates the performance benefit of libjoin
5079 # Subclasses should override to return False if _get_join_target is
5080 # not zero-copy.
5081 # TODO: exclude RangeIndex (which allocates memory)?
5082 # Doing so seems to break test_concat_datetime_timezone
5083 return not isinstance(self, (ABCIntervalIndex, ABCMultiIndex))
5084
5085 # --------------------------------------------------------------------
5086 # Uncategorized Methods
5087
5088 @property
5089 def values(self) -> ArrayLike:
5090 """
5091 Return an array representing the data in the Index.
5092
5093 .. warning::
5094
5095 We recommend using :attr:`Index.array` or
5096 :meth:`Index.to_numpy`, depending on whether you need
5097 a reference to the underlying data or a NumPy array.
5098
5099 Returns
5100 -------
        array : numpy.ndarray or ExtensionArray
5102
5103 See Also
5104 --------
5105 Index.array : Reference to the underlying data.
5106 Index.to_numpy : A NumPy array representing the underlying data.
5107
5108 Examples
5109 --------
5110 For :class:`pandas.Index`:
5111
5112 >>> idx = pd.Index([1, 2, 3])
5113 >>> idx
5114 Index([1, 2, 3], dtype='int64')
5115 >>> idx.values
5116 array([1, 2, 3])
5117
5118 For :class:`pandas.IntervalIndex`:
5119
5120 >>> idx = pd.interval_range(start=0, end=5)
5121 >>> idx.values
5122 <IntervalArray>
5123 [(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]]
5124 Length: 5, dtype: interval[int64, right]
5125 """
5126 if using_copy_on_write():
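            # Under Copy-on-Write, ndarray-backed data is exposed as a
            #  read-only view so that mutating the returned array cannot
            #  silently mutate the Index.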
5127 data = self._data
5128 if isinstance(data, np.ndarray):
5129 data = data.view()
5130 data.flags.writeable = False
5131 return data
5132 return self._data
5133
5134 @cache_readonly
5135 @doc(IndexOpsMixin.array)
5136 def array(self) -> ExtensionArray:
5137 array = self._data
5138 if isinstance(array, np.ndarray):
5139 from pandas.core.arrays.numpy_ import NumpyExtensionArray
5140
5141 array = NumpyExtensionArray(array)
5142 return array
5143
5144 @property
5145 def _values(self) -> ExtensionArray | np.ndarray:
5146 """
5147 The best array representation.
5148
5149 This is an ndarray or ExtensionArray.
5150
5151 ``_values`` are consistent between ``Series`` and ``Index``.
5152
        It may differ from the public ``.values`` attribute.
5154
5155 index | values | _values |
5156 ----------------- | --------------- | ------------- |
5157 Index | ndarray | ndarray |
5158 CategoricalIndex | Categorical | Categorical |
5159 DatetimeIndex | ndarray[M8ns] | DatetimeArray |
5160 DatetimeIndex[tz] | ndarray[M8ns] | DatetimeArray |
5161 PeriodIndex | ndarray[object] | PeriodArray |
5162 IntervalIndex | IntervalArray | IntervalArray |
5163
5164 See Also
5165 --------
        values : The public values attribute.
5167 """
5168 return self._data
5169
5170 def _get_engine_target(self) -> ArrayLike:
5171 """
5172 Get the ndarray or ExtensionArray that we can pass to the IndexEngine
5173 constructor.
5174 """
5175 vals = self._values
5176 if isinstance(vals, StringArray):
5177 # GH#45652 much more performant than ExtensionEngine
5178 return vals._ndarray
5179 if isinstance(vals, ArrowExtensionArray) and self.dtype.kind in "Mm":
5180 import pyarrow as pa
5181
5182 pa_type = vals._pa_array.type
5183 if pa.types.is_timestamp(pa_type):
5184 vals = vals._to_datetimearray()
5185 return vals._ndarray.view("i8")
5186 elif pa.types.is_duration(pa_type):
5187 vals = vals._to_timedeltaarray()
5188 return vals._ndarray.view("i8")
5189 if (
5190 type(self) is Index
5191 and isinstance(self._values, ExtensionArray)
5192 and not isinstance(self._values, BaseMaskedArray)
5193 and not (
5194 isinstance(self._values, ArrowExtensionArray)
5195 and is_numeric_dtype(self.dtype)
5196 # Exclude decimal
5197 and self.dtype.kind != "O"
5198 )
5199 ):
5200 # TODO(ExtensionIndex): remove special-case, just use self._values
5201 return self._values.astype(object)
5202 return vals
5203
5204 @final
5205 def _get_join_target(self) -> np.ndarray:
5206 """
        Get the np.ndarray that we can pass to the join functions.
5209 """
5210 if isinstance(self._values, BaseMaskedArray):
5211 # This is only used if our array is monotonic, so no NAs present
5212 return self._values._data
5213 elif isinstance(self._values, ArrowExtensionArray):
5214 # This is only used if our array is monotonic, so no missing values
5215 # present
5216 return self._values.to_numpy()
5217
5218 # TODO: exclude ABCRangeIndex case here as it copies
5219 target = self._get_engine_target()
5220 if not isinstance(target, np.ndarray):
5221 raise ValueError("_can_use_libjoin should return False.")
5222 return target
5223
5224 def _from_join_target(self, result: np.ndarray) -> ArrayLike:
5225 """
5226 Cast the ndarray returned from one of the libjoin.foo_indexer functions
5227 back to type(self._data).
5228 """
5229 if isinstance(self.values, BaseMaskedArray):
5230 return type(self.values)(result, np.zeros(result.shape, dtype=np.bool_))
5231 elif isinstance(self.values, (ArrowExtensionArray, StringArray)):
5232 return type(self.values)._from_sequence(result, dtype=self.dtype)
5233 return result
5234
5235 @doc(IndexOpsMixin._memory_usage)
5236 def memory_usage(self, deep: bool = False) -> int:
5237 result = self._memory_usage(deep=deep)
5238
5239 # include our engine hashtable
5240 result += self._engine.sizeof(deep=deep)
5241 return result
5242
5243 @final
5244 def where(self, cond, other=None) -> Index:
5245 """
5246 Replace values where the condition is False.
5247
5248 The replacement is taken from other.
5249
5250 Parameters
5251 ----------
5252 cond : bool array-like with the same length as self
5253 Condition to select the values on.
5254 other : scalar, or array-like, default None
5255 Replacement if the condition is False.
5256
5257 Returns
5258 -------
5259 pandas.Index
5260 A copy of self with values replaced from other
5261 where the condition is False.
5262
5263 See Also
5264 --------
5265 Series.where : Same method for Series.
5266 DataFrame.where : Same method for DataFrame.
5267
5268 Examples
5269 --------
5270 >>> idx = pd.Index(['car', 'bike', 'train', 'tractor'])
5271 >>> idx
5272 Index(['car', 'bike', 'train', 'tractor'], dtype='object')
5273 >>> idx.where(idx.isin(['car', 'train']), 'other')
5274 Index(['car', 'other', 'train', 'other'], dtype='object')
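
        ``other`` may also be array-like, in which case replacements are
        taken elementwise:

        >>> idx.where(idx.isin(['car', 'train']), ['a', 'b', 'c', 'd'])
        Index(['car', 'b', 'train', 'd'], dtype='object')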
5275 """
5276 if isinstance(self, ABCMultiIndex):
5277 raise NotImplementedError(
5278 ".where is not supported for MultiIndex operations"
5279 )
5280 cond = np.asarray(cond, dtype=bool)
5281 return self.putmask(~cond, other)
5282
5283 # construction helpers
5284 @final
5285 @classmethod
5286 def _raise_scalar_data_error(cls, data):
5287 # We return the TypeError so that we can raise it from the constructor
5288 # in order to keep mypy happy
5289 raise TypeError(
5290 f"{cls.__name__}(...) must be called with a collection of some "
5291 f"kind, {repr(data) if not isinstance(data, np.generic) else str(data)} "
5292 "was passed"
5293 )
5294
5295 def _validate_fill_value(self, value):
5296 """
5297 Check if the value can be inserted into our array without casting,
5298 and convert it to an appropriate native type if necessary.
5299
5300 Raises
5301 ------
5302 TypeError
5303 If the value cannot be inserted into an array of this dtype.
5304 """
5305 dtype = self.dtype
5306 if isinstance(dtype, np.dtype) and dtype.kind not in "mM":
5308 try:
5309 return np_can_hold_element(dtype, value)
5310 except LossySetitemError as err:
5311 # re-raise as TypeError for consistency
5312 raise TypeError from err
5313 elif not can_hold_element(self._values, value):
5314 raise TypeError
5315 return value
5316
5317 def _is_memory_usage_qualified(self) -> bool:
5318 """
        Return True if we need a qualified .info display.
5320 """
5321 return is_object_dtype(self.dtype)
5322
5323 def __contains__(self, key: Any) -> bool:
5324 """
5325 Return a boolean indicating whether the provided key is in the index.
5326
5327 Parameters
5328 ----------
5329 key : label
5330 The key to check if it is present in the index.
5331
5332 Returns
5333 -------
        bool
            True if the key is in the index, False otherwise.
5336
5337 Raises
5338 ------
5339 TypeError
5340 If the key is not hashable.
5341
5342 See Also
5343 --------
5344 Index.isin : Returns an ndarray of boolean dtype indicating whether the
5345 list-like key is in the index.
5346
5347 Examples
5348 --------
5349 >>> idx = pd.Index([1, 2, 3, 4])
5350 >>> idx
5351 Index([1, 2, 3, 4], dtype='int64')
5352
5353 >>> 2 in idx
5354 True
5355 >>> 6 in idx
5356 False
5357 """
5358 hash(key)
5359 try:
5360 return key in self._engine
5361 except (OverflowError, TypeError, ValueError):
5362 return False
5363
5364 # https://github.com/python/typeshed/issues/2148#issuecomment-520783318
5365 # Incompatible types in assignment (expression has type "None", base class
5366 # "object" defined the type as "Callable[[object], int]")
5367 __hash__: ClassVar[None] # type: ignore[assignment]
5368
5369 @final
5370 def __setitem__(self, key, value) -> None:
5371 raise TypeError("Index does not support mutable operations")
5372
5373 def __getitem__(self, key):
5374 """
5375 Override numpy.ndarray's __getitem__ method to work as desired.
5376
        This function adds lists and Series as valid boolean indexers
        (ndarray only supports ndarray with dtype=bool).

        If the resulting ndim != 1, a plain ndarray is returned instead of
        the corresponding `Index` subclass.
5382
5383 """
5384 getitem = self._data.__getitem__
5385
5386 if is_integer(key) or is_float(key):
5387 # GH#44051 exclude bool, which would return a 2d ndarray
5388 key = com.cast_scalar_indexer(key)
5389 return getitem(key)
5390
5391 if isinstance(key, slice):
            # This case is separated from the conditional above to avoid
            # the pessimization of com.is_bool_indexer and ndim checks.
5394 return self._getitem_slice(key)
5395
5396 if com.is_bool_indexer(key):
5397 # if we have list[bools, length=1e5] then doing this check+convert
5398 # takes 166 µs + 2.1 ms and cuts the ndarray.__getitem__
5399 # time below from 3.8 ms to 496 µs
5400 # if we already have ndarray[bool], the overhead is 1.4 µs or .25%
5401 if isinstance(getattr(key, "dtype", None), ExtensionDtype):
5402 key = key.to_numpy(dtype=bool, na_value=False)
5403 else:
5404 key = np.asarray(key, dtype=bool)
5405
5406 if not isinstance(self.dtype, ExtensionDtype):
5407 if len(key) == 0 and len(key) != len(self):
5408 warnings.warn(
5409 "Using a boolean indexer with length 0 on an Index with "
5410 "length greater than 0 is deprecated and will raise in a "
5411 "future version.",
5412 FutureWarning,
5413 stacklevel=find_stack_level(),
5414 )
5415
5416 result = getitem(key)
5417 # Because we ruled out integer above, we always get an arraylike here
5418 if result.ndim > 1:
5419 disallow_ndim_indexing(result)
5420
5421 # NB: Using _constructor._simple_new would break if MultiIndex
5422 # didn't override __getitem__
5423 return self._constructor._simple_new(result, name=self._name)
5424
5425 def _getitem_slice(self, slobj: slice) -> Self:
5426 """
5427 Fastpath for __getitem__ when we know we have a slice.
5428 """
5429 res = self._data[slobj]
5430 result = type(self)._simple_new(res, name=self._name, refs=self._references)
5431 if "_engine" in self._cache:
5432 reverse = slobj.step is not None and slobj.step < 0
5433 result._engine._update_from_sliced(self._engine, reverse=reverse) # type: ignore[union-attr]
5434
5435 return result
5436
5437 @final
5438 def _can_hold_identifiers_and_holds_name(self, name) -> bool:
5439 """
5440 Faster check for ``name in self`` when we know `name` is a Python
5441 identifier (e.g. in NDFrame.__getattr__, which hits this to support
5442 . key lookup). For indexes that can't hold identifiers (everything
        but object, string & categorical) we just return False.
5444
5445 https://github.com/pandas-dev/pandas/issues/19764
5446 """
5447 if (
5448 is_object_dtype(self.dtype)
5449 or is_string_dtype(self.dtype)
5450 or isinstance(self.dtype, CategoricalDtype)
5451 ):
5452 return name in self
5453 return False
5454
5455 def append(self, other: Index | Sequence[Index]) -> Index:
5456 """
        Append a collection of Index objects together.
5458
5459 Parameters
5460 ----------
5461 other : Index or list/tuple of indices
5462
5463 Returns
5464 -------
5465 Index
5466
5467 Examples
5468 --------
5469 >>> idx = pd.Index([1, 2, 3])
5470 >>> idx.append(pd.Index([4]))
5471 Index([1, 2, 3, 4], dtype='int64')
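
        A list or tuple of Index objects can be appended in a single call:

        >>> idx.append([pd.Index([4]), pd.Index([5, 6])])
        Index([1, 2, 3, 4, 5, 6], dtype='int64')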
5472 """
5473 to_concat = [self]
5474
5475 if isinstance(other, (list, tuple)):
5476 to_concat += list(other)
5477 else:
5478 # error: Argument 1 to "append" of "list" has incompatible type
5479 # "Union[Index, Sequence[Index]]"; expected "Index"
5480 to_concat.append(other) # type: ignore[arg-type]
5481
5482 for obj in to_concat:
5483 if not isinstance(obj, Index):
5484 raise TypeError("all inputs must be Index")
5485
5486 names = {obj.name for obj in to_concat}
5487 name = None if len(names) > 1 else self.name
5488
5489 return self._concat(to_concat, name)
5490
5491 def _concat(self, to_concat: list[Index], name: Hashable) -> Index:
5492 """
5493 Concatenate multiple Index objects.
5494 """
5495 to_concat_vals = [x._values for x in to_concat]
5496
5497 result = concat_compat(to_concat_vals)
5498
5499 return Index._with_infer(result, name=name)
5500
5501 def putmask(self, mask, value) -> Index:
5502 """
        Return a new Index with values replaced where the mask is True.
5504
5505 Returns
5506 -------
5507 Index
5508
5509 See Also
5510 --------
5511 numpy.ndarray.putmask : Changes elements of an array
5512 based on conditional and input values.
5513
5514 Examples
5515 --------
5516 >>> idx1 = pd.Index([1, 2, 3])
5517 >>> idx2 = pd.Index([5, 6, 7])
5518 >>> idx1.putmask([True, False, False], idx2)
5519 Index([5, 2, 3], dtype='int64')
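
        A scalar value is broadcast to all positions selected by the mask:

        >>> idx1.putmask([True, False, False], 10)
        Index([10, 2, 3], dtype='int64')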
5520 """
5521 mask, noop = validate_putmask(self._values, mask)
5522 if noop:
5523 return self.copy()
5524
5525 if self.dtype != object and is_valid_na_for_dtype(value, self.dtype):
5526 # e.g. None -> np.nan, see also Block._standardize_fill_value
5527 value = self._na_value
5528
5529 try:
5530 converted = self._validate_fill_value(value)
5531 except (LossySetitemError, ValueError, TypeError) as err:
5532 if is_object_dtype(self.dtype): # pragma: no cover
5533 raise err
5534
5535 # See also: Block.coerce_to_target_dtype
5536 dtype = self._find_common_type_compat(value)
5537 return self.astype(dtype).putmask(mask, value)
5538
5539 values = self._values.copy()
5540
5541 if isinstance(values, np.ndarray):
5542 converted = setitem_datetimelike_compat(values, mask.sum(), converted)
5543 np.putmask(values, mask, converted)
5544
5545 else:
5546 # Note: we use the original value here, not converted, as
5547 # _validate_fill_value is not idempotent
5548 values._putmask(mask, value)
5549
5550 return self._shallow_copy(values)
5551
5552 def equals(self, other: Any) -> bool:
5553 """
        Determine if two Index objects are equal.
5555
5556 The things that are being compared are:
5557
5558 * The elements inside the Index object.
5559 * The order of the elements inside the Index object.
5560
5561 Parameters
5562 ----------
5563 other : Any
5564 The other object to compare against.
5565
5566 Returns
5567 -------
5568 bool
5569 True if "other" is an Index and it has the same elements and order
5570 as the calling index; False otherwise.
5571
5572 Examples
5573 --------
5574 >>> idx1 = pd.Index([1, 2, 3])
5575 >>> idx1
5576 Index([1, 2, 3], dtype='int64')
5577 >>> idx1.equals(pd.Index([1, 2, 3]))
5578 True
5579
5580 The elements inside are compared
5581
5582 >>> idx2 = pd.Index(["1", "2", "3"])
5583 >>> idx2
5584 Index(['1', '2', '3'], dtype='object')
5585
5586 >>> idx1.equals(idx2)
5587 False
5588
5589 The order is compared
5590
5591 >>> ascending_idx = pd.Index([1, 2, 3])
5592 >>> ascending_idx
5593 Index([1, 2, 3], dtype='int64')
5594 >>> descending_idx = pd.Index([3, 2, 1])
5595 >>> descending_idx
5596 Index([3, 2, 1], dtype='int64')
5597 >>> ascending_idx.equals(descending_idx)
5598 False
5599
5600 The dtype is *not* compared
5601
5602 >>> int64_idx = pd.Index([1, 2, 3], dtype='int64')
5603 >>> int64_idx
5604 Index([1, 2, 3], dtype='int64')
5605 >>> uint64_idx = pd.Index([1, 2, 3], dtype='uint64')
5606 >>> uint64_idx
5607 Index([1, 2, 3], dtype='uint64')
5608 >>> int64_idx.equals(uint64_idx)
5609 True
5610 """
5611 if self.is_(other):
5612 return True
5613
5614 if not isinstance(other, Index):
5615 return False
5616
5617 if len(self) != len(other):
5618 # quickly return if the lengths are different
5619 return False
5620
5621 if (
5622 isinstance(self.dtype, StringDtype)
5623 and self.dtype.storage == "pyarrow_numpy"
5624 and other.dtype != self.dtype
5625 ):
5626 # special case for object behavior
5627 return other.equals(self.astype(object))
5628
5629 if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
5630 # if other is not object, use other's logic for coercion
5631 return other.equals(self)
5632
5633 if isinstance(other, ABCMultiIndex):
5634 # d-level MultiIndex can equal d-tuple Index
5635 return other.equals(self)
5636
5637 if isinstance(self._values, ExtensionArray):
5638 # Dispatch to the ExtensionArray's .equals method.
5639 if not isinstance(other, type(self)):
5640 return False
5641
5642 earr = cast(ExtensionArray, self._data)
5643 return earr.equals(other._data)
5644
5645 if isinstance(other.dtype, ExtensionDtype):
5646 # All EA-backed Index subclasses override equals
5647 return other.equals(self)
5648
5649 return array_equivalent(self._values, other._values)
5650
5651 @final
5652 def identical(self, other) -> bool:
5653 """
5654 Similar to equals, but checks that object attributes and types are also equal.
5655
5656 Returns
5657 -------
5658 bool
            True if the two Index objects have equal elements and the same
            type, False otherwise.
5661
5662 Examples
5663 --------
5664 >>> idx1 = pd.Index(['1', '2', '3'])
5665 >>> idx2 = pd.Index(['1', '2', '3'])
5666 >>> idx2.identical(idx1)
5667 True
5668
5669 >>> idx1 = pd.Index(['1', '2', '3'], name="A")
5670 >>> idx2 = pd.Index(['1', '2', '3'], name="B")
5671 >>> idx2.identical(idx1)
5672 False
5673 """
5674 return (
5675 self.equals(other)
5676 and all(
5677 getattr(self, c, None) == getattr(other, c, None)
5678 for c in self._comparables
5679 )
5680 and type(self) == type(other)
5681 and self.dtype == other.dtype
5682 )
5683
5684 @final
5685 def asof(self, label):
5686 """
5687 Return the label from the index, or, if not present, the previous one.
5688
5689 Assuming that the index is sorted, return the passed index label if it
5690 is in the index, or return the previous index label if the passed one
5691 is not in the index.
5692
5693 Parameters
5694 ----------
5695 label : object
            The label to look up. If it is not present, the latest index
            label up to this one is returned.
5697
5698 Returns
5699 -------
5700 object
5701 The passed label if it is in the index. The previous label if the
5702 passed label is not in the sorted index or `NaN` if there is no
5703 such label.
5704
5705 See Also
5706 --------
5707 Series.asof : Return the latest value in a Series up to the
5708 passed index.
5709 merge_asof : Perform an asof merge (similar to left join but it
5710 matches on nearest key rather than equal key).
        Index.get_indexer : `asof` is effectively a thin wrapper around
            `get_indexer` with method='pad'.
5713
5714 Examples
5715 --------
5716 `Index.asof` returns the latest index label up to the passed label.
5717
5718 >>> idx = pd.Index(['2013-12-31', '2014-01-02', '2014-01-03'])
5719 >>> idx.asof('2014-01-01')
5720 '2013-12-31'
5721
5722 If the label is in the index, the method returns the passed label.
5723
5724 >>> idx.asof('2014-01-02')
5725 '2014-01-02'
5726
5727 If all of the labels in the index are later than the passed label,
5728 NaN is returned.
5729
5730 >>> idx.asof('1999-01-02')
5731 nan
5732
5733 If the index is not sorted, an error is raised.
5734
5735 >>> idx_not_sorted = pd.Index(['2013-12-31', '2015-01-02',
5736 ... '2014-01-03'])
5737 >>> idx_not_sorted.asof('2013-12-31')
5738 Traceback (most recent call last):
5739 ValueError: index must be monotonic increasing or decreasing
5740 """
5741 self._searchsorted_monotonic(label) # validate sortedness
5742 try:
5743 loc = self.get_loc(label)
5744 except (KeyError, TypeError):
5745 # KeyError -> No exact match, try for padded
5746 # TypeError -> passed e.g. non-hashable, fall through to get
5747 # the tested exception message
5748 indexer = self.get_indexer([label], method="pad")
5749 if indexer.ndim > 1 or indexer.size > 1:
5750 raise TypeError("asof requires scalar valued input")
5751 loc = indexer.item()
5752 if loc == -1:
5753 return self._na_value
5754 else:
5755 if isinstance(loc, slice):
5756 loc = loc.indices(len(self))[-1]
5757
5758 return self[loc]
5759
5760 def asof_locs(
5761 self, where: Index, mask: npt.NDArray[np.bool_]
5762 ) -> npt.NDArray[np.intp]:
5763 """
5764 Return the locations (indices) of labels in the index.
5765
        As in :meth:`pandas.Index.asof`, if the label (a particular entry in
5767 ``where``) is not in the index, the latest index label up to the
5768 passed label is chosen and its index returned.
5769
5770 If all of the labels in the index are later than a label in ``where``,
5771 -1 is returned.
5772
5773 ``mask`` is used to ignore ``NA`` values in the index during calculation.
5774
5775 Parameters
5776 ----------
5777 where : Index
5778 An Index consisting of an array of timestamps.
5779 mask : np.ndarray[bool]
5780 Array of booleans denoting where values in the original
5781 data are not ``NA``.
5782
5783 Returns
5784 -------
5785 np.ndarray[np.intp]
5786 An array of locations (indices) of the labels from the index
5787 which correspond to the return values of :meth:`pandas.Index.asof`
5788 for every element in ``where``.
5789
5790 See Also
5791 --------
5792 Index.asof : Return the label from the index, or, if not present, the
5793 previous one.
5794
5795 Examples
5796 --------
5797 >>> idx = pd.date_range('2023-06-01', periods=3, freq='D')
5798 >>> where = pd.DatetimeIndex(['2023-05-30 00:12:00', '2023-06-01 00:00:00',
5799 ... '2023-06-02 23:59:59'])
5800 >>> mask = np.ones(3, dtype=bool)
5801 >>> idx.asof_locs(where, mask)
5802 array([-1, 0, 1])
5803
5804 We can use ``mask`` to ignore certain values in the index during calculation.
5805
5806 >>> mask[1] = False
5807 >>> idx.asof_locs(where, mask)
5808 array([-1, 0, 0])
5809 """
5810 # error: No overload variant of "searchsorted" of "ndarray" matches argument
5811 # types "Union[ExtensionArray, ndarray[Any, Any]]", "str"
5812 # TODO: will be fixed when ExtensionArray.searchsorted() is fixed
5813 locs = self._values[mask].searchsorted(
5814 where._values, side="right" # type: ignore[call-overload]
5815 )
5816 locs = np.where(locs > 0, locs - 1, 0)
5817
5818 result = np.arange(len(self), dtype=np.intp)[mask].take(locs)
5819
5820 first_value = self._values[mask.argmax()]
5821 result[(locs == 0) & (where._values < first_value)] = -1
5822
5823 return result
5824
5825 @overload
5826 def sort_values(
5827 self,
5828 *,
5829 return_indexer: Literal[False] = ...,
5830 ascending: bool = ...,
5831 na_position: NaPosition = ...,
5832 key: Callable | None = ...,
5833 ) -> Self:
5834 ...
5835
5836 @overload
5837 def sort_values(
5838 self,
5839 *,
5840 return_indexer: Literal[True],
5841 ascending: bool = ...,
5842 na_position: NaPosition = ...,
5843 key: Callable | None = ...,
5844 ) -> tuple[Self, np.ndarray]:
5845 ...
5846
5847 @overload
5848 def sort_values(
5849 self,
5850 *,
5851 return_indexer: bool = ...,
5852 ascending: bool = ...,
5853 na_position: NaPosition = ...,
5854 key: Callable | None = ...,
5855 ) -> Self | tuple[Self, np.ndarray]:
5856 ...
5857
5858 @deprecate_nonkeyword_arguments(
5859 version="3.0", allowed_args=["self"], name="sort_values"
5860 )
5861 def sort_values(
5862 self,
5863 return_indexer: bool = False,
5864 ascending: bool = True,
5865 na_position: NaPosition = "last",
5866 key: Callable | None = None,
5867 ) -> Self | tuple[Self, np.ndarray]:
5868 """
5869 Return a sorted copy of the index.
5870
5871 Return a sorted copy of the index, and optionally return the indices
5872 that sorted the index itself.
5873
5874 Parameters
5875 ----------
        return_indexer : bool, default False
            Whether the indices that would sort the index should be returned.
        ascending : bool, default True
            Whether the index values should be sorted in ascending order.
        na_position : {'first', 'last'}, default 'last'
            Argument 'first' puts NaNs at the beginning, 'last' puts NaNs at
            the end.
5883 key : callable, optional
5884 If not None, apply the key function to the index values
5885 before sorting. This is similar to the `key` argument in the
5886 builtin :meth:`sorted` function, with the notable difference that
5887 this `key` function should be *vectorized*. It should expect an
5888 ``Index`` and return an ``Index`` of the same shape.
5889
5890 Returns
5891 -------
5892 sorted_index : pandas.Index
5893 Sorted copy of the index.
5894 indexer : numpy.ndarray, optional
5895 The indices that the index itself was sorted by.
5896
5897 See Also
5898 --------
5899 Series.sort_values : Sort values of a Series.
5900 DataFrame.sort_values : Sort values in a DataFrame.
5901
5902 Examples
5903 --------
5904 >>> idx = pd.Index([10, 100, 1, 1000])
5905 >>> idx
5906 Index([10, 100, 1, 1000], dtype='int64')
5907
5908 Sort values in ascending order (default behavior).
5909
5910 >>> idx.sort_values()
5911 Index([1, 10, 100, 1000], dtype='int64')
5912
5913 Sort values in descending order, and also get the indices `idx` was
5914 sorted by.
5915
5916 >>> idx.sort_values(ascending=False, return_indexer=True)
5917 (Index([1000, 100, 10, 1], dtype='int64'), array([3, 1, 0, 2]))
5918 """
5919 if key is None and (
5920 (ascending and self.is_monotonic_increasing)
5921 or (not ascending and self.is_monotonic_decreasing)
5922 ):
5923 if return_indexer:
5924 indexer = np.arange(len(self), dtype=np.intp)
5925 return self.copy(), indexer
5926 else:
5927 return self.copy()
5928
5929 # GH 35584. Sort missing values according to na_position kwarg
5930 # ignore na_position for MultiIndex
5931 if not isinstance(self, ABCMultiIndex):
5932 _as = nargsort(
5933 items=self, ascending=ascending, na_position=na_position, key=key
5934 )
5935 else:
5936 idx = cast(Index, ensure_key_mapped(self, key))
5937 _as = idx.argsort(na_position=na_position)
5938 if not ascending:
5939 _as = _as[::-1]
5940
5941 sorted_index = self.take(_as)
5942
5943 if return_indexer:
5944 return sorted_index, _as
5945 else:
5946 return sorted_index
5947
5948 @final
5949 def sort(self, *args, **kwargs):
5950 """
5951 Use sort_values instead.
5952 """
5953 raise TypeError("cannot sort an Index object in-place, use sort_values instead")
5954
5955 def shift(self, periods: int = 1, freq=None):
5956 """
5957 Shift index by desired number of time frequency increments.
5958
5959 This method is for shifting the values of datetime-like indexes
5960 by a specified time increment a given number of times.
5961
5962 Parameters
5963 ----------
5964 periods : int, default 1
5965 Number of periods (or increments) to shift by,
5966 can be positive or negative.
5967 freq : pandas.DateOffset, pandas.Timedelta or str, optional
5968 Frequency increment to shift by.
5969 If None, the index is shifted by its own `freq` attribute.
5970 Offset aliases are valid strings, e.g., 'D', 'W', 'M' etc.
5971
5972 Returns
5973 -------
5974 pandas.Index
5975 Shifted index.
5976
5977 See Also
5978 --------
5979 Series.shift : Shift values of Series.
5980
5981 Notes
5982 -----
5983 This method is only implemented for datetime-like index classes,
5984 i.e., DatetimeIndex, PeriodIndex and TimedeltaIndex.
5985
5986 Examples
5987 --------
5988 Put the first 5 month starts of 2011 into an index.
5989
5990 >>> month_starts = pd.date_range('1/1/2011', periods=5, freq='MS')
5991 >>> month_starts
5992 DatetimeIndex(['2011-01-01', '2011-02-01', '2011-03-01', '2011-04-01',
5993 '2011-05-01'],
5994 dtype='datetime64[ns]', freq='MS')
5995
5996 Shift the index by 10 days.
5997
5998 >>> month_starts.shift(10, freq='D')
5999 DatetimeIndex(['2011-01-11', '2011-02-11', '2011-03-11', '2011-04-11',
6000 '2011-05-11'],
6001 dtype='datetime64[ns]', freq=None)
6002
6003 The default value of `freq` is the `freq` attribute of the index,
6004 which is 'MS' (month start) in this example.
6005
6006 >>> month_starts.shift(10)
6007 DatetimeIndex(['2011-11-01', '2011-12-01', '2012-01-01', '2012-02-01',
6008 '2012-03-01'],
6009 dtype='datetime64[ns]', freq='MS')
6010 """
6011 raise NotImplementedError(
6012 f"This method is only implemented for DatetimeIndex, PeriodIndex and "
6013 f"TimedeltaIndex; Got type {type(self).__name__}"
6014 )
6015
6016 def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
6017 """
6018 Return the integer indices that would sort the index.
6019
6020 Parameters
6021 ----------
6022 *args
6023 Passed to `numpy.ndarray.argsort`.
6024 **kwargs
6025 Passed to `numpy.ndarray.argsort`.
6026
6027 Returns
6028 -------
6029 np.ndarray[np.intp]
6030 Integer indices that would sort the index if used as
6031 an indexer.
6032
6033 See Also
6034 --------
6035 numpy.argsort : Similar method for NumPy arrays.
6036 Index.sort_values : Return sorted copy of Index.
6037
6038 Examples
6039 --------
6040 >>> idx = pd.Index(['b', 'a', 'd', 'c'])
6041 >>> idx
6042 Index(['b', 'a', 'd', 'c'], dtype='object')
6043
6044 >>> order = idx.argsort()
6045 >>> order
6046 array([1, 0, 3, 2])
6047
6048 >>> idx[order]
6049 Index(['a', 'b', 'c', 'd'], dtype='object')
6050 """
        # This works for either ndarray or EA and is overridden
        # by RangeIndex and MultiIndex
6053 return self._data.argsort(*args, **kwargs)
6054
6055 def _check_indexing_error(self, key):
6056 if not is_scalar(key):
6057 # if key is not a scalar, directly raise an error (the code below
6058 # would convert to numpy arrays and raise later any way) - GH29926
6059 raise InvalidIndexError(key)
6060
6061 @cache_readonly
6062 def _should_fallback_to_positional(self) -> bool:
6063 """
6064 Should an integer key be treated as positional?
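
        For example, a string index lets an integer key in ``series[key]``
        fall back to positional lookup, while an integer- or float-dtype
        index treats it as a label.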
6065 """
6066 return self.inferred_type not in {
6067 "integer",
6068 "mixed-integer",
6069 "floating",
6070 "complex",
6071 }
6072
6073 _index_shared_docs[
6074 "get_indexer_non_unique"
6075 ] = """
6076 Compute indexer and mask for new index given the current index.
6077
    The indexer should then be used as an input to ndarray.take to align the
6079 current data to the new index.
6080
6081 Parameters
6082 ----------
6083 target : %(target_klass)s
6084
6085 Returns
6086 -------
6087 indexer : np.ndarray[np.intp]
6088 Integers from 0 to n - 1 indicating that the index at these
6089 positions matches the corresponding target values. Missing values
6090 in the target are marked by -1.
6091 missing : np.ndarray[np.intp]
6092 An indexer into the target of the values not found.
6093 These correspond to the -1 in the indexer array.
6094
6095 Examples
6096 --------
6097 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6098 >>> index.get_indexer_non_unique(['b', 'b'])
6099 (array([1, 3, 4, 1, 3, 4]), array([], dtype=int64))
6100
6101 In the example below there are no matched values.
6102
6103 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6104 >>> index.get_indexer_non_unique(['q', 'r', 't'])
6105 (array([-1, -1, -1]), array([0, 1, 2]))
6106
    Here the returned ``indexer`` contains only integers equal to -1,
    indicating that none of the ``target`` values were found in the index.
    The second array, [0, 1, 2], shows that the first, second, and third
    target elements are missing.

    Notice that the return value is a tuple containing two items. In the
    example below, the first item is an array of locations in ``index``; the
    second is a mask showing that the first and third target elements are
    missing.
6115
6116 >>> index = pd.Index(['c', 'b', 'a', 'b', 'b'])
6117 >>> index.get_indexer_non_unique(['f', 'b', 's'])
6118 (array([-1, 1, 3, 4, -1]), array([0, 2]))
6119 """
6120
6121 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
6122 def get_indexer_non_unique(
6123 self, target
6124 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6125 target = ensure_index(target)
6126 target = self._maybe_cast_listlike_indexer(target)
6127
6128 if not self._should_compare(target) and not self._should_partial_index(target):
6129 # _should_partial_index e.g. IntervalIndex with numeric scalars
6130 # that can be matched to Interval scalars.
6131 return self._get_indexer_non_comparable(target, method=None, unique=False)
6132
6133 pself, ptarget = self._maybe_downcast_for_indexing(target)
6134 if pself is not self or ptarget is not target:
6135 return pself.get_indexer_non_unique(ptarget)
6136
6137 if self.dtype != target.dtype:
6138 # TODO: if object, could use infer_dtype to preempt costly
6139 # conversion if still non-comparable?
6140 dtype = self._find_common_type_compat(target)
6141
6142 this = self.astype(dtype, copy=False)
6143 that = target.astype(dtype, copy=False)
6144 return this.get_indexer_non_unique(that)
6145
6146 # TODO: get_indexer has fastpaths for both Categorical-self and
6147 # Categorical-target. Can we do something similar here?
6148
6149 # Note: _maybe_downcast_for_indexing ensures we never get here
6150 # with MultiIndex self and non-Multi target
6151 if self._is_multi and target._is_multi:
6152 engine = self._engine
6153 # Item "IndexEngine" of "Union[IndexEngine, ExtensionEngine]" has
6154 # no attribute "_extract_level_codes"
6155 tgt_values = engine._extract_level_codes(target) # type: ignore[union-attr]
6156 else:
6157 tgt_values = target._get_engine_target()
6158
6159 indexer, missing = self._engine.get_indexer_non_unique(tgt_values)
6160 return ensure_platform_int(indexer), ensure_platform_int(missing)
6161
6162 @final
6163 def get_indexer_for(self, target) -> npt.NDArray[np.intp]:
6164 """
6165 Guaranteed return of an indexer even when non-unique.
6166
6167 This dispatches to get_indexer or get_indexer_non_unique
6168 as appropriate.
6169
6170 Returns
6171 -------
6172 np.ndarray[np.intp]
6173 List of indices.
6174
6175 Examples
6176 --------
6177 >>> idx = pd.Index([np.nan, 'var1', np.nan])
6178 >>> idx.get_indexer_for([np.nan])
6179 array([0, 2])
6180 """
6181 if self._index_as_unique:
6182 return self.get_indexer(target)
6183 indexer, _ = self.get_indexer_non_unique(target)
6184 return indexer
6185
6186 def _get_indexer_strict(self, key, axis_name: str_t) -> tuple[Index, np.ndarray]:
6187 """
6188 Analogue to get_indexer that raises if any elements are missing.
6189 """
6190 keyarr = key
6191 if not isinstance(keyarr, Index):
6192 keyarr = com.asarray_tuplesafe(keyarr)
6193
6194 if self._index_as_unique:
6195 indexer = self.get_indexer_for(keyarr)
6196 keyarr = self.reindex(keyarr)[0]
6197 else:
6198 keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
6199
6200 self._raise_if_missing(keyarr, indexer, axis_name)
6201
6202 keyarr = self.take(indexer)
6203 if isinstance(key, Index):
6204 # GH 42790 - Preserve name from an Index
6205 keyarr.name = key.name
6206 if lib.is_np_dtype(keyarr.dtype, "mM") or isinstance(
6207 keyarr.dtype, DatetimeTZDtype
6208 ):
            # DTI/TDI.take can infer a freq in some cases when we don't want one
6210 if isinstance(key, list) or (
6211 isinstance(key, type(self))
6212 # "Index" has no attribute "freq"
6213 and key.freq is None # type: ignore[attr-defined]
6214 ):
6215 keyarr = keyarr._with_freq(None)
6216
6217 return keyarr, indexer
6218
6219 def _raise_if_missing(self, key, indexer, axis_name: str_t) -> None:
6220 """
6221 Check that indexer can be used to return a result.
6222
6223 e.g. at least one element was found,
6224 unless the list of keys was actually empty.
6225
6226 Parameters
6227 ----------
6228 key : list-like
6229 Targeted labels (only used to show correct error message).
        indexer : array-like of integers
            Indices corresponding to the key
            (with -1 indicating not found).
6233 axis_name : str
6234
6235 Raises
6236 ------
6237 KeyError
6238 If at least one key was requested but none was found.
6239 """
6240 if len(key) == 0:
6241 return
6242
6243 # Count missing values
6244 missing_mask = indexer < 0
6245 nmissing = missing_mask.sum()
6246
6247 if nmissing:
6248 if nmissing == len(indexer):
6249 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
6250
6251 not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())
6252 raise KeyError(f"{not_found} not in index")
6253
6254 @overload
6255 def _get_indexer_non_comparable(
6256 self, target: Index, method, unique: Literal[True] = ...
6257 ) -> npt.NDArray[np.intp]:
6258 ...
6259
6260 @overload
6261 def _get_indexer_non_comparable(
6262 self, target: Index, method, unique: Literal[False]
6263 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6264 ...
6265
6266 @overload
6267 def _get_indexer_non_comparable(
6268 self, target: Index, method, unique: bool = True
6269 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6270 ...
6271
6272 @final
6273 def _get_indexer_non_comparable(
6274 self, target: Index, method, unique: bool = True
6275 ) -> npt.NDArray[np.intp] | tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
6276 """
6277 Called from get_indexer or get_indexer_non_unique when the target
6278 is of a non-comparable dtype.
6279
6280 For get_indexer lookups with method=None, get_indexer is an _equality_
6281 check, so non-comparable dtypes mean we will always have no matches.
6282
6283 For get_indexer lookups with a method, get_indexer is an _inequality_
6284 check, so non-comparable dtypes mean we will always raise TypeError.
6285
6286 Parameters
6287 ----------
6288 target : Index
6289 method : str or None
6290 unique : bool, default True
6291 * True if called from get_indexer.
6292 * False if called from get_indexer_non_unique.
6293
6294 Raises
6295 ------
6296 TypeError
6297 If doing an inequality check, i.e. method is not None.
6298 """
6299 if method is not None:
6300 other_dtype = _unpack_nested_dtype(target)
6301 raise TypeError(f"Cannot compare dtypes {self.dtype} and {other_dtype}")
6302
6303 no_matches = -1 * np.ones(target.shape, dtype=np.intp)
6304 if unique:
6305 # This is for get_indexer
6306 return no_matches
6307 else:
6308 # This is for get_indexer_non_unique
6309 missing = np.arange(len(target), dtype=np.intp)
6310 return no_matches, missing
6311
6312 @property
6313 def _index_as_unique(self) -> bool:
6314 """
6315 Whether we should treat this as unique for the sake of
6316 get_indexer vs get_indexer_non_unique.
6317
6318 For IntervalIndex compat.
6319 """
6320 return self.is_unique
6321
6322 _requires_unique_msg = "Reindexing only valid with uniquely valued Index objects"
6323
6324 @final
6325 def _maybe_downcast_for_indexing(self, other: Index) -> tuple[Index, Index]:
6326 """
6327 When dealing with an object-dtype Index and a non-object Index, see
6328 if we can upcast the object-dtype one to improve performance.
6329 """
6330
6331 if isinstance(self, ABCDatetimeIndex) and isinstance(other, ABCDatetimeIndex):
6332 if (
6333 self.tz is not None
6334 and other.tz is not None
6335 and not tz_compare(self.tz, other.tz)
6336 ):
6337 # standardize on UTC
6338 return self.tz_convert("UTC"), other.tz_convert("UTC")
6339
6340 elif self.inferred_type == "date" and isinstance(other, ABCDatetimeIndex):
6341 try:
6342 return type(other)(self), other
6343 except OutOfBoundsDatetime:
6344 return self, other
6345 elif self.inferred_type == "timedelta" and isinstance(other, ABCTimedeltaIndex):
            # TODO: we don't have tests that get here
6347 return type(other)(self), other
6348
6349 elif self.dtype.kind == "u" and other.dtype.kind == "i":
6350 # GH#41873
6351 if other.min() >= 0:
6352 # lookup min as it may be cached
6353 # TODO: may need itemsize check if we have non-64-bit Indexes
6354 return self, other.astype(self.dtype)
6355
6356 elif self._is_multi and not other._is_multi:
6357 try:
6358 # "Type[Index]" has no attribute "from_tuples"
6359 other = type(self).from_tuples(other) # type: ignore[attr-defined]
6360 except (TypeError, ValueError):
6361 # let's instead try with a straight Index
6362 self = Index(self._values)
6363
6364 if not is_object_dtype(self.dtype) and is_object_dtype(other.dtype):
            # Reverse op so we don't need to re-implement on the subclasses
6366 other, self = other._maybe_downcast_for_indexing(self)
6367
6368 return self, other
6369
6370 @final
6371 def _find_common_type_compat(self, target) -> DtypeObj:
6372 """
6373 Implementation of find_common_type that adjusts for Index-specific
6374 special cases.
6375 """
6376 target_dtype, _ = infer_dtype_from(target)
6377
6378 # special case: if one dtype is uint64 and the other a signed int, return object
6379 # See https://github.com/pandas-dev/pandas/issues/26778 for discussion
6380 # Now it's:
6381 # * float | [u]int -> float
6382 # * uint64 | signed int -> object
6383 # We may change union(float | [u]int) to go to object.
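        # A sketch of the resulting rules (illustrative, not exhaustive):
        #   uint64 index vs [-1] (signed int) -> object
        #   float64 index vs [1] ([u]int)     -> float64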
6384 if self.dtype == "uint64" or target_dtype == "uint64":
6385 if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
6386 target_dtype
6387 ):
6388 return _dtype_obj
6389
6390 dtype = find_result_type(self.dtype, target)
6391 dtype = common_dtype_categorical_compat([self, target], dtype)
6392 return dtype
6393
6394 @final
6395 def _should_compare(self, other: Index) -> bool:
6396 """
6397 Check if `self == other` can ever have non-False entries.
6398 """
6399
6400 # NB: we use inferred_type rather than is_bool_dtype to catch
6401 # object_dtype_of_bool and categorical[object_dtype_of_bool] cases
6402 if (
6403 other.inferred_type == "boolean" and is_any_real_numeric_dtype(self.dtype)
6404 ) or (
6405 self.inferred_type == "boolean" and is_any_real_numeric_dtype(other.dtype)
6406 ):
6407 # GH#16877 Treat boolean labels passed to a numeric index as not
6408 # found. Without this fix False and True would be treated as 0 and 1
6409 # respectively.
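            # e.g. Index([0, 1]).get_indexer([False, True]) returns
            # array([-1, -1]) rather than matching positions 0 and 1.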
6410 return False
6411
6412 dtype = _unpack_nested_dtype(other)
6413 return self._is_comparable_dtype(dtype) or is_object_dtype(dtype)
6414
6415 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
6416 """
6417 Can we compare values of the given dtype to our own?
6418 """
6419 if self.dtype.kind == "b":
6420 return dtype.kind == "b"
6421 elif is_numeric_dtype(self.dtype):
6422 return is_numeric_dtype(dtype)
6423 # TODO: this was written assuming we only get here with object-dtype,
6424 # which is no longer correct. Can we specialize for EA?
6425 return True
6426
6427 @final
6428 def groupby(self, values) -> PrettyDict[Hashable, np.ndarray]:
6429 """
6430 Group the index labels by a given array of values.
6431
6432 Parameters
6433 ----------
6434 values : array
6435 Values used to determine the groups.
6436
6437 Returns
6438 -------
6439 dict
6440 {group name -> group labels}
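
        Examples
        --------
        A small sketch, grouping index labels by an external key array
        (values chosen for illustration):

        >>> idx = pd.Index([1, 2, 3, 4])
        >>> groups = idx.groupby(np.array(["a", "b", "a", "b"]))
        >>> groups["a"]
        Index([1, 3], dtype='int64')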
6441 """
6442 # TODO: if we are a MultiIndex, we can do better
        # than converting to tuples
6444 if isinstance(values, ABCMultiIndex):
6445 values = values._values
6446 values = Categorical(values)
6447 result = values._reverse_indexer()
6448
6449 # map to the label
6450 result = {k: self.take(v) for k, v in result.items()}
6451
6452 return PrettyDict(result)
6453
6454 def map(self, mapper, na_action: Literal["ignore"] | None = None):
6455 """
6456 Map values using an input mapping or function.
6457
6458 Parameters
6459 ----------
6460 mapper : function, dict, or Series
6461 Mapping correspondence.
6462 na_action : {None, 'ignore'}
6463 If 'ignore', propagate NA values, without passing them to the
6464 mapping correspondence.
6465
6466 Returns
6467 -------
6468 Union[Index, MultiIndex]
6469 The output of the mapping function applied to the index.
6470 If the function returns a tuple with more than one element
6471 a MultiIndex will be returned.
6472
6473 Examples
6474 --------
6475 >>> idx = pd.Index([1, 2, 3])
6476 >>> idx.map({1: 'a', 2: 'b', 3: 'c'})
6477 Index(['a', 'b', 'c'], dtype='object')
6478
6479 Using `map` with a function:
6480
6481 >>> idx = pd.Index([1, 2, 3])
6482 >>> idx.map('I am a {}'.format)
6483 Index(['I am a 1', 'I am a 2', 'I am a 3'], dtype='object')
6484
6485 >>> idx = pd.Index(['a', 'b', 'c'])
6486 >>> idx.map(lambda x: x.upper())
6487 Index(['A', 'B', 'C'], dtype='object')
6488 """
6489 from pandas.core.indexes.multi import MultiIndex
6490
6491 new_values = self._map_values(mapper, na_action=na_action)
6492
6493 # we can return a MultiIndex
6494 if new_values.size and isinstance(new_values[0], tuple):
6495 if isinstance(self, MultiIndex):
6496 names = self.names
6497 elif self.name:
6498 names = [self.name] * len(new_values[0])
6499 else:
6500 names = None
6501 return MultiIndex.from_tuples(new_values, names=names)
6502
6503 dtype = None
6504 if not new_values.size:
6505 # empty
6506 dtype = self.dtype
6507
6508 # e.g. if we are floating and new_values is all ints, then we
6509 # don't want to cast back to floating. But if we are UInt64
6510 # and new_values is all ints, we want to try.
6511 same_dtype = lib.infer_dtype(new_values, skipna=False) == self.inferred_type
6512 if same_dtype:
6513 new_values = maybe_cast_pointwise_result(
6514 new_values, self.dtype, same_dtype=same_dtype
6515 )
6516
6517 return Index._with_infer(new_values, dtype=dtype, copy=False, name=self.name)
6518
6519 # TODO: De-duplicate with map, xref GH#32349
6520 @final
6521 def _transform_index(self, func, *, level=None) -> Index:
6522 """
6523 Apply function to all values found in index.
6524
6525 This includes transforming multiindex entries separately.
6526 Only apply function to one level of the MultiIndex if level is specified.
6527 """
6528 if isinstance(self, ABCMultiIndex):
6529 values = [
6530 self.get_level_values(i).map(func)
6531 if i == level or level is None
6532 else self.get_level_values(i)
6533 for i in range(self.nlevels)
6534 ]
6535 return type(self).from_arrays(values)
6536 else:
6537 items = [func(x) for x in self]
6538 return Index(items, name=self.name, tupleize_cols=False)
6539
6540 def isin(self, values, level=None) -> npt.NDArray[np.bool_]:
6541 """
6542 Return a boolean array where the index values are in `values`.
6543
6544 Compute boolean array of whether each index value is found in the
6545 passed set of values. The length of the returned boolean array matches
6546 the length of the index.
6547
6548 Parameters
6549 ----------
6550 values : set or list-like
6551 Sought values.
6552 level : str or int, optional
6553 Name or position of the index level to use (if the index is a
6554 `MultiIndex`).
6555
6556 Returns
6557 -------
6558 np.ndarray[bool]
6559 NumPy array of boolean values.
6560
6561 See Also
6562 --------
6563 Series.isin : Same for Series.
6564 DataFrame.isin : Same method for DataFrames.
6565
6566 Notes
6567 -----
6568 In the case of `MultiIndex` you must either specify `values` as a
6569 list-like object containing tuples that are the same length as the
6570 number of levels, or specify `level`. Otherwise it will raise a
6571 ``ValueError``.
6572
6573 If `level` is specified:
6574
6575 - if it is the name of one *and only one* index level, use that level;
6576 - otherwise it should be a number indicating level position.
6577
6578 Examples
6579 --------
        >>> idx = pd.Index([1, 2, 3])
6581 >>> idx
6582 Index([1, 2, 3], dtype='int64')
6583
        Check whether each index value is in a list of values.
6585
6586 >>> idx.isin([1, 4])
6587 array([ True, False, False])
6588
        >>> midx = pd.MultiIndex.from_arrays([[1, 2, 3],
6590 ... ['red', 'blue', 'green']],
6591 ... names=('number', 'color'))
6592 >>> midx
6593 MultiIndex([(1, 'red'),
6594 (2, 'blue'),
6595 (3, 'green')],
6596 names=['number', 'color'])
6597
6598 Check whether the strings in the 'color' level of the MultiIndex
6599 are in a list of colors.
6600
6601 >>> midx.isin(['red', 'orange', 'yellow'], level='color')
6602 array([ True, False, False])
6603
6604 To check across the levels of a MultiIndex, pass a list of tuples:
6605
6606 >>> midx.isin([(1, 'red'), (3, 'red')])
6607 array([ True, False, False])
6608 """
6609 if level is not None:
6610 self._validate_index_level(level)
6611 return algos.isin(self._values, values)
6612
6613 def _get_string_slice(self, key: str_t):
6614 # this is for partial string indexing,
6615 # overridden in DatetimeIndex, TimedeltaIndex and PeriodIndex
6616 raise NotImplementedError
6617
6618 def slice_indexer(
6619 self,
6620 start: Hashable | None = None,
6621 end: Hashable | None = None,
6622 step: int | None = None,
6623 ) -> slice:
6624 """
6625 Compute the slice indexer for input labels and step.
6626
6627 Index needs to be ordered and unique.
6628
6629 Parameters
6630 ----------
6631 start : label, default None
6632 If None, defaults to the beginning.
6633 end : label, default None
6634 If None, defaults to the end.
6635 step : int, default None
6636
6637 Returns
6638 -------
6639 slice
6640
6641 Raises
6642 ------
6643 KeyError : If key does not exist, or key is not unique and index is
6644 not ordered.
6645
6646 Notes
6647 -----
        This function assumes that the data is sorted, so use it at your own
        peril.
6649
6650 Examples
6651 --------
6652 This is a method on all index types. For example you can do:
6653
6654 >>> idx = pd.Index(list('abcd'))
6655 >>> idx.slice_indexer(start='b', end='c')
6656 slice(1, 3, None)
6657
6658 >>> idx = pd.MultiIndex.from_arrays([list('abcd'), list('efgh')])
6659 >>> idx.slice_indexer(start='b', end=('c', 'g'))
6660 slice(1, 3, None)
6661 """
6662 start_slice, end_slice = self.slice_locs(start, end, step=step)
6663
6664 # return a slice
6665 if not is_scalar(start_slice):
6666 raise AssertionError("Start slice bound is non-scalar")
6667 if not is_scalar(end_slice):
6668 raise AssertionError("End slice bound is non-scalar")
6669
6670 return slice(start_slice, end_slice, step)
6671
6672 def _maybe_cast_indexer(self, key):
6673 """
6674 If we have a float key and are not a floating index, then try to cast
6675 to an int if equivalent.
6676 """
6677 return key
6678
6679 def _maybe_cast_listlike_indexer(self, target) -> Index:
6680 """
6681 Analogue to maybe_cast_indexer for get_indexer instead of get_loc.
6682 """
6683 return ensure_index(target)
6684
6685 @final
6686 def _validate_indexer(
6687 self,
6688 form: Literal["positional", "slice"],
6689 key,
6690 kind: Literal["getitem", "iloc"],
6691 ) -> None:
6692 """
        If we are a positional indexer, validate that we have an
        appropriately typed bound (must be an integer).
6695 """
6696 if not lib.is_int_or_none(key):
6697 self._raise_invalid_indexer(form, key)
6698
6699 def _maybe_cast_slice_bound(self, label, side: str_t):
6700 """
6701 This function should be overloaded in subclasses that allow non-trivial
6702 casting on label-slice bounds, e.g. datetime-like indices allowing
6703 strings containing formatted datetimes.
6704
6705 Parameters
6706 ----------
6707 label : object
6708 side : {'left', 'right'}
6709
6710 Returns
6711 -------
6712 label : object
6713
6714 Notes
6715 -----
6716 Value of `side` parameter should be validated in caller.
6717 """
6718
6719 # We are a plain index here (sub-class override this method if they
6720 # wish to have special treatment for floats/ints, e.g. datetimelike Indexes
6721
6722 if is_numeric_dtype(self.dtype):
6723 return self._maybe_cast_indexer(label)
6724
6725 # reject them, if index does not contain label
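        # e.g. Index(["a", "b"]).slice_locs(0, 1) raises rather than silently
        # treating the integers as positions.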
6726 if (is_float(label) or is_integer(label)) and label not in self:
6727 self._raise_invalid_indexer("slice", label)
6728
6729 return label
6730
6731 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
6732 if self.is_monotonic_increasing:
6733 return self.searchsorted(label, side=side)
6734 elif self.is_monotonic_decreasing:
6735 # np.searchsorted expects ascending sort order, have to reverse
6736 # everything for it to work (element ordering, search side and
6737 # resulting value).
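            # e.g. self = Index([3, 2, 1]), label = 2, side = "left":
            #   self[::-1].searchsorted(2, side="right") -> 2, and
            #   len(self) - 2 -> 1, the leftmost position of 2 in self.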
6738 pos = self[::-1].searchsorted(
6739 label, side="right" if side == "left" else "left"
6740 )
6741 return len(self) - pos
6742
6743 raise ValueError("index must be monotonic increasing or decreasing")
6744
6745 def get_slice_bound(self, label, side: Literal["left", "right"]) -> int:
6746 """
6747 Calculate slice bound that corresponds to given label.
6748
6749 Returns leftmost (one-past-the-rightmost if ``side=='right'``) position
6750 of given label.
6751
6752 Parameters
6753 ----------
6754 label : object
6755 side : {'left', 'right'}
6756
6757 Returns
6758 -------
6759 int
6760 Index of label.
6761
6762 See Also
6763 --------
6764 Index.get_loc : Get integer location, slice or boolean mask for requested
6765 label.
6766
6767 Examples
6768 --------
6769 >>> idx = pd.RangeIndex(5)
6770 >>> idx.get_slice_bound(3, 'left')
6771 3
6772
6773 >>> idx.get_slice_bound(3, 'right')
6774 4
6775
6776 If ``label`` is non-unique in the index, an error will be raised.
6777
6778 >>> idx_duplicate = pd.Index(['a', 'b', 'a', 'c', 'd'])
6779 >>> idx_duplicate.get_slice_bound('a', 'left')
6780 Traceback (most recent call last):
6781 KeyError: Cannot get left slice bound for non-unique label: 'a'
6782 """
6783
6784 if side not in ("left", "right"):
6785 raise ValueError(
6786 "Invalid value for side kwarg, must be either "
6787 f"'left' or 'right': {side}"
6788 )
6789
6790 original_label = label
6791
6792 # For datetime indices label may be a string that has to be converted
6793 # to datetime boundary according to its resolution.
6794 label = self._maybe_cast_slice_bound(label, side)
6795
6796 # we need to look up the label
6797 try:
6798 slc = self.get_loc(label)
6799 except KeyError as err:
6800 try:
6801 return self._searchsorted_monotonic(label, side)
6802 except ValueError:
6803 # raise the original KeyError
6804 raise err
6805
6806 if isinstance(slc, np.ndarray):
6807 # get_loc may return a boolean array, which
6808 # is OK as long as they are representable by a slice.
6809 assert is_bool_dtype(slc.dtype)
6810 slc = lib.maybe_booleans_to_slice(slc.view("u1"))
6811 if isinstance(slc, np.ndarray):
6812 raise KeyError(
6813 f"Cannot get {side} slice bound for non-unique "
6814 f"label: {repr(original_label)}"
6815 )
6816
6817 if isinstance(slc, slice):
6818 if side == "left":
6819 return slc.start
6820 else:
6821 return slc.stop
6822 else:
6823 if side == "right":
6824 return slc + 1
6825 else:
6826 return slc
6827
6828 def slice_locs(self, start=None, end=None, step=None) -> tuple[int, int]:
6829 """
6830 Compute slice locations for input labels.
6831
6832 Parameters
6833 ----------
6834 start : label, default None
6835 If None, defaults to the beginning.
6836 end : label, default None
6837 If None, defaults to the end.
        step : int, default None
6839 If None, defaults to 1.
6840
6841 Returns
6842 -------
6843 tuple[int, int]
6844
6845 See Also
6846 --------
6847 Index.get_loc : Get location for a single label.
6848
6849 Notes
6850 -----
6851 This method only works if the index is monotonic or unique.
6852
6853 Examples
6854 --------
6855 >>> idx = pd.Index(list('abcd'))
6856 >>> idx.slice_locs(start='b', end='c')
6857 (1, 3)
6858 """
6859 inc = step is None or step >= 0
6860
6861 if not inc:
6862 # If it's a reverse slice, temporarily swap bounds.
6863 start, end = end, start
6864
6865 # GH 16785: If start and end happen to be date strings with UTC offsets
6866 # attempt to parse and check that the offsets are the same
6867 if isinstance(start, (str, datetime)) and isinstance(end, (str, datetime)):
6868 try:
6869 ts_start = Timestamp(start)
6870 ts_end = Timestamp(end)
6871 except (ValueError, TypeError):
6872 pass
6873 else:
6874 if not tz_compare(ts_start.tzinfo, ts_end.tzinfo):
6875 raise ValueError("Both dates must have the same UTC offset")
6876
6877 start_slice = None
6878 if start is not None:
6879 start_slice = self.get_slice_bound(start, "left")
6880 if start_slice is None:
6881 start_slice = 0
6882
6883 end_slice = None
6884 if end is not None:
6885 end_slice = self.get_slice_bound(end, "right")
6886 if end_slice is None:
6887 end_slice = len(self)
6888
6889 if not inc:
6890 # Bounds at this moment are swapped, swap them back and shift by 1.
6891 #
6892 # slice_locs('B', 'A', step=-1): s='B', e='A'
6893 #
6894 # s='A' e='B'
6895 # AFTER SWAP: | |
6896 # v ------------------> V
6897 # -----------------------------------
6898 # | | |A|A|A|A| | | | | |B|B| | | | |
6899 # -----------------------------------
6900 # ^ <------------------ ^
6901 # SHOULD BE: | |
6902 # end=s-1 start=e-1
6903 #
6904 end_slice, start_slice = start_slice - 1, end_slice - 1
6905
6906 # i == -1 triggers ``len(self) + i`` selection that points to the
6907 # last element, not before-the-first one, subtracting len(self)
6908 # compensates that.
6909 if end_slice == -1:
6910 end_slice -= len(self)
6911 if start_slice == -1:
6912 start_slice -= len(self)
6913
6914 return start_slice, end_slice
6915
6916 def delete(self, loc) -> Self:
6917 """
6918 Make new Index with passed location(-s) deleted.
6919
6920 Parameters
6921 ----------
6922 loc : int or list of int
6923 Location of item(-s) which will be deleted.
6924 Use a list of locations to delete more than one value at the same time.
6925
6926 Returns
6927 -------
6928 Index
            Will be the same type as self, except for RangeIndex.
6930
6931 See Also
6932 --------
        numpy.delete : Delete rows or columns from a NumPy array (ndarray).
6934
6935 Examples
6936 --------
6937 >>> idx = pd.Index(['a', 'b', 'c'])
6938 >>> idx.delete(1)
6939 Index(['a', 'c'], dtype='object')
6940
6941 >>> idx = pd.Index(['a', 'b', 'c'])
6942 >>> idx.delete([0, 2])
6943 Index(['b'], dtype='object')
6944 """
6945 values = self._values
6946 res_values: ArrayLike
6947 if isinstance(values, np.ndarray):
6948 # TODO(__array_function__): special casing will be unnecessary
6949 res_values = np.delete(values, loc)
6950 else:
6951 res_values = values.delete(loc)
6952
6953 # _constructor so RangeIndex-> Index with an int64 dtype
6954 return self._constructor._simple_new(res_values, name=self.name)
6955
6956 def insert(self, loc: int, item) -> Index:
6957 """
6958 Make new Index inserting new item at location.
6959
        Follows ``numpy.insert`` semantics for negative values.
6961
6962 Parameters
6963 ----------
6964 loc : int
6965 item : object
6966
6967 Returns
6968 -------
6969 Index
6970
6971 Examples
6972 --------
6973 >>> idx = pd.Index(['a', 'b', 'c'])
6974 >>> idx.insert(1, 'x')
6975 Index(['a', 'x', 'b', 'c'], dtype='object')
6976 """
6977 item = lib.item_from_zerodim(item)
6978 if is_valid_na_for_dtype(item, self.dtype) and self.dtype != object:
6979 item = self._na_value
6980
6981 arr = self._values
6982
6983 try:
6984 if isinstance(arr, ExtensionArray):
6985 res_values = arr.insert(loc, item)
6986 return type(self)._simple_new(res_values, name=self.name)
6987 else:
6988 item = self._validate_fill_value(item)
6989 except (TypeError, ValueError, LossySetitemError):
6990 # e.g. trying to insert an integer into a DatetimeIndex
6991 # We cannot keep the same dtype, so cast to the (often object)
6992 # minimal shared dtype before doing the insert.
6993 dtype = self._find_common_type_compat(item)
6994 return self.astype(dtype).insert(loc, item)
6995
6996 if arr.dtype != object or not isinstance(
6997 item, (tuple, np.datetime64, np.timedelta64)
6998 ):
6999 # with object-dtype we need to worry about numpy incorrectly casting
7000 # dt64/td64 to integer, also about treating tuples as sequences
7001 # special-casing dt64/td64 https://github.com/numpy/numpy/issues/12550
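            # Casting the scalar to the array's dtype first (e.g. np.int64(5)
            # for an int64 index) lets np.insert preserve that dtype.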
7002 casted = arr.dtype.type(item)
7003 new_values = np.insert(arr, loc, casted)
7004
7005 else:
7006 # error: No overload variant of "insert" matches argument types
7007 # "ndarray[Any, Any]", "int", "None"
7008 new_values = np.insert(arr, loc, None) # type: ignore[call-overload]
7009 loc = loc if loc >= 0 else loc - 1
7010 new_values[loc] = item
7011
7012 out = Index._with_infer(new_values, name=self.name)
7013 if (
7014 using_pyarrow_string_dtype()
7015 and is_string_dtype(out.dtype)
7016 and new_values.dtype == object
7017 ):
7018 out = out.astype(new_values.dtype)
7019 if self.dtype == object and out.dtype != object:
7020 # GH#51363
7021 warnings.warn(
7022 "The behavior of Index.insert with object-dtype is deprecated, "
7023 "in a future version this will return an object-dtype Index "
7024 "instead of inferring a non-object dtype. To retain the old "
7025 "behavior, do `idx.insert(loc, item).infer_objects(copy=False)`",
7026 FutureWarning,
7027 stacklevel=find_stack_level(),
7028 )
7029 return out
7030
7031 def drop(
7032 self,
7033 labels: Index | np.ndarray | Iterable[Hashable],
7034 errors: IgnoreRaise = "raise",
7035 ) -> Index:
7036 """
7037 Make new Index with passed list of labels deleted.
7038
7039 Parameters
7040 ----------
7041 labels : array-like or scalar
7042 errors : {'ignore', 'raise'}, default 'raise'
            If 'ignore', suppress the error and drop only the labels that
            are found.
7044
7045 Returns
7046 -------
7047 Index
            Will be the same type as self, except for RangeIndex.
7049
7050 Raises
7051 ------
7052 KeyError
            If not all of the labels are found in the selected axis.
7054
7055 Examples
7056 --------
7057 >>> idx = pd.Index(['a', 'b', 'c'])
7058 >>> idx.drop(['a'])
7059 Index(['b', 'c'], dtype='object')
7060 """
7061 if not isinstance(labels, Index):
7062 # avoid materializing e.g. RangeIndex
7063 arr_dtype = "object" if self.dtype == "object" else None
7064 labels = com.index_labels_to_array(labels, dtype=arr_dtype)
7065
7066 indexer = self.get_indexer_for(labels)
7067 mask = indexer == -1
7068 if mask.any():
7069 if errors != "ignore":
7070 raise KeyError(f"{labels[mask].tolist()} not found in axis")
7071 indexer = indexer[~mask]
7072 return self.delete(indexer)
7073
7074 @final
7075 def infer_objects(self, copy: bool = True) -> Index:
7076 """
7077 If we have an object dtype, try to infer a non-object dtype.
7078
7079 Parameters
7080 ----------
7081 copy : bool, default True
7082 Whether to make a copy in cases where no inference occurs.
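
        Returns
        -------
        Index

        Examples
        --------
        A minimal sketch of object-to-numeric inference:

        >>> idx = pd.Index([1, 2, 3], dtype=object)
        >>> idx.dtype
        dtype('O')
        >>> idx.infer_objects().dtype
        dtype('int64')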
7083 """
7084 if self._is_multi:
7085 raise NotImplementedError(
7086 "infer_objects is not implemented for MultiIndex. "
7087 "Use index.to_frame().infer_objects() instead."
7088 )
7089 if self.dtype != object:
7090 return self.copy() if copy else self
7091
7092 values = self._values
7093 values = cast("npt.NDArray[np.object_]", values)
7094 res_values = lib.maybe_convert_objects(
7095 values,
7096 convert_non_numeric=True,
7097 )
7098 if copy and res_values is values:
7099 return self.copy()
7100 result = Index(res_values, name=self.name)
7101 if not copy and res_values is values and self._references is not None:
7102 result._references = self._references
7103 result._references.add_index_reference(result)
7104 return result
7105
7106 @final
7107 def diff(self, periods: int = 1) -> Index:
7108 """
7109 Computes the difference between consecutive values in the Index object.
7110
7111 If periods is greater than 1, computes the difference between values that
7112 are `periods` number of positions apart.
7113
7114 Parameters
7115 ----------
7116 periods : int, optional
7117 The number of positions between the current and previous
7118 value to compute the difference with. Default is 1.
7119
7120 Returns
7121 -------
7122 Index
7123 A new Index object with the computed differences.
7124
7125 Examples
7126 --------
7127 >>> import pandas as pd
7128 >>> idx = pd.Index([10, 20, 30, 40, 50])
7129 >>> idx.diff()
7130 Index([nan, 10.0, 10.0, 10.0, 10.0], dtype='float64')
7131
7132 """
7133 return Index(self.to_series().diff(periods))
7134
7135 @final
7136 def round(self, decimals: int = 0) -> Self:
7137 """
7138 Round each value in the Index to the given number of decimals.
7139
7140 Parameters
7141 ----------
7142 decimals : int, optional
7143 Number of decimal places to round to. If decimals is negative,
7144 it specifies the number of positions to the left of the decimal point.
7145
7146 Returns
7147 -------
7148 Index
7149 A new Index with the rounded values.
7150
7151 Examples
7152 --------
7153 >>> import pandas as pd
7154 >>> idx = pd.Index([10.1234, 20.5678, 30.9123, 40.4567, 50.7890])
7155 >>> idx.round(decimals=2)
7156 Index([10.12, 20.57, 30.91, 40.46, 50.79], dtype='float64')
7157
7158 """
7159 return self._constructor(self.to_series().round(decimals))
7160
7161 # --------------------------------------------------------------------
7162 # Generated Arithmetic, Comparison, and Unary Methods
7163
7164 def _cmp_method(self, other, op):
7165 """
7166 Wrapper used to dispatch comparison operations.
7167 """
7168 if self.is_(other):
7169 # fastpath
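            # An index compared with itself is elementwise True for ==/<=/>=
            # except at NA positions (NaN never equals NaN), and the inverse
            # for !=.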
7170 if op in {operator.eq, operator.le, operator.ge}:
7171 arr = np.ones(len(self), dtype=bool)
7172 if self._can_hold_na and not isinstance(self, ABCMultiIndex):
7173 # TODO: should set MultiIndex._can_hold_na = False?
7174 arr[self.isna()] = False
7175 return arr
7176 elif op is operator.ne:
7177 arr = np.zeros(len(self), dtype=bool)
7178 if self._can_hold_na and not isinstance(self, ABCMultiIndex):
7179 arr[self.isna()] = True
7180 return arr
7181
7182 if isinstance(other, (np.ndarray, Index, ABCSeries, ExtensionArray)) and len(
7183 self
7184 ) != len(other):
7185 raise ValueError("Lengths must match to compare")
7186
7187 if not isinstance(other, ABCMultiIndex):
7188 other = extract_array(other, extract_numpy=True)
7189 else:
7190 other = np.asarray(other)
7191
7192 if is_object_dtype(self.dtype) and isinstance(other, ExtensionArray):
7193 # e.g. PeriodArray, Categorical
7194 result = op(self._values, other)
7195
7196 elif isinstance(self._values, ExtensionArray):
7197 result = op(self._values, other)
7198
7199 elif is_object_dtype(self.dtype) and not isinstance(self, ABCMultiIndex):
7200 # don't pass MultiIndex
7201 result = ops.comp_method_OBJECT_ARRAY(op, self._values, other)
7202
7203 else:
7204 result = ops.comparison_op(self._values, other, op)
7205
7206 return result
7207
7208 @final
7209 def _logical_method(self, other, op):
7210 res_name = ops.get_op_result_name(self, other)
7211
7212 lvalues = self._values
7213 rvalues = extract_array(other, extract_numpy=True, extract_range=True)
7214
7215 res_values = ops.logical_op(lvalues, rvalues, op)
7216 return self._construct_result(res_values, name=res_name)
7217
7218 @final
7219 def _construct_result(self, result, name):
7220 if isinstance(result, tuple):
7221 return (
7222 Index(result[0], name=name, dtype=result[0].dtype),
7223 Index(result[1], name=name, dtype=result[1].dtype),
7224 )
7225 return Index(result, name=name, dtype=result.dtype)
7226
7227 def _arith_method(self, other, op):
7228 if (
7229 isinstance(other, Index)
7230 and is_object_dtype(other.dtype)
7231 and type(other) is not Index
7232 ):
7233 # We return NotImplemented for object-dtype index *subclasses* so they have
7234 # a chance to implement ops before we unwrap them.
7235 # See https://github.com/pandas-dev/pandas/issues/31109
7236 return NotImplemented
7237
7238 return super()._arith_method(other, op)
7239
7240 @final
7241 def _unary_method(self, op):
7242 result = op(self._values)
7243 return Index(result, name=self.name)
7244
7245 def __abs__(self) -> Index:
7246 return self._unary_method(operator.abs)
7247
7248 def __neg__(self) -> Index:
7249 return self._unary_method(operator.neg)
7250
7251 def __pos__(self) -> Index:
7252 return self._unary_method(operator.pos)
7253
7254 def __invert__(self) -> Index:
7255 # GH#8875
7256 return self._unary_method(operator.inv)
7257
7258 # --------------------------------------------------------------------
7259 # Reductions
7260
7261 def any(self, *args, **kwargs):
7262 """
        Return whether any element is truthy.
7264
7265 Parameters
7266 ----------
7267 *args
7268 Required for compatibility with numpy.
7269 **kwargs
7270 Required for compatibility with numpy.
7271
7272 Returns
7273 -------
7274 bool or array-like (if axis is specified)
7275 A single element array-like may be converted to bool.
7276
7277 See Also
7278 --------
7279 Index.all : Return whether all elements are True.
7280 Series.all : Return whether all elements are True.
7281
7282 Notes
7283 -----
7284 Not a Number (NaN), positive infinity and negative infinity
7285 evaluate to True because these are not equal to zero.
7286
7287 Examples
7288 --------
7289 >>> index = pd.Index([0, 1, 2])
7290 >>> index.any()
7291 True
7292
7293 >>> index = pd.Index([0, 0, 0])
7294 >>> index.any()
7295 False
7296 """
7297 nv.validate_any(args, kwargs)
7298 self._maybe_disable_logical_methods("any")
7299 vals = self._values
7300 if not isinstance(vals, np.ndarray):
7301 # i.e. EA, call _reduce instead of "any" to get TypeError instead
7302 # of AttributeError
7303 return vals._reduce("any")
7304 return np.any(vals)
7305
7306 def all(self, *args, **kwargs):
7307 """
        Return whether all elements are truthy.
7309
7310 Parameters
7311 ----------
7312 *args
7313 Required for compatibility with numpy.
7314 **kwargs
7315 Required for compatibility with numpy.
7316
7317 Returns
7318 -------
7319 bool or array-like (if axis is specified)
7320 A single element array-like may be converted to bool.
7321
7322 See Also
7323 --------
7324 Index.any : Return whether any element in an Index is True.
7325 Series.any : Return whether any element in a Series is True.
7326 Series.all : Return whether all elements in a Series are True.
7327
7328 Notes
7329 -----
7330 Not a Number (NaN), positive infinity and negative infinity
7331 evaluate to True because these are not equal to zero.
7332
7333 Examples
7334 --------
7335 True, because nonzero integers are considered True.
7336
7337 >>> pd.Index([1, 2, 3]).all()
7338 True
7339
7340 False, because ``0`` is considered False.
7341
7342 >>> pd.Index([0, 1, 2]).all()
7343 False
7344 """
7345 nv.validate_all(args, kwargs)
7346 self._maybe_disable_logical_methods("all")
7347 vals = self._values
7348 if not isinstance(vals, np.ndarray):
7349 # i.e. EA, call _reduce instead of "all" to get TypeError instead
7350 # of AttributeError
7351 return vals._reduce("all")
7352 return np.all(vals)
7353
7354 @final
7355 def _maybe_disable_logical_methods(self, opname: str_t) -> None:
7356 """
        Raise if this Index subclass does not support any or all.
7358 """
7359 if (
7360 isinstance(self, ABCMultiIndex)
7361 # TODO(3.0): PeriodArray and DatetimeArray any/all will raise,
7362 # so checking needs_i8_conversion will be unnecessary
7363 or (needs_i8_conversion(self.dtype) and self.dtype.kind != "m")
7364 ):
7365 # This call will raise
7366 make_invalid_op(opname)(self)
7367
7368 @Appender(IndexOpsMixin.argmin.__doc__)
7369 def argmin(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
7370 nv.validate_argmin(args, kwargs)
7371 nv.validate_minmax_axis(axis)
7372
7373 if not self._is_multi and self.hasnans:
7374 # Take advantage of cache
7375 mask = self._isnan
7376 if not skipna or mask.all():
7377 warnings.warn(
7378 f"The behavior of {type(self).__name__}.argmax/argmin "
7379 "with skipna=False and NAs, or with all-NAs is deprecated. "
7380 "In a future version this will raise ValueError.",
7381 FutureWarning,
7382 stacklevel=find_stack_level(),
7383 )
7384 return -1
7385 return super().argmin(skipna=skipna)
7386
7387 @Appender(IndexOpsMixin.argmax.__doc__)
7388 def argmax(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
7389 nv.validate_argmax(args, kwargs)
7390 nv.validate_minmax_axis(axis)
7391
7392 if not self._is_multi and self.hasnans:
7393 # Take advantage of cache
7394 mask = self._isnan
7395 if not skipna or mask.all():
7396 warnings.warn(
7397 f"The behavior of {type(self).__name__}.argmax/argmin "
7398 "with skipna=False and NAs, or with all-NAs is deprecated. "
7399 "In a future version this will raise ValueError.",
7400 FutureWarning,
7401 stacklevel=find_stack_level(),
7402 )
7403 return -1
7404 return super().argmax(skipna=skipna)
7405
7406 def min(self, axis=None, skipna: bool = True, *args, **kwargs):
7407 """
7408 Return the minimum value of the Index.
7409
7410 Parameters
7411 ----------
7412 axis : {None}
7413 Dummy argument for consistency with Series.
7414 skipna : bool, default True
7415 Exclude NA/null values when showing the result.
7416 *args, **kwargs
7417 Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Minimum value.

        See Also
        --------
        Index.max : Return the maximum value of the object.
        Series.min : Return the minimum value in a Series.
        DataFrame.min : Return the minimum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.min()
        1

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.min()
        'a'

        For a MultiIndex, the minimum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.min()
        ('a', 1)
        """
        nv.validate_min(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            first = self[0]
            if not isna(first):
                return first

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="min", skipna=skipna)

        return nanops.nanmin(self._values, skipna=skipna)

    def max(self, axis=None, skipna: bool = True, *args, **kwargs):
        """
        Return the maximum value of the Index.

        Parameters
        ----------
        axis : int, optional
            For compatibility with NumPy. Only 0 or None are allowed.
        skipna : bool, default True
            Exclude NA/null values when computing the result.
        *args, **kwargs
            Additional arguments and keywords for compatibility with NumPy.

        Returns
        -------
        scalar
            Maximum value.

        See Also
        --------
        Index.min : Return the minimum value in an Index.
        Series.max : Return the maximum value in a Series.
        DataFrame.max : Return the maximum values in a DataFrame.

        Examples
        --------
        >>> idx = pd.Index([3, 2, 1])
        >>> idx.max()
        3

        >>> idx = pd.Index(['c', 'b', 'a'])
        >>> idx.max()
        'c'

        For a MultiIndex, the maximum is determined lexicographically.

        >>> idx = pd.MultiIndex.from_product([('a', 'b'), (2, 1)])
        >>> idx.max()
        ('b', 2)
        """
        nv.validate_max(args, kwargs)
        nv.validate_minmax_axis(axis)

        if not len(self):
            return self._na_value

        if len(self) and self.is_monotonic_increasing:
            # quick check
            last = self[-1]
            if not isna(last):
                return last

        if not self._is_multi and self.hasnans:
            # Take advantage of cache
            mask = self._isnan
            if not skipna or mask.all():
                return self._na_value

        if not self._is_multi and not isinstance(self._values, np.ndarray):
            return self._values._reduce(name="max", skipna=skipna)

        return nanops.nanmax(self._values, skipna=skipna)

    # --------------------------------------------------------------------

    @final
    @property
    def shape(self) -> Shape:
        """
        Return a tuple of the shape of the underlying data.

        Examples
        --------
        >>> idx = pd.Index([1, 2, 3])
        >>> idx
        Index([1, 2, 3], dtype='int64')
        >>> idx.shape
        (3,)
        """
        # See GH#27775, GH#27384 for history/reasoning in how this is defined.
        return (len(self),)


def ensure_index_from_sequences(sequences, names=None) -> Index:
    """
    Construct an index from sequences of data.

    A single sequence returns an Index. Multiple sequences return a
    MultiIndex.

    Parameters
    ----------
    sequences : sequence of sequences
    names : sequence of str

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index

    Examples
    --------
    >>> ensure_index_from_sequences([[1, 2, 3]], names=["name"])
    Index([1, 2, 3], dtype='int64', name='name')

    >>> ensure_index_from_sequences([["a", "a"], ["a", "b"]], names=["L1", "L2"])
    MultiIndex([('a', 'a'),
                ('a', 'b')],
               names=['L1', 'L2'])
    """
    from pandas.core.indexes.multi import MultiIndex

    if len(sequences) == 1:
        if names is not None:
            names = names[0]
        return Index(sequences[0], name=names)
    else:
        return MultiIndex.from_arrays(sequences, names=names)


def ensure_index(index_like: Axes, copy: bool = False) -> Index:
    """
    Ensure that we have an index from some index-like object.

    Parameters
    ----------
    index_like : sequence
        An Index or other sequence
    copy : bool, default False

    Returns
    -------
    index : Index or MultiIndex

    See Also
    --------
    ensure_index_from_sequences

    Examples
    --------
    >>> ensure_index(['a', 'b'])
    Index(['a', 'b'], dtype='object')

    >>> ensure_index([('a', 'a'), ('b', 'c')])
    Index([('a', 'a'), ('b', 'c')], dtype='object')

    >>> ensure_index([['a', 'a'], ['b', 'c']])
    MultiIndex([('a', 'b'),
                ('a', 'c')],
               )
    """
    if isinstance(index_like, Index):
        if copy:
            index_like = index_like.copy()
        return index_like

    if isinstance(index_like, ABCSeries):
        name = index_like.name
        return Index(index_like, name=name, copy=copy)

    if is_iterator(index_like):
        index_like = list(index_like)

    if isinstance(index_like, list):
        if type(index_like) is not list:  # noqa: E721
            # must check for exactly list here because of strict type
            # check in clean_index_list
            index_like = list(index_like)

        if len(index_like) and lib.is_all_arraylike(index_like):
            from pandas.core.indexes.multi import MultiIndex

            return MultiIndex.from_arrays(index_like)
        else:
            return Index(index_like, copy=copy, tupleize_cols=False)
    else:
        return Index(index_like, copy=copy)


def ensure_has_len(seq):
    """
    If seq is an iterator, put its values into a list.
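
    Examples
    --------
    An iterator is materialized into a list; sized sequences pass through.

    >>> ensure_has_len(iter([1, 2]))
    [1, 2]
    >>> ensure_has_len([1, 2])
    [1, 2]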
7655 """
7656 try:
7657 len(seq)
7658 except TypeError:
7659 return list(seq)
7660 else:
7661 return seq
7662
7663
7664def trim_front(strings: list[str]) -> list[str]:
7665 """
    Trims leading spaces evenly among all strings.

    Examples
    --------
    >>> trim_front([" a", " b"])
    ['a', 'b']

    >>> trim_front([" a", " "])
    ['a', '']
    """
    if not strings:
        return strings
    while all(strings) and all(x[0] == " " for x in strings):
        strings = [x[1:] for x in strings]
    return strings


def _validate_join_method(method: str) -> None:
    if method not in ["left", "right", "inner", "outer"]:
        raise ValueError(f"do not recognize join method {method}")
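
# Illustrative: _validate_join_method("outer") returns silently, while an
# unrecognized value such as "cross" raises
# "ValueError: do not recognize join method cross".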


def maybe_extract_name(name, obj, cls) -> Hashable:
    """
    If no name is passed, then extract it from data, validating hashability.
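
    Examples
    --------
    The name is taken from the data only when one is not passed explicitly,
    and must be hashable either way.

    >>> maybe_extract_name(None, pd.Series([1], name="x"), pd.Index)
    'x'
    >>> maybe_extract_name("y", pd.Series([1], name="x"), pd.Index)
    'y'
    >>> maybe_extract_name(["not", "hashable"], None, pd.Index)
    Traceback (most recent call last):
    ...
    TypeError: Index.name must be a hashable type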
7691 """
7692 if name is None and isinstance(obj, (Index, ABCSeries)):
7693 # Note we don't just check for "name" attribute since that would
7694 # pick up e.g. dtype.name
7695 name = obj.name
7696
7697 # GH#29069
7698 if not is_hashable(name):
7699 raise TypeError(f"{cls.__name__}.name must be a hashable type")
7700
7701 return name
7702
7703
7704def get_unanimous_names(*indexes: Index) -> tuple[Hashable, ...]:
7705 """
7706 Return common name if all indices agree, otherwise None (level-by-level).
7707
7708 Parameters
7709 ----------
7710 indexes : list of Index objects
7711
7712 Returns
7713 -------
    tuple
        A tuple of the unanimous 'names' found, one entry per level.
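
    Examples
    --------
    Names that agree are kept; any disagreement yields None for that level.

    >>> idx1 = pd.Index([1, 2], name="a")
    >>> idx2 = pd.Index([3, 4], name="a")
    >>> get_unanimous_names(idx1, idx2)
    ('a',)
    >>> idx3 = pd.Index([5, 6], name="b")
    >>> get_unanimous_names(idx1, idx3)
    (None,)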
7716 """
7717 name_tups = [tuple(i.names) for i in indexes]
7718 name_sets = [{*ns} for ns in zip_longest(*name_tups)]
7719 names = tuple(ns.pop() if len(ns) == 1 else None for ns in name_sets)
7720 return names
7721
7722
7723def _unpack_nested_dtype(other: Index) -> DtypeObj:
7724 """
7725 When checking if our dtype is comparable with another, we need
7726 to unpack CategoricalDtype to look at its categories.dtype.
7727
7728 Parameters
7729 ----------
7730 other : Index
7731
7732 Returns
7733 -------
7734 np.dtype or ExtensionDtype
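
    Examples
    --------
    The categories' dtype is unpacked; other dtypes pass through unchanged.

    >>> _unpack_nested_dtype(pd.CategoricalIndex(list("aabb")))
    dtype('O')
    >>> _unpack_nested_dtype(pd.Index([1, 2], dtype="int64"))
    dtype('int64')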
7735 """
7736 dtype = other.dtype
7737 if isinstance(dtype, CategoricalDtype):
7738 # If there is ever a SparseIndex, this could get dispatched
7739 # here too.
7740 return dtype.categories.dtype
7741 elif isinstance(dtype, ArrowDtype):
7742 # GH 53617
7743 import pyarrow as pa
7744
7745 if pa.types.is_dictionary(dtype.pyarrow_dtype):
7746 other = other[:0].astype(ArrowDtype(dtype.pyarrow_dtype.value_type))
7747 return other.dtype
7748
7749
7750def _maybe_try_sort(result: Index | ArrayLike, sort: bool | None):
7751 if sort is not False:
7752 try:
7753 # error: Incompatible types in assignment (expression has type
7754 # "Union[ExtensionArray, ndarray[Any, Any], Index, Series,
7755 # Tuple[Union[Union[ExtensionArray, ndarray[Any, Any]], Index, Series],
7756 # ndarray[Any, Any]]]", variable has type "Union[Index,
7757 # Union[ExtensionArray, ndarray[Any, Any]]]")
7758 result = algos.safe_sort(result) # type: ignore[assignment]
7759 except TypeError as err:
7760 if sort is True:
7761 raise
7762 warnings.warn(
7763 f"{err}, sort order is undefined for incomparable objects.",
7764 RuntimeWarning,
7765 stacklevel=find_stack_level(),
7766 )
7767 return result
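
# Illustrative: _maybe_try_sort(np.array([3, 1, 2]), sort=None) returns the
# values sorted; sort=False returns the input untouched; incomparable mixed
# objects (e.g. np.array([1, "a"], dtype=object)) fall back to the unsorted
# input with a RuntimeWarning unless sort=True, which re-raises the TypeError.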


def get_values_for_csv(
    values: ArrayLike,
    *,
    date_format,
    na_rep: str = "nan",
    quoting=None,
    float_format=None,
    decimal: str = ".",
) -> npt.NDArray[np.object_]:
    """
    Convert to types which can be consumed by the standard library's
    csv.writer.writerows.
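
    Examples
    --------
    A float array with default formatting (illustrative of the float path
    below):

    >>> get_values_for_csv(np.array([1.5, np.nan]), date_format=None, na_rep="NA")
    array(['1.5', 'NA'], dtype=object)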
7782 """
7783 if isinstance(values, Categorical) and values.categories.dtype.kind in "Mm":
7784 # GH#40754 Convert categorical datetimes to datetime array
7785 values = algos.take_nd(
7786 values.categories._values,
7787 ensure_platform_int(values._codes),
7788 fill_value=na_rep,
7789 )
7790
7791 values = ensure_wrapped_if_datetimelike(values)
7792
7793 if isinstance(values, (DatetimeArray, TimedeltaArray)):
7794 if values.ndim == 1:
7795 result = values._format_native_types(na_rep=na_rep, date_format=date_format)
7796 result = result.astype(object, copy=False)
7797 return result
7798
7799 # GH#21734 Process every column separately, they might have different formats
7800 results_converted = []
7801 for i in range(len(values)):
7802 result = values[i, :]._format_native_types(
7803 na_rep=na_rep, date_format=date_format
7804 )
7805 results_converted.append(result.astype(object, copy=False))
7806 return np.vstack(results_converted)
7807
7808 elif isinstance(values.dtype, PeriodDtype):
7809 # TODO: tests that get here in column path
7810 values = cast("PeriodArray", values)
7811 res = values._format_native_types(na_rep=na_rep, date_format=date_format)
7812 return res
7813
7814 elif isinstance(values.dtype, IntervalDtype):
7815 # TODO: tests that get here in column path
7816 values = cast("IntervalArray", values)
7817 mask = values.isna()
7818 if not quoting:
7819 result = np.asarray(values).astype(str)
7820 else:
7821 result = np.array(values, dtype=object, copy=True)
7822
7823 result[mask] = na_rep
7824 return result
7825
7826 elif values.dtype.kind == "f" and not isinstance(values.dtype, SparseDtype):
7827 # see GH#13418: no special formatting is desired at the
7828 # output (important for appropriate 'quoting' behaviour),
7829 # so do not pass it through the FloatArrayFormatter
7830 if float_format is None and decimal == ".":
7831 mask = isna(values)
7832
7833 if not quoting:
7834 values = values.astype(str)
7835 else:
7836 values = np.array(values, dtype="object")
7837
7838 values[mask] = na_rep
7839 values = values.astype(object, copy=False)
7840 return values
7841
7842 from pandas.io.formats.format import FloatArrayFormatter
7843
7844 formatter = FloatArrayFormatter(
7845 values,
7846 na_rep=na_rep,
7847 float_format=float_format,
7848 decimal=decimal,
7849 quoting=quoting,
7850 fixed_width=False,
7851 )
7852 res = formatter.get_result_as_array()
7853 res = res.astype(object, copy=False)
7854 return res
7855
7856 elif isinstance(values, ExtensionArray):
7857 mask = isna(values)
7858
7859 new_values = np.asarray(values.astype(object))
7860 new_values[mask] = na_rep
7861 return new_values
7862
7863 else:
7864 mask = isna(values)
7865 itemsize = writers.word_len(na_rep)
7866
7867 if values.dtype != _dtype_obj and not quoting and itemsize:
7868 values = values.astype(str)
7869 if values.dtype.itemsize / np.dtype("U1").itemsize < itemsize:
7870 # enlarge for the na_rep
7871 values = values.astype(f"<U{itemsize}")
7872 else:
7873 values = np.array(values, dtype="object")
7874
7875 values[mask] = na_rep
7876 values = values.astype(object, copy=False)
7877 return values