Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.9/dist-packages/pandas/core/indexes/range.py: 38%

1from __future__ import annotations

3from collections.abc import (

4 Hashable,

5 Iterator,

7from datetime import timedelta

8import operator

9from sys import getsizeof

10from typing import (

11 TYPE_CHECKING,

12 Any,

13 Callable,

14 Literal,

15 cast,

16 overload,

17)

19import numpy as np

21from pandas._libs import (

22 index as libindex,

23 lib,

24)

25from pandas._libs.algos import unique_deltas

26from pandas._libs.lib import no_default

27from pandas.compat.numpy import function as nv

28from pandas.util._decorators import (

29 cache_readonly,

30 deprecate_nonkeyword_arguments,

31 doc,

32)

34from pandas.core.dtypes.common import (

35 ensure_platform_int,

36 ensure_python_int,

37 is_float,

38 is_integer,

39 is_scalar,

40 is_signed_integer_dtype,

41)

42from pandas.core.dtypes.generic import ABCTimedeltaIndex

44from pandas.core import ops

45import pandas.core.common as com

46from pandas.core.construction import extract_array

47import pandas.core.indexes.base as ibase

48from pandas.core.indexes.base import (

49 Index,

50 maybe_extract_name,

51)

52from pandas.core.ops.common import unpack_zerodim_and_defer

54if TYPE_CHECKING:

55 from pandas._typing import (

56 Axis,

57 Dtype,

58 NaPosition,

59 Self,

60 npt,

61 )

62_empty_range = range(0)

63_dtype_int64 = np.dtype(np.int64)

66class RangeIndex(Index):

67 """

68 Immutable Index implementing a monotonic integer range.

70 RangeIndex is a memory-saving special case of an Index limited to representing

71 monotonic ranges with a 64-bit dtype. Using RangeIndex may in some instances

72 improve computing speed.

74 This is the default index type used

75 by DataFrame and Series when no explicit index is provided by the user.

77 Parameters

78 ----------

79 start : int (default: 0), range, or other RangeIndex instance

80 If int and "stop" is not given, interpreted as "stop" instead.

81 stop : int (default: 0)

82 step : int (default: 1)

83 dtype : np.int64

84 Unused, accepted for homogeneity with other index types.

85 copy : bool, default False

86 Unused, accepted for homogeneity with other index types.

87 name : object, optional

88 Name to be stored in the index.

90 Attributes

91 ----------

92 start

93 stop

94 step

96 Methods

97 -------

98 from_range

100 See Also

101 --------

102 Index : The base pandas Index type.

103

104 Examples

105 --------

106 >>> list(pd.RangeIndex(5))

107 [0, 1, 2, 3, 4]

108

109 >>> list(pd.RangeIndex(-2, 4))

110 [-2, -1, 0, 1, 2, 3]

111

112 >>> list(pd.RangeIndex(0, 10, 2))

113 [0, 2, 4, 6, 8]

114

115 >>> list(pd.RangeIndex(2, -10, -3))

116 [2, -1, -4, -7]

117

118 >>> list(pd.RangeIndex(0))

119 []

120

121 >>> list(pd.RangeIndex(1, 0))

122 []

123 """

124

125 _typ = "rangeindex"

126 _dtype_validation_metadata = (is_signed_integer_dtype, "signed integer")

127 _range: range

128 _values: np.ndarray

129

@property
def _engine_type(self) -> type[libindex.Int64Engine]:
    """Return the ``libindex`` engine class used by this index."""
    engine_cls = libindex.Int64Engine
    return engine_cls

133

134 # --------------------------------------------------------------------

135 # Constructors

136

def __new__(
    cls,
    start=None,
    stop=None,
    step=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    name: Hashable | None = None,
) -> Self:
    """
    Build a RangeIndex from integers, a ``range`` or another RangeIndex.

    ``dtype`` is only validated; ``copy`` is accepted but not used here.

    Raises
    ------
    TypeError
        If ``start``, ``stop`` and ``step`` are all None.
    ValueError
        If ``step`` is zero.
    """
    cls._validate_dtype(dtype)
    name = maybe_extract_name(name, start, cls)

    # Pass-through inputs: an existing RangeIndex or a builtin range.
    if isinstance(start, cls):
        return start.copy(name=name)
    if isinstance(start, range):
        return cls._simple_new(start, name=name)

    if com.all_none(start, stop, step):
        raise TypeError("RangeIndex(...) must be called with integers")

    lower = 0 if start is None else ensure_python_int(start)
    if stop is None:
        # A lone bound is interpreted as the stop, like ``range(n)``.
        lower, upper = 0, lower
    else:
        upper = ensure_python_int(stop)

    stride = 1 if step is None else ensure_python_int(step)
    if stride == 0:
        raise ValueError("Step must not be zero")

    return cls._simple_new(range(lower, upper, stride), name=name)

172

@classmethod
def from_range(cls, data: range, name=None, dtype: Dtype | None = None) -> Self:
    """
    Create :class:`pandas.RangeIndex` from a ``range`` object.

    Returns
    -------
    RangeIndex

    Raises
    ------
    TypeError
        If ``data`` is not a ``range`` instance.

    Examples
    --------
    >>> pd.RangeIndex.from_range(range(5))
    RangeIndex(start=0, stop=5, step=1)

    >>> pd.RangeIndex.from_range(range(2, -10, -3))
    RangeIndex(start=2, stop=-10, step=-3)
    """
    if isinstance(data, range):
        cls._validate_dtype(dtype)
        return cls._simple_new(data, name=name)
    raise TypeError(
        f"{cls.__name__}(...) must be called with object coercible to a "
        f"range, {repr(data)} was passed"
    )

197

198 # error: Argument 1 of "_simple_new" is incompatible with supertype "Index";

199 # supertype defines the argument type as

200 # "Union[ExtensionArray, ndarray[Any, Any]]" [override]

@classmethod
def _simple_new(  # type: ignore[override]
    cls, values: range, name: Hashable | None = None
) -> Self:
    """
    Create an instance directly from a ``range``, bypassing ``__new__``.

    Sets the range, name, an empty cache and a null ``_references``.
    """
    obj = object.__new__(cls)
    assert isinstance(values, range)

    obj._range, obj._name = values, name
    obj._cache = {}
    obj._reset_identity()
    obj._references = None
    return obj

215

@classmethod
def _validate_dtype(cls, dtype: Dtype | None) -> None:
    """
    Check ``dtype`` against the class's ``_dtype_validation_metadata``.

    ``None`` is always accepted; otherwise a ValueError is raised when the
    validation function rejects the dtype.
    """
    if dtype is not None:
        is_valid, expected = cls._dtype_validation_metadata
        if not is_valid(dtype):
            raise ValueError(
                f"Incorrect `dtype` passed: expected {expected}, received {dtype}"
            )

226

227 # --------------------------------------------------------------------

228

229 # error: Return type "Type[Index]" of "_constructor" incompatible with return

230 # type "Type[RangeIndex]" in supertype "Index"

@cache_readonly
def _constructor(self) -> type[Index]:  # type: ignore[override]
    """Class used to build results that are not range-like: plain ``Index``."""
    constructor_cls = Index
    return constructor_cls

235

236 # error: Signature of "_data" incompatible with supertype "Index"

@cache_readonly
def _data(self) -> np.ndarray:  # type: ignore[override]
    """
    Materialize the range as an int64 array.

    The array is built lazily on first access and, through
    ``cache_readonly``, kept in ``_cache`` afterwards.
    """
    bounds = (self.start, self.stop, self.step)
    return np.arange(*bounds, dtype=np.int64)

245

def _get_data_as_items(self) -> list[tuple[str, int]]:
    """Return ``[("start", ...), ("stop", ...), ("step", ...)]`` for the range."""
    rng = self._range
    return [(attr, getattr(rng, attr)) for attr in ("start", "stop", "step")]

250

def __reduce__(self):
    """Pickle support: rebuild via ``ibase._new_Index`` from name/start/stop/step."""
    state = {"name": self._name, **dict(self._get_data_as_items())}
    return ibase._new_Index, (type(self), state), None

255

256 # --------------------------------------------------------------------

257 # Rendering Methods

258

def _format_attrs(self):
    """
    Return the ``(attr, formatted_value)`` pairs used when rendering the index.

    The name is appended only when it is set.
    """
    attrs = cast("list[tuple[str, str | int]]", self._get_data_as_items())
    if self._name is None:
        return attrs
    attrs.append(("name", ibase.default_pprint(self._name)))
    return attrs

267

def _format_with_header(self, *, header: list[str], na_rep: str) -> list[str]:
    """
    Return ``header`` followed by the left-justified string form of each value.

    The pad width is the longer of the first and last value's string form.
    """
    # Equivalent to Index implementation, but faster
    rng = self._range
    if len(rng) == 0:
        return header
    width = max(len(str(rng[0])), len(str(rng[-1])))
    return header + [f"{x:<{width}}" for x in rng]

277

278 # --------------------------------------------------------------------

279

@property
def start(self) -> int:
    """
    The value of the `start` parameter (``0`` if this was not supplied).

    Examples
    --------
    >>> pd.RangeIndex(5).start
    0

    >>> pd.RangeIndex(2, -10, -3).start
    2
    """
    # GH 25710
    rng = self._range
    return rng.start

297

@property
def stop(self) -> int:
    """
    The value of the `stop` parameter.

    Examples
    --------
    >>> pd.RangeIndex(5).stop
    5

    >>> pd.RangeIndex(2, -10, -3).stop
    -10
    """
    rng = self._range
    return rng.stop

314

@property
def step(self) -> int:
    """
    The value of the `step` parameter (``1`` if this was not supplied).

    Examples
    --------
    >>> pd.RangeIndex(5).step
    1

    >>> pd.RangeIndex(2, -10, -3).step
    -3

    An empty :class:`pandas.RangeIndex` still reports ``1`` when no step
    was supplied.

    >>> pd.RangeIndex(1, 0).step
    1
    """
    # GH 25710
    rng = self._range
    return rng.step

339

@cache_readonly
def nbytes(self) -> int:
    """
    Return the number of bytes in the underlying data.

    This is the size of the ``range`` object plus the sizes of its
    ``start``, ``stop`` and ``step`` integers.
    """
    rng = self._range
    total = getsizeof(rng)
    for attr_name in ("start", "stop", "step"):
        total += getsizeof(getattr(rng, attr_name))
    return total

350

def memory_usage(self, deep: bool = False) -> int:
    """
    Memory usage of my values

    Parameters
    ----------
    deep : bool
        Accepted for signature compatibility; the result here is always
        ``self.nbytes`` regardless of its value.

    Returns
    -------
    bytes used

    See Also
    --------
    numpy.ndarray.nbytes
    """
    used = self.nbytes
    return used

375

@property
def dtype(self) -> np.dtype:
    """Return the module-level int64 dtype shared by every RangeIndex."""
    shared_dtype = _dtype_int64
    return shared_dtype

379

@property
def is_unique(self) -> bool:
    """Return whether the index has unique values; always True here."""
    unique = True
    return unique

384

@cache_readonly
def is_monotonic_increasing(self) -> bool:
    """True for a positive step, or when there is at most one element."""
    if self._range.step > 0:
        return True
    return len(self) <= 1

388

@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """True for a negative step, or when there is at most one element."""
    if self._range.step < 0:
        return True
    return len(self) <= 1

392

def __contains__(self, key: Any) -> bool:
    """
    Return whether ``key`` is an integer-like value present in the range.

    Unhashable keys raise ``TypeError`` from the ``hash`` call; keys that
    ``ensure_python_int`` rejects with ``TypeError`` are reported as absent.
    """
    hash(key)
    try:
        as_int = ensure_python_int(key)
    except TypeError:
        return False
    else:
        return as_int in self._range

400

@property
def inferred_type(self) -> str:
    """Return the inferred type label, which is always ``"integer"``."""
    label = "integer"
    return label

404

405 # --------------------------------------------------------------------

406 # Indexing Methods

407

@doc(Index.get_loc)
def get_loc(self, key) -> int:
    # Integers, and floats with an integral value, are looked up directly
    # in the underlying range.
    integral = is_integer(key) or (is_float(key) and key.is_integer())
    if integral:
        try:
            return self._range.index(int(key))
        except ValueError as err:
            raise KeyError(key) from err
    # Unhashable keys get the indexing-error check first; anything that
    # survives it is reported as a missing key.
    if not isinstance(key, Hashable):
        self._check_indexing_error(key)
    raise KeyError(key)

420

def _get_indexer(
    self,
    target: Index,
    method: str | None = None,
    limit: int | None = None,
    tolerance=None,
) -> npt.NDArray[np.intp]:
    """
    Compute the position in this index of every value in ``target``.

    Parameters
    ----------
    target : Index
        Values to locate.
    method, limit, tolerance
        If any of these is not None the parent implementation is used.

    Returns
    -------
    np.ndarray[np.intp]
        Positions in ``self``; ``-1`` marks values that are not present.
    """
    if com.any_not_none(method, tolerance, limit):
        return super()._get_indexer(
            target, method=method, tolerance=tolerance, limit=limit
        )

    if self.step > 0:
        start, stop, step = self.start, self.stop, self.step
    else:
        # GH 28678: work on reversed range for simplicity
        reverse = self._range[::-1]
        start, stop, step = reverse.start, reverse.stop, reverse.step

    target_array = np.asarray(target)
    locs = target_array - start
    valid = (locs % step == 0) & (locs >= 0) & (target_array < stop)
    locs[~valid] = -1
    # Valid offsets are exact multiples of ``step``, so floor division is
    # exact. True division would detour through float64 and return wrong
    # positions once the offsets exceed 2**53.
    locs[valid] = locs[valid] // step

    if step != self.step:
        # We reversed this range: transform to original locs
        locs[valid] = len(self) - 1 - locs[valid]
    return ensure_platform_int(locs)

450

@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    """
    Should an integer key be treated as positional?

    Always False for this index.
    """
    positional = False
    return positional

457

458 # --------------------------------------------------------------------

459

def tolist(self) -> list[int]:
    """Return the values of the underlying range as a list of Python ints."""
    return [*self._range]

462

@doc(Index.__iter__)
def __iter__(self) -> Iterator[int]:
    # Iterate the range directly so no array has to be materialized.
    for value in self._range:
        yield value

466

@doc(Index._shallow_copy)
def _shallow_copy(self, values, name: Hashable = no_default):
    if name is no_default:
        name = self._name

    if values.dtype.kind == "f":
        return Index(values, name=name, dtype=np.float64)

    # GH 46675 & 43885: If values is equally spaced, return a
    # more memory-compact RangeIndex instead of Index with 64-bit dtype
    deltas = unique_deltas(values)
    if len(deltas) != 1 or deltas[0] == 0:
        return self._constructor._simple_new(values, name=name)

    diff = deltas[0]
    new_range = range(values[0], values[-1] + diff, diff)
    return type(self)._simple_new(new_range, name=name)

482

def _view(self) -> Self:
    """Return a new instance over the same range that shares this one's cache."""
    cls = type(self)
    view = cls._simple_new(self._range, name=self._name)
    view._cache = self._cache
    return view

487

@doc(Index.copy)
def copy(self, name: Hashable | None = None, deep: bool = False) -> Self:
    # Only the first validated name is used for the renamed copy.
    validated = self._validate_names(name=name, deep=deep)
    return self._rename(name=validated[0])

493

def _minmax(self, meth: str):
    """
    Return the minimum (``meth="min"``) or maximum (any other ``meth``).

    An empty index yields ``np.nan``.
    """
    last_pos = len(self) - 1
    if last_pos == -1:
        return np.nan

    wants_min = meth == "min"
    wants_max = meth == "max"
    # The requested extreme sits at ``start`` when the range runs away from it.
    if (wants_min and self.step > 0) or (wants_max and self.step < 0):
        return self.start
    return self.start + self.step * last_pos

502

def min(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
    """The minimum value of the RangeIndex"""
    # Validate numpy-compat arguments before computing anything.
    nv.validate_minmax_axis(axis)
    nv.validate_min(args, kwargs)
    smallest = self._minmax("min")
    return smallest

508

def max(self, axis=None, skipna: bool = True, *args, **kwargs) -> int:
    """The maximum value of the RangeIndex"""
    # Validate numpy-compat arguments before computing anything.
    nv.validate_minmax_axis(axis)
    nv.validate_max(args, kwargs)
    largest = self._minmax("max")
    return largest

514

def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
    """
    Returns the indices that would sort the index and its
    underlying data.

    The ``ascending`` keyword (default True) selects the sort direction
    and ``kind`` is accepted but ignored.

    Returns
    -------
    np.ndarray[np.intp]

    See Also
    --------
    numpy.ndarray.argsort
    """
    ascending = kwargs.pop("ascending", True)  # EA compat
    kwargs.pop("kind", None)  # e.g. "mergesort" is irrelevant
    nv.validate_argsort(args, kwargs)

    n = len(self)
    if self._range.step > 0:
        order = np.arange(n, dtype=np.intp)
    else:
        order = np.arange(n - 1, -1, -1, dtype=np.intp)

    return order if ascending else order[::-1]

540

def factorize(
    self,
    sort: bool = False,
    use_na_sentinel: bool = True,
) -> tuple[npt.NDArray[np.intp], RangeIndex]:
    """
    Return ``(codes, uniques)`` for the index.

    Without sorting, or for a non-negative step, the codes are
    ``0..len-1`` and the uniques are the index itself. With ``sort`` and a
    negative step both are reversed. ``use_na_sentinel`` is not used.
    """
    codes = np.arange(len(self), dtype=np.intp)
    if sort and self.step < 0:
        return codes[::-1], self[::-1]
    return codes, self

552

def equals(self, other: object) -> bool:
    """
    Determines if two Index objects contain the same elements.

    Two RangeIndex objects are compared through their underlying ranges;
    anything else is handed to the parent implementation.
    """
    if not isinstance(other, RangeIndex):
        return super().equals(other)
    return self._range == other._range

560

561 # error: Signature of "sort_values" incompatible with supertype "Index"

@overload  # type: ignore[override]
def sort_values(
    self,
    *,
    return_indexer: Literal[False] = ...,
    ascending: bool = ...,
    na_position: NaPosition = ...,
    key: Callable | None = ...,
) -> Self:
    ...


@overload
def sort_values(
    self,
    *,
    return_indexer: Literal[True],
    ascending: bool = ...,
    na_position: NaPosition = ...,
    key: Callable | None = ...,
) -> tuple[Self, np.ndarray | RangeIndex]:
    ...


@overload
def sort_values(
    self,
    *,
    return_indexer: bool = ...,
    ascending: bool = ...,
    na_position: NaPosition = ...,
    key: Callable | None = ...,
) -> Self | tuple[Self, np.ndarray | RangeIndex]:
    ...


@deprecate_nonkeyword_arguments(
    version="3.0", allowed_args=["self"], name="sort_values"
)
def sort_values(
    self,
    return_indexer: bool = False,
    ascending: bool = True,
    na_position: NaPosition = "last",
    key: Callable | None = None,
) -> Self | tuple[Self, np.ndarray | RangeIndex]:
    """
    Return the index sorted, optionally together with the sorting indexer.

    With a ``key`` the parent implementation is used. Otherwise the index
    is returned as-is or reversed, depending on ``ascending`` and the sign
    of the step, and the indexer (if requested) is a RangeIndex.
    """
    if key is not None:
        return super().sort_values(
            return_indexer=return_indexer,
            ascending=ascending,
            na_position=na_position,
            key=key,
        )

    # A flip is needed only when the current direction opposes the request.
    if ascending:
        needs_flip = self.step < 0
    else:
        needs_flip = self.step > 0
    sorted_index = self[::-1] if needs_flip else self

    if not return_indexer:
        return sorted_index

    n = len(self)
    rng = range(n - 1, -1, -1) if needs_flip else range(n)
    return sorted_index, RangeIndex(rng)

632

633 # --------------------------------------------------------------------

634 # Set Operations

635

def _intersection(self, other: Index, sort: bool = False):
    """
    Intersect with ``other``, staying a RangeIndex when ``other`` is one.

    Non-RangeIndex inputs go to the parent implementation. The result is
    sorted only when ``sort`` is None.
    """
    # caller is responsible for checking self and other are both non-empty

    if not isinstance(other, RangeIndex):
        return super()._intersection(other, sort=sort)

    # Work on increasing views of both ranges.
    first = self._range[::-1] if self.step < 0 else self._range
    second = other._range[::-1] if other.step < 0 else other._range

    # check whether intervals intersect
    int_low = max(first.start, second.start)
    int_high = min(first.stop, second.stop)
    if int_high <= int_low:
        return self._simple_new(_empty_range)

    # Method hint: linear Diophantine equation
    # performance hint: for identical step sizes, could use
    # cheaper alternative
    gcd, s, _ = self._extended_gcd(first.step, second.step)

    # check whether element sets intersect
    if (first.start - second.start) % gcd:
        return self._simple_new(_empty_range)

    # Parameters of the intersection, ignoring the lower bound for now.
    tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
    new_step = first.step * second.step // gcd
    unbounded = self._simple_new(range(tmp_start, int_high, new_step))

    # adjust index to limiting interval
    new_start = unbounded._min_fitting_element(int_low)
    new_index = self._simple_new(range(new_start, unbounded.stop, unbounded.step))

    both_descending = self.step < 0 and other.step < 0
    if both_descending is not (new_index.step < 0):
        new_index = new_index[::-1]

    if sort is None:
        new_index = new_index.sort_values()

    return new_index

681

def _min_fitting_element(self, lower_limit: int) -> int:
    """Returns the smallest element greater than or equal to the limit"""
    stride = abs(self.step)
    # Negated floor division of the negated offset is a ceiling division.
    no_steps = -(-(lower_limit - self.start) // stride)
    return self.start + stride * no_steps

686

def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]:
    """
    Extended Euclidean algorithm to solve Bezout's identity:
       a*x + b*y = gcd(a, b)
    Finds one particular solution for x, y: s, t
    Returns: gcd, s, t
    """
    prev_r, cur_r = a, b
    prev_s, cur_s = 1, 0
    prev_t, cur_t = 0, 1
    while cur_r:
        quotient = prev_r // cur_r
        prev_r, cur_r = cur_r, prev_r - quotient * cur_r
        prev_s, cur_s = cur_s, prev_s - quotient * cur_s
        prev_t, cur_t = cur_t, prev_t - quotient * cur_t
    return prev_r, prev_s, prev_t

703

def _range_in_self(self, other: range) -> bool:
    """Check if other range is contained in self"""
    # https://stackoverflow.com/a/32481015
    own = self._range
    if not other:
        # An empty range is contained in anything.
        return True
    if not own:
        return False
    if len(other) > 1 and other.step % own.step:
        return False
    # Steps are compatible, so checking both end points is enough.
    return other.start in own and other[-1] in own

714

def _union(self, other: Index, sort: bool | None):
    """
    Form the union of two Index objects and sorts if possible

    Parameters
    ----------
    other : Index or array-like

    sort : bool or None, default None
        Whether to sort (monotonically increasing) the resulting index.
        ``sort=None|True`` returns a ``RangeIndex`` if possible or a sorted
        ``Index`` with a int64 dtype if not.
        ``sort=False`` can return a ``RangeIndex`` if self is monotonically
        increasing and other is fully contained in self. Otherwise, returns
        an unsorted ``Index`` with an int64 dtype.

    Returns
    -------
    union : Index
    """
    if isinstance(other, RangeIndex):
        if sort in (None, True) or (
            sort is False and self.step > 0 and self._range_in_self(other._range)
        ):
            # GH 47557: Can still return a RangeIndex
            # if other range in self and sort=False
            start_s, step_s = self.start, self.step
            end_s = self.start + self.step * (len(self) - 1)
            start_o, step_o = other.start, other.step
            end_o = other.start + other.step * (len(other) - 1)
            # Normalize both operands to increasing (start, step, end).
            if self.step < 0:
                start_s, step_s, end_s = end_s, -step_s, start_s
            if other.step < 0:
                start_o, step_o, end_o = end_o, -step_o, start_o
            # A single-element range has no meaningful step of its own.
            if len(self) == 1 and len(other) == 1:
                step_s = step_o = abs(self.start - other.start)
            elif len(self) == 1:
                step_s = step_o
            elif len(other) == 1:
                step_o = step_s
            start_r = min(start_s, start_o)
            end_r = max(end_s, end_o)
            if step_o == step_s:
                if (
                    (start_s - start_o) % step_s == 0
                    and (start_s - end_o) <= step_s
                    and (start_o - end_s) <= step_s
                ):
                    return type(self)(start_r, end_r + step_s, step_s)
                if step_s % 2 == 0:
                    # Integer half-step: true division would pass floats
                    # to the constructor and lose precision for bounds
                    # beyond 2**53.
                    half_step = step_s // 2
                    if (
                        abs(start_s - start_o) == half_step
                        and abs(end_s - end_o) == half_step
                    ):
                        # e.g. range(0, 10, 2) and range(1, 11, 2)
                        # but not range(0, 20, 4) and range(1, 21, 4) GH#44019
                        return type(self)(start_r, end_r + half_step, half_step)

            elif step_o % step_s == 0:
                if (
                    (start_o - start_s) % step_s == 0
                    and (start_o + step_s >= start_s)
                    and (end_o - step_s <= end_s)
                ):
                    return type(self)(start_r, end_r + step_s, step_s)
            elif step_s % step_o == 0:
                if (
                    (start_s - start_o) % step_o == 0
                    and (start_s + step_o >= start_o)
                    and (end_s - step_o <= end_o)
                ):
                    return type(self)(start_r, end_r + step_o, step_o)

    return super()._union(other, sort=sort)

789

def _difference(self, other, sort=None):
    """
    Return the elements of ``self`` that are not in ``other``.

    When ``other`` is a RangeIndex the result stays a RangeIndex wherever
    the remaining elements are still evenly spaced; every other case is
    handed to the parent implementation.
    """
    # optimized set operation if we have another RangeIndex
    self._validate_sort_keyword(sort)
    self._assert_can_do_setop(other)
    other, _ = self._convert_can_do_setop(other)

    if not isinstance(other, RangeIndex):
        return super()._difference(other, sort=sort)

    if sort is not False and self.step < 0:
        return self[::-1]._difference(other)

    res_name = ops.get_op_result_name(self, other)

    # ``first`` is an increasing view of self; ``overlap`` is made increasing too.
    first = self._range[::-1] if self.step < 0 else self._range
    overlap = self.intersection(other)
    if overlap.step < 0:
        overlap = overlap[::-1]

    n_overlap = len(overlap)
    if n_overlap == 0:
        return self.rename(name=res_name)
    if n_overlap == len(self):
        return self[:0].rename(res_name)

    # overlap.step will always be a multiple of self.step (see _intersection)

    if n_overlap == 1:
        lone = overlap[0]
        if lone == self[0]:
            return self[1:]
        if lone == self[-1]:
            return self[:-1]
        if len(self) == 3 and lone == self[1]:
            return self[::2]
        return super()._difference(other, sort=sort)

    if n_overlap == 2 and overlap[0] == first[0] and overlap[-1] == first[-1]:
        # e.g. range(-8, 20, 7) and range(13, -9, -3)
        return self[1:-1]

    # ``new_rng`` stays None when what remains is not range-like.
    new_rng = None
    if overlap.step == first.step:
        if overlap[0] == first.start:
            # The difference is everything after the intersection
            new_rng = range(overlap[-1] + first.step, first.stop, first.step)
        elif overlap[-1] == first[-1]:
            # The difference is everything before the intersection
            new_rng = range(first.start, overlap[0], first.step)
        elif overlap._range == first[1:-1]:
            # e.g. range(4) and range(1, 3)
            new_rng = first[:: len(first) - 1]
        # otherwise e.g. range(1, 10, 1) and range(3, 7, 1)
    else:
        # We must have len(self) > 1, bc we ruled out above
        # len(overlap) == 0 and len(overlap) == len(self)
        assert len(self) > 1

        if overlap.step == first.step * 2:
            reaches_tail = overlap[-1] in (first[-1], first[-2])
            if reaches_tail and overlap[0] == first[0]:
                # e.g. range(1, 10, 1) and range(1, 10, 2)
                new_rng = first[1::2]
            elif reaches_tail and overlap[0] == first[1]:
                # e.g. range(1, 10, 1) and range(2, 10, 2)
                new_rng = first[::2]
            # otherwise e.g. range(20) and range(0, 10, 2)
        # otherwise e.g. range(10) and range(0, 10, 3)

    if new_rng is None:
        return super()._difference(other, sort=sort)

    new_index = type(self)._simple_new(new_rng, name=res_name)
    if first is not self._range:
        new_index = new_index[::-1]

    return new_index

876

def symmetric_difference(
    self, other, result_name: Hashable | None = None, sort=None
):
    """
    Return the elements that are in exactly one of ``self`` and ``other``.

    The RangeIndex-specific path is used only when ``other`` is a
    RangeIndex and ``sort`` is None; otherwise the parent handles it.
    """
    if isinstance(other, RangeIndex) and sort is None:
        only_left = self.difference(other)
        only_right = other.difference(self)
        result = only_left.union(only_right)
        if result_name is not None:
            result = result.rename(result_name)
        return result

    return super().symmetric_difference(other, result_name, sort)

890

891 # --------------------------------------------------------------------

892

893 # error: Return type "Index" of "delete" incompatible with return type

894 # "RangeIndex" in supertype "Index"

def delete(self, loc) -> Index:  # type: ignore[override]
    """
    Return a new index with the element(s) at ``loc`` removed.

    A RangeIndex is kept when an end is dropped, when the middle of a
    three-element index is dropped, or when a list-like ``loc`` reduces to
    a slice; other cases use the parent implementation.
    """
    # In some cases we can retain RangeIndex, see also
    #  DatetimeTimedeltaMixin._get_delete_Freq
    n = len(self)
    if is_integer(loc):
        if loc in (0, -n):
            return self[1:]
        if loc in (-1, n - 1):
            return self[:-1]
        if n == 3 and loc in (1, -2):
            return self[::2]

    elif lib.is_list_like(loc):
        slc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), n)

        if isinstance(slc, slice):
            # defer to RangeIndex._difference, which is optimized to return
            #  a RangeIndex whenever possible
            return self.difference(self[slc], sort=False)

    return super().delete(loc)

916

def insert(self, loc: int, item) -> Index:
    """
    Return a new index with ``item`` inserted at position ``loc``.

    A RangeIndex is returned when the item extends the range by one step
    at the front or the back, or when it is the exact midpoint placed
    between the two elements of a two-element index. Every other case is
    handled by the parent implementation.

    Parameters
    ----------
    loc : int
        Position before which ``item`` is inserted.
    item : object
        Value to insert.

    Returns
    -------
    Index
    """
    if len(self) and (is_integer(item) or is_float(item)):
        # We can retain RangeIndex is inserting at the beginning or end,
        #  or right in the middle.
        rng = self._range
        if loc == 0 and item == self[0] - self.step:
            new_rng = range(rng.start - rng.step, rng.stop, rng.step)
            return type(self)._simple_new(new_rng, name=self._name)

        elif loc == len(self) and item == self[-1] + self.step:
            new_rng = range(rng.start, rng.stop + rng.step, rng.step)
            return type(self)._simple_new(new_rng, name=self._name)

        elif (
            len(self) == 2
            # the midpoint only keeps the index a range when it actually
            # lands between the two existing elements
            and loc in (1, -1)
            # an odd step has no integer midpoint
            and rng.step % 2 == 0
            and item == rng.start + rng.step // 2
        ):
            # e.g. inserting 1 into [0, 2]
            half_step = rng.step // 2
            # Bound the new range by the last existing element: the
            # original ``stop`` may lie more than one half-step past it.
            new_rng = range(rng.start, rng[-1] + half_step, half_step)
            return type(self)._simple_new(new_rng, name=self._name)

    return super().insert(loc, item)

937

def _concat(self, indexes: list[Index], name: Hashable) -> Index:
    """
    Overriding parent method for the case of all RangeIndex instances.

    When all members of "indexes" are of type RangeIndex: result will be
    RangeIndex if possible, Index with a int64 dtype otherwise. E.g.:
    indexes = [RangeIndex(3), RangeIndex(3, 6)] -> RangeIndex(6)
    indexes = [RangeIndex(3), RangeIndex(4, 6)] -> Index([0,1,2,4,5], dtype='int64')
    """
    if not all(isinstance(x, RangeIndex) for x in indexes):
        return super()._concat(indexes, name)

    if len(indexes) == 1:
        return indexes[0]

    rng_indexes = cast(list[RangeIndex], indexes)

    def _materialized():
        # Fallback: concatenate the concrete values of every input.
        values = np.concatenate([x._values for x in rng_indexes])
        return self._constructor(values).rename(name)

    start = step = next_ = None

    # Empty indexes contribute nothing and are ignored.
    non_empty_indexes = [obj for obj in rng_indexes if len(obj)]

    for obj in non_empty_indexes:
        rng = obj._range

        if start is None:
            # This is set by the first non-empty index
            start = rng.start
            if step is None and len(rng) > 1:
                step = rng.step
        elif step is None:
            # First non-empty index had only one element
            if rng.start == start:
                return _materialized()

            step = rng.start - start

        step_mismatch = step != rng.step and len(rng) > 1
        gap = next_ is not None and rng.start != next_
        if step_mismatch or gap:
            return _materialized()

        if step is not None:
            next_ = rng[-1] + step

    if not non_empty_indexes:
        # Here all "indexes" had 0 length, i.e. were empty.
        # In this case return an empty range index.
        return RangeIndex(0, 0).rename(name)

    # Get the stop value from "next" or alternatively
    # from the last non-empty index
    stop = non_empty_indexes[-1].stop if next_ is None else next_
    return RangeIndex(start, stop, step).rename(name)

998

def __len__(self) -> int:
    """
    return the length of the RangeIndex
    """
    rng = self._range
    return len(rng)

1004

@property
def size(self) -> int:
    """Return the number of elements, identical to ``len(self)``."""
    count = len(self)
    return count

1008

def __getitem__(self, key):
    """
    Conserve RangeIndex type for scalar and slice keys.
    """
    if isinstance(key, slice):
        return self._getitem_slice(key)

    if is_integer(key):
        position = int(key)
        try:
            return self._range[position]
        except IndexError as err:
            raise IndexError(
                f"index {key} is out of bounds for axis 0 with size {len(self)}"
            ) from err

    if is_scalar(key):
        # Any other scalar is not a valid index.
        raise IndexError(
            "only integers, slices (`:`), "
            "ellipsis (`...`), numpy.newaxis (`None`) "
            "and integer or boolean "
            "arrays are valid indices"
        )

    return super().__getitem__(key)

1031

def _getitem_slice(self, slobj: slice) -> Self:
    """
    Fastpath for __getitem__ when we know we have a slice.
    """
    # Slicing a range yields a range, so the result stays a RangeIndex.
    return type(self)._simple_new(self._range[slobj], name=self._name)

1038

@unpack_zerodim_and_defer("__floordiv__")
def __floordiv__(self, other):
    """
    Floor-divide by ``other``, keeping a RangeIndex where possible.

    That is the case for a non-zero integer divisor when the index is
    empty, when both start and step divide evenly, or when the index has a
    single element. Everything else uses the parent implementation.
    """
    if is_integer(other) and other != 0:
        n = len(self)
        if n == 0 or (self.start % other == 0 and self.step % other == 0):
            start = self.start // other
            step = self.step // other
            stop = start + n * step
            # ``step or 1`` guards against a zero step for ``range``.
            return self._simple_new(range(start, stop, step or 1), name=self._name)
        if n == 1:
            start = self.start // other
            return self._simple_new(range(start, start + 1, 1), name=self._name)

    return super().__floordiv__(other)

1054

1055 # --------------------------------------------------------------------

1056 # Reductions

1057

def all(self, *args, **kwargs) -> bool:
    """Return True unless 0 is one of the values in the range."""
    has_zero = 0 in self._range
    return not has_zero

1060

def any(self, *args, **kwargs) -> bool:
    """
    Return True when at least one element of the range is non-zero.

    Extra positional and keyword arguments are accepted and ignored.
    """
    # Stops at the first truthy value.
    for value in self._range:
        if value:
            return True
    return False

1063

1064 # --------------------------------------------------------------------

1065

def _cmp_method(self, other, op):
    """
    Compare with ``other`` through the parent implementation.

    When ``other`` is a RangeIndex whose underlying range equals this one,
    ``self`` is passed to the parent in place of ``other``.
    """
    # Both are immutable so if ._range attr. are equal, shortcut is possible
    same_range = isinstance(other, RangeIndex) and self._range == other._range
    target = self if same_range else other
    return super()._cmp_method(target, op)

1071

def _arith_method(self, other, op):
    """
    Apply a binary arithmetic operation, keeping a range-based result when
    the outcome can be described by (start, stop, step).

    Parameters
    ----------
    other : Any
    op : callable that accepts 2 params
        perform the binary op

    Returns
    -------
    object
        ``NotImplemented`` when ``other`` is a TimedeltaIndex; otherwise
        either an index built from the transformed start/stop/step (cast to
        float64 if any of them is not an integer) or whatever the parent
        ``_arith_method`` returns.
    """

    if isinstance(other, ABCTimedeltaIndex):
        # Defer to TimedeltaIndex implementation
        return NotImplemented
    elif isinstance(other, (timedelta, np.timedelta64)):
        # GH#19333 is_integer evaluated True on timedelta64,
        # so we need to catch these explicitly
        return super()._arith_method(other, op)
    elif lib.is_np_dtype(getattr(other, "dtype", None), "m"):
        # Must be an np.ndarray; GH#22390
        return super()._arith_method(other, op)

    # These operations are never attempted on start/stop/step here;
    # they always go through the parent implementation.
    if op in [
        operator.pow,
        ops.rpow,
        operator.mod,
        ops.rmod,
        operator.floordiv,
        ops.rfloordiv,
        divmod,
        ops.rdivmod,
    ]:
        return super()._arith_method(other, op)

    # For multiplication and true division the step must be transformed
    # with the same op; for the remaining ops the step is kept (or negated
    # for rsub, see below).
    step: Callable | None = None
    if op in [operator.mul, ops.rmul, operator.truediv, ops.rtruediv]:
        step = op

    # TODO: if other is a RangeIndex we may have more efficient options
    right = extract_array(other, extract_numpy=True, extract_range=True)
    left = self

    try:
        # apply if we have an override
        if step:
            with np.errstate(all="ignore"):
                rstep = step(left.step, right)

            # we don't have a representable op
            # so return a base index
            # (the ValueError is caught by the handler at the bottom)
            if not is_integer(rstep) or not rstep:
                raise ValueError

        # GH#53255
        else:
            rstep = -left.step if op == ops.rsub else left.step

        with np.errstate(all="ignore"):
            rstart = op(left.start, right)
            rstop = op(left.stop, right)

        res_name = ops.get_op_result_name(self, other)
        result = type(self)(rstart, rstop, rstep, name=res_name)

        # for compat with numpy / Index with int64 dtype
        # even if we can represent as a RangeIndex, return
        # as a float64 Index if we have float-like descriptors
        if not all(is_integer(x) for x in [rstart, rstop, rstep]):
            result = result.astype("float64")

        return result

    except (ValueError, TypeError, ZeroDivisionError):
        # test_arithmetic_explicit_conversions
        # Any failure above falls back to the parent implementation.
        return super()._arith_method(other, op)

1145

1146 # error: Return type "Index" of "take" incompatible with return type

1147 # "RangeIndex" in supertype "Index"

def take(  # type: ignore[override]
    self,
    indices,
    axis: Axis = 0,
    allow_fill: bool = True,
    fill_value=None,
    **kwargs,
) -> Index:
    """
    Return the values at the given positions as a new index.

    Positions are bounds-checked against ``len(self)``; negative positions
    count from the end.  The values are computed arithmetically as
    ``start + position * step``.

    Parameters
    ----------
    indices : array-like
        Positions to take; a scalar raises ``TypeError``.
    axis : Axis, default 0
        Not used by this implementation.
    allow_fill : bool, default True
    fill_value : object, default None
        Both are forwarded to ``_maybe_disallow_fill``.
    **kwargs
        Checked with ``nv.validate_take`` when non-empty.

    Raises
    ------
    TypeError
        If ``indices`` is a scalar.
    IndexError
        If any position lies outside ``[-len(self), len(self))``.
    """
    if kwargs:
        nv.validate_take((), kwargs)
    if is_scalar(indices):
        raise TypeError("Expected indices to be array-like")
    indices = ensure_platform_int(indices)

    # raise an exception if allow_fill is True and fill_value is not None
    self._maybe_disallow_fill(allow_fill, fill_value, indices)

    if len(indices) == 0:
        taken = np.array([], dtype=self.dtype)
        # _constructor so RangeIndex-> Index with an int64 dtype
        return self._constructor._simple_new(taken, name=self.name)

    ind_max = indices.max()
    if ind_max >= len(self):
        raise IndexError(
            f"index {ind_max} is out of bounds for axis 0 with size {len(self)}"
        )
    ind_min = indices.min()
    if ind_min < -len(self):
        raise IndexError(
            f"index {ind_min} is out of bounds for axis 0 with size {len(self)}"
        )

    taken = indices.astype(self.dtype, casting="safe")
    if ind_min < 0:
        # Wrap negative positions into the 0..len(self)-1 range.
        taken %= len(self)
    if self.step != 1:
        taken *= self.step
    if self.start != 0:
        taken += self.start

    # _constructor so RangeIndex-> Index with an int64 dtype
    return self._constructor._simple_new(taken, name=self.name)