Coverage for /pythoncovmergedfiles/medio/medio/usr/local/lib/python3.8/site-packages/pandas/core/indexes/interval.py: 29%

376 statements  

1""" define the IntervalIndex """ 

2from __future__ import annotations 

3 

4from operator import ( 

5 le, 

6 lt, 

7) 

8import textwrap 

9from typing import ( 

10 Any, 

11 Hashable, 

12 Literal, 

13) 

14 

15import numpy as np 

16 

17from pandas._libs import lib 

18from pandas._libs.interval import ( 

19 Interval, 

20 IntervalMixin, 

21 IntervalTree, 

22) 

23from pandas._libs.tslibs import ( 

24 BaseOffset, 

25 Timedelta, 

26 Timestamp, 

27 to_offset, 

28) 

29from pandas._typing import ( 

30 Dtype, 

31 DtypeObj, 

32 IntervalClosedType, 

33 npt, 

34) 

35from pandas.errors import InvalidIndexError 

36from pandas.util._decorators import ( 

37 Appender, 

38 cache_readonly, 

39) 

40from pandas.util._exceptions import rewrite_exception 

41 

42from pandas.core.dtypes.cast import ( 

43 find_common_type, 

44 infer_dtype_from_scalar, 

45 maybe_box_datetimelike, 

46 maybe_downcast_numeric, 

47 maybe_upcast_numeric_to_64bit, 

48) 

49from pandas.core.dtypes.common import ( 

50 ensure_platform_int, 

51 is_datetime64tz_dtype, 

52 is_datetime_or_timedelta_dtype, 

53 is_dtype_equal, 

54 is_float, 

55 is_float_dtype, 

56 is_integer, 

57 is_integer_dtype, 

58 is_interval_dtype, 

59 is_list_like, 

60 is_number, 

61 is_object_dtype, 

62 is_scalar, 

63) 

64from pandas.core.dtypes.dtypes import IntervalDtype 

65from pandas.core.dtypes.missing import is_valid_na_for_dtype 

66 

67from pandas.core.algorithms import unique 

68from pandas.core.arrays.interval import ( 

69 IntervalArray, 

70 _interval_shared_docs, 

71) 

72import pandas.core.common as com 

73from pandas.core.indexers import is_valid_positional_slice 

74import pandas.core.indexes.base as ibase 

75from pandas.core.indexes.base import ( 

76 Index, 

77 _index_shared_docs, 

78 ensure_index, 

79 maybe_extract_name, 

80) 

81from pandas.core.indexes.datetimes import ( 

82 DatetimeIndex, 

83 date_range, 

84) 

85from pandas.core.indexes.extension import ( 

86 ExtensionIndex, 

87 inherit_names, 

88) 

89from pandas.core.indexes.multi import MultiIndex 

90from pandas.core.indexes.timedeltas import ( 

91 TimedeltaIndex, 

92 timedelta_range, 

93) 

94 

95_index_doc_kwargs = dict(ibase._index_doc_kwargs) 

96 

97_index_doc_kwargs.update( 

98 { 

99 "klass": "IntervalIndex", 

100 "qualname": "IntervalIndex", 

101 "target_klass": "IntervalIndex or list of Intervals", 

102 "name": textwrap.dedent( 

103 """\ 

104 name : object, optional 

105 Name to be stored in the index. 

106 """ 

107 ), 

108 } 

109) 

110 

111 

112def _get_next_label(label): 

113 dtype = getattr(label, "dtype", type(label)) 

114 if isinstance(label, (Timestamp, Timedelta)): 

115 dtype = "datetime64" 

116 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

117 return label + np.timedelta64(1, "ns") 

118 elif is_integer_dtype(dtype): 

119 return label + 1 

120 elif is_float_dtype(dtype): 

121 return np.nextafter(label, np.infty) 

122 else: 

123 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

124 

125 

126def _get_prev_label(label): 

127 dtype = getattr(label, "dtype", type(label)) 

128 if isinstance(label, (Timestamp, Timedelta)): 

129 dtype = "datetime64" 

130 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype): 

131 return label - np.timedelta64(1, "ns") 

132 elif is_integer_dtype(dtype): 

133 return label - 1 

134 elif is_float_dtype(dtype): 

135 return np.nextafter(label, -np.infty) 

136 else: 

137 raise TypeError(f"cannot determine next label for type {repr(type(label))}") 

138 
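# Illustrative sketch (added for clarity, not part of the upstream file): the two
# helpers above nudge a label by the smallest meaningful step for its type, which
# _searchsorted_monotonic below uses to handle open endpoints. Assuming
# ``import pandas as pd``:
#
#   >>> _get_next_label(5)                            # integers step by 1 -> 6
#   >>> _get_next_label(5.0)                          # floats use np.nextafter toward +inf
#   >>> _get_prev_label(pd.Timestamp("2020-01-01"))   # datetime-likes step back by 1 ns
#
# Unsupported label types (e.g. strings) raise TypeError.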

139 

140def _new_IntervalIndex(cls, d): 

141 """ 

142 This is called upon unpickling, rather than the default which doesn't have 

143 arguments and breaks __new__. 

144 """ 

145 return cls.from_arrays(**d) 

146 

147 

148@Appender( 

149 _interval_shared_docs["class"] 

150 % { 

151 "klass": "IntervalIndex", 

152 "summary": "Immutable index of intervals that are closed on the same side.", 

153 "name": _index_doc_kwargs["name"], 

154 "versionadded": "0.20.0", 

155 "extra_attributes": "is_overlapping\nvalues\n", 

156 "extra_methods": "", 

157 "examples": textwrap.dedent( 

158 """\ 

159 Examples 

160 -------- 

161 A new ``IntervalIndex`` is typically constructed using 

162 :func:`interval_range`: 

163 

164 >>> pd.interval_range(start=0, end=5) 

165 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

166 dtype='interval[int64, right]') 

167 

168 It may also be constructed using one of the constructor 

169 methods: :meth:`IntervalIndex.from_arrays`, 

170 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`. 

171 

172 See further examples in the doc strings of ``interval_range`` and the 

173 mentioned constructor methods. 

174 """ 

175 ), 

176 } 

177) 

178@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True) 

179@inherit_names( 

180 [ 

181 "__array__", 

182 "overlaps", 

183 "contains", 

184 "closed_left", 

185 "closed_right", 

186 "open_left", 

187 "open_right", 

188 "is_empty", 

189 ], 

190 IntervalArray, 

191) 

192@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True) 

193class IntervalIndex(ExtensionIndex): 

194 _typ = "intervalindex" 

195 

196 # annotate properties pinned via inherit_names 

197 closed: IntervalClosedType 

198 is_non_overlapping_monotonic: bool 

199 closed_left: bool 

200 closed_right: bool 

201 open_left: bool 

202 open_right: bool 

203 

204 _data: IntervalArray 

205 _values: IntervalArray 

206 _can_hold_strings = False 

207 _data_cls = IntervalArray 

208 

209 # -------------------------------------------------------------------- 

210 # Constructors 

211 

212 def __new__( 

213 cls, 

214 data, 

215 closed=None, 

216 dtype: Dtype | None = None, 

217 copy: bool = False, 

218 name: Hashable = None, 

219 verify_integrity: bool = True, 

220 ) -> IntervalIndex: 

221 name = maybe_extract_name(name, data, cls) 

222 

223 with rewrite_exception("IntervalArray", cls.__name__): 

224 array = IntervalArray( 

225 data, 

226 closed=closed, 

227 copy=copy, 

228 dtype=dtype, 

229 verify_integrity=verify_integrity, 

230 ) 

231 

232 return cls._simple_new(array, name) 

233 

234 @classmethod 

235 @Appender( 

236 _interval_shared_docs["from_breaks"] 

237 % { 

238 "klass": "IntervalIndex", 

239 "name": textwrap.dedent( 

240 """ 

241 name : str, optional 

242 Name of the resulting IntervalIndex.""" 

243 ), 

244 "examples": textwrap.dedent( 

245 """\ 

246 Examples 

247 -------- 

248 >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3]) 

249 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

250 dtype='interval[int64, right]') 

251 """ 

252 ), 

253 } 

254 ) 

255 def from_breaks( 

256 cls, 

257 breaks, 

258 closed: IntervalClosedType | None = "right", 

259 name: Hashable = None, 

260 copy: bool = False, 

261 dtype: Dtype | None = None, 

262 ) -> IntervalIndex: 

263 with rewrite_exception("IntervalArray", cls.__name__): 

264 array = IntervalArray.from_breaks( 

265 breaks, closed=closed, copy=copy, dtype=dtype 

266 ) 

267 return cls._simple_new(array, name=name) 

268 

269 @classmethod 

270 @Appender( 

271 _interval_shared_docs["from_arrays"] 

272 % { 

273 "klass": "IntervalIndex", 

274 "name": textwrap.dedent( 

275 """ 

276 name : str, optional 

277 Name of the resulting IntervalIndex.""" 

278 ), 

279 "examples": textwrap.dedent( 

280 """\ 

281 Examples 

282 -------- 

283 >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3]) 

284 IntervalIndex([(0, 1], (1, 2], (2, 3]], 

285 dtype='interval[int64, right]') 

286 """ 

287 ), 

288 } 

289 ) 

290 def from_arrays( 

291 cls, 

292 left, 

293 right, 

294 closed: IntervalClosedType = "right", 

295 name: Hashable = None, 

296 copy: bool = False, 

297 dtype: Dtype | None = None, 

298 ) -> IntervalIndex: 

299 with rewrite_exception("IntervalArray", cls.__name__): 

300 array = IntervalArray.from_arrays( 

301 left, right, closed, copy=copy, dtype=dtype 

302 ) 

303 return cls._simple_new(array, name=name) 

304 

305 @classmethod 

306 @Appender( 

307 _interval_shared_docs["from_tuples"] 

308 % { 

309 "klass": "IntervalIndex", 

310 "name": textwrap.dedent( 

311 """ 

312 name : str, optional 

313 Name of the resulting IntervalIndex.""" 

314 ), 

315 "examples": textwrap.dedent( 

316 """\ 

317 Examples 

318 -------- 

319 >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)]) 

320 IntervalIndex([(0, 1], (1, 2]], 

321 dtype='interval[int64, right]') 

322 """ 

323 ), 

324 } 

325 ) 

326 def from_tuples( 

327 cls, 

328 data, 

329 closed: IntervalClosedType = "right", 

330 name: Hashable = None, 

331 copy: bool = False, 

332 dtype: Dtype | None = None, 

333 ) -> IntervalIndex: 

334 with rewrite_exception("IntervalArray", cls.__name__): 

335 arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype) 

336 return cls._simple_new(arr, name=name) 

337 

338 # -------------------------------------------------------------------- 

339 # error: Return type "IntervalTree" of "_engine" incompatible with return type 

340 # "Union[IndexEngine, ExtensionEngine]" in supertype "Index" 

341 @cache_readonly 

342 def _engine(self) -> IntervalTree: # type: ignore[override] 

343 # IntervalTree does not support numpy arrays unless they are 64-bit

344 left = self._maybe_convert_i8(self.left) 

345 left = maybe_upcast_numeric_to_64bit(left) 

346 right = self._maybe_convert_i8(self.right) 

347 right = maybe_upcast_numeric_to_64bit(right) 

348 return IntervalTree(left, right, closed=self.closed) 

349 

350 def __contains__(self, key: Any) -> bool: 

351 """ 

352 Return a boolean indicating whether this key is in the index.

353 We *only* accept an Interval key.

354 

355 Parameters 

356 ---------- 

357 key : Interval 

358 

359 Returns 

360 ------- 

361 bool 

362 """ 

363 hash(key) 

364 if not isinstance(key, Interval): 

365 if is_valid_na_for_dtype(key, self.dtype): 

366 return self.hasnans 

367 return False 

368 

369 try: 

370 self.get_loc(key) 

371 return True 

372 except KeyError: 

373 return False 

374 
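# Hedged usage sketch (not from the original source), assuming a small
# integer-backed index and ``import pandas as pd``:
#
#   >>> idx = pd.IntervalIndex.from_breaks([0, 1, 2])
#   >>> pd.Interval(0, 1) in idx                  # exact match -> True
#   >>> pd.Interval(0, 1, closed="left") in idx   # closed mismatch -> False
#   >>> 0.5 in idx                                # scalars are never "in" -> False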

375 @cache_readonly 

376 def _multiindex(self) -> MultiIndex: 

377 return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"]) 

378 

379 def __reduce__(self): 

380 d = { 

381 "left": self.left, 

382 "right": self.right, 

383 "closed": self.closed, 

384 "name": self.name, 

385 } 

386 return _new_IntervalIndex, (type(self), d), None 

387 

388 @property 

389 def inferred_type(self) -> str: 

390 """Return a string of the type inferred from the values""" 

391 return "interval" 

392 

393 # Cannot determine type of "memory_usage" 

394 @Appender(Index.memory_usage.__doc__) # type: ignore[has-type] 

395 def memory_usage(self, deep: bool = False) -> int: 

396 # we don't use an explicit engine 

397 # so return the bytes here 

398 return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep) 

399 

400 # IntervalTree doesn't have an is_monotonic_decreasing, so we have to override

401 # the Index implementation 

402 @cache_readonly 

403 def is_monotonic_decreasing(self) -> bool: 

404 """ 

405 Return True if the IntervalIndex is monotonic decreasing (only equal or 

406 decreasing values), else False 

407 """ 

408 return self[::-1].is_monotonic_increasing 

409 
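# A minimal sketch of the reversal trick above (illustrative, not upstream):
#
#   >>> idx = pd.interval_range(0, 3)        # (0, 1], (1, 2], (2, 3]
#   >>> idx.is_monotonic_decreasing          # False
#   >>> idx[::-1].is_monotonic_decreasing    # True, via is_monotonic_increasing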

410 @cache_readonly 

411 def is_unique(self) -> bool: 

412 """ 

413 Return True if the IntervalIndex contains unique elements, else False. 

414 """ 

415 left = self.left 

416 right = self.right 

417 

418 if self.isna().sum() > 1: 

419 return False 

420 

421 if left.is_unique or right.is_unique: 

422 return True 

423 

424 seen_pairs = set() 

425 check_idx = np.where(left.duplicated(keep=False))[0] 

426 for idx in check_idx: 

427 pair = (left[idx], right[idx]) 

428 if pair in seen_pairs: 

429 return False 

430 seen_pairs.add(pair) 

431 

432 return True 

433 
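# Illustrative sketch (not part of the original file): uniqueness only fails when
# a full (left, right) pair repeats, or when more than one NaN is present.
#
#   >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 2)]).is_unique   # shared left -> True
#   >>> pd.IntervalIndex.from_tuples([(0, 1), (0, 1)]).is_unique   # repeated pair -> False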

434 @property 

435 def is_overlapping(self) -> bool: 

436 """ 

437 Return True if the IntervalIndex has overlapping intervals, else False. 

438 

439 Two intervals overlap if they share a common point, including closed 

440 endpoints. Intervals that only have an open endpoint in common do not 

441 overlap. 

442 

443 Returns 

444 ------- 

445 bool 

446 Boolean indicating if the IntervalIndex has overlapping intervals. 

447 

448 See Also 

449 -------- 

450 Interval.overlaps : Check whether two Interval objects overlap. 

451 IntervalIndex.overlaps : Check an IntervalIndex elementwise for 

452 overlaps. 

453 

454 Examples 

455 -------- 

456 >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)]) 

457 >>> index 

458 IntervalIndex([(0, 2], (1, 3], (4, 5]], 

459 dtype='interval[int64, right]') 

460 >>> index.is_overlapping 

461 True 

462 

463 Intervals that share closed endpoints overlap: 

464 

465 >>> index = pd.interval_range(0, 3, closed='both') 

466 >>> index 

467 IntervalIndex([[0, 1], [1, 2], [2, 3]], 

468 dtype='interval[int64, both]') 

469 >>> index.is_overlapping 

470 True 

471 

472 Intervals that only have an open endpoint in common do not overlap: 

473 

474 >>> index = pd.interval_range(0, 3, closed='left') 

475 >>> index 

476 IntervalIndex([[0, 1), [1, 2), [2, 3)], 

477 dtype='interval[int64, left]') 

478 >>> index.is_overlapping 

479 False 

480 """ 

481 # GH 23309 

482 return self._engine.is_overlapping 

483 

484 def _needs_i8_conversion(self, key) -> bool: 

485 """ 

486 Check if a given key needs i8 conversion. Conversion is necessary for 

487 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An 

488 Interval-like requires conversion if its endpoints are one of the 

489 aforementioned types. 

490 

491 Assumes that any list-like data has already been cast to an Index. 

492 

493 Parameters 

494 ---------- 

495 key : scalar or Index-like 

496 The key that should be checked for i8 conversion 

497 

498 Returns 

499 ------- 

500 bool 

501 """ 

502 if is_interval_dtype(key) or isinstance(key, Interval): 

503 return self._needs_i8_conversion(key.left) 

504 

505 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex) 

506 return isinstance(key, i8_types) 

507 

508 def _maybe_convert_i8(self, key): 

509 """ 

510 Maybe convert a given key to its equivalent i8 value(s). Used as a 

511 preprocessing step prior to IntervalTree queries (self._engine), which 

512 expects numeric data. 

513 

514 Parameters 

515 ---------- 

516 key : scalar or list-like 

517 The key that should maybe be converted to i8. 

518 

519 Returns 

520 ------- 

521 scalar or list-like 

522 The original key if no conversion occurred, int if converted scalar, 

523 Index with an int64 dtype if converted list-like. 

524 """ 

525 if is_list_like(key): 

526 key = ensure_index(key) 

527 key = maybe_upcast_numeric_to_64bit(key) 

528 

529 if not self._needs_i8_conversion(key): 

530 return key 

531 

532 scalar = is_scalar(key) 

533 if is_interval_dtype(key) or isinstance(key, Interval): 

534 # convert left/right and reconstruct 

535 left = self._maybe_convert_i8(key.left) 

536 right = self._maybe_convert_i8(key.right) 

537 constructor = Interval if scalar else IntervalIndex.from_arrays 

538 # error: "object" not callable 

539 return constructor( 

540 left, right, closed=self.closed 

541 ) # type: ignore[operator] 

542 

543 if scalar: 

544 # Timestamp/Timedelta 

545 key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True) 

546 if lib.is_period(key): 

547 key_i8 = key.ordinal 

548 elif isinstance(key_i8, Timestamp): 

549 key_i8 = key_i8._value 

550 elif isinstance(key_i8, (np.datetime64, np.timedelta64)): 

551 key_i8 = key_i8.view("i8") 

552 else: 

553 # DatetimeIndex/TimedeltaIndex 

554 key_dtype, key_i8 = key.dtype, Index(key.asi8) 

555 if key.hasnans: 

556 # convert NaT from its i8 value to np.nan so it's not viewed 

557 # as a valid value, maybe causing errors (e.g. is_overlapping) 

558 key_i8 = key_i8.where(~key._isnan) 

559 

560 # ensure consistency with IntervalIndex subtype 

561 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], 

562 # ExtensionDtype]" has no attribute "subtype" 

563 subtype = self.dtype.subtype # type: ignore[union-attr] 

564 

565 if not is_dtype_equal(subtype, key_dtype): 

566 raise ValueError( 

567 f"Cannot index an IntervalIndex of subtype {subtype} with " 

568 f"values of dtype {key_dtype}" 

569 ) 

570 

571 return key_i8 

572 
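# Rough sketch of the conversion above (added for clarity, not upstream): datetime-like
# keys are reduced to their nanosecond integer ("i8") form before being handed to the
# IntervalTree engine.
#
#   >>> ts = pd.Timestamp("2020-01-01")
#   >>> ts.value                       # the i8 value a scalar Timestamp key becomes
#   >>> pd.DatetimeIndex([ts]).asi8    # list-likes use asi8, with NaT masked to NaN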

573 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"): 

574 if not self.is_non_overlapping_monotonic: 

575 raise KeyError( 

576 "can only get slices from an IntervalIndex if bounds are " 

577 "non-overlapping and all monotonic increasing or decreasing" 

578 ) 

579 

580 if isinstance(label, (IntervalMixin, IntervalIndex)): 

581 raise NotImplementedError("Interval objects are not currently supported") 

582 

583 # GH 20921: "not is_monotonic_increasing" for the second condition 

584 # instead of "is_monotonic_decreasing" to account for single element 

585 # indexes being both increasing and decreasing 

586 if (side == "left" and self.left.is_monotonic_increasing) or ( 

587 side == "right" and not self.left.is_monotonic_increasing 

588 ): 

589 sub_idx = self.right 

590 if self.open_right: 

591 label = _get_next_label(label) 

592 else: 

593 sub_idx = self.left 

594 if self.open_left: 

595 label = _get_prev_label(label) 

596 

597 return sub_idx._searchsorted_monotonic(label, side) 

598 

599 # -------------------------------------------------------------------- 

600 # Indexing Methods 

601 

602 def get_loc(self, key) -> int | slice | np.ndarray: 

603 """ 

604 Get integer location, slice or boolean mask for requested label. 

605 

606 Parameters 

607 ---------- 

608 key : label 

609 

610 Returns 

611 ------- 

612 int if unique index, slice if monotonic index, else mask 

613 

614 Examples 

615 -------- 

616 >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2) 

617 >>> index = pd.IntervalIndex([i1, i2]) 

618 >>> index.get_loc(1) 

619 0 

620 

621 You can also supply a point inside an interval. 

622 

623 >>> index.get_loc(1.5) 

624 1 

625 

626 If a label is in several intervals, you get the locations of all the 

627 relevant intervals. 

628 

629 >>> i3 = pd.Interval(0, 2) 

630 >>> overlapping_index = pd.IntervalIndex([i1, i2, i3]) 

631 >>> overlapping_index.get_loc(0.5) 

632 array([ True, False, True]) 

633 

634 Only exact matches will be returned if an interval is provided. 

635 

636 >>> index.get_loc(pd.Interval(0, 1)) 

637 0 

638 """ 

639 self._check_indexing_error(key) 

640 

641 if isinstance(key, Interval): 

642 if self.closed != key.closed: 

643 raise KeyError(key) 

644 mask = (self.left == key.left) & (self.right == key.right) 

645 elif is_valid_na_for_dtype(key, self.dtype): 

646 mask = self.isna() 

647 else: 

648 # assume scalar 

649 op_left = le if self.closed_left else lt 

650 op_right = le if self.closed_right else lt 

651 try: 

652 mask = op_left(self.left, key) & op_right(key, self.right) 

653 except TypeError as err: 

654 # scalar is not comparable to II subtype --> invalid label 

655 raise KeyError(key) from err 

656 

657 matches = mask.sum() 

658 if matches == 0: 

659 raise KeyError(key) 

660 if matches == 1: 

661 return mask.argmax() 

662 

663 res = lib.maybe_booleans_to_slice(mask.view("u1")) 

664 if isinstance(res, slice) and res.stop is None: 

665 # TODO: DO this in maybe_booleans_to_slice? 

666 res = slice(res.start, len(self), res.step) 

667 return res 

668 

669 def _get_indexer( 

670 self, 

671 target: Index, 

672 method: str | None = None, 

673 limit: int | None = None, 

674 tolerance: Any | None = None, 

675 ) -> npt.NDArray[np.intp]: 

676 if isinstance(target, IntervalIndex): 

677 # We only get here with not self.is_overlapping 

678 # -> at most one match per interval in target 

679 # want exact matches -> need both left/right to match, so defer to 

680 # left/right get_indexer, compare elementwise, equality -> match 

681 indexer = self._get_indexer_unique_sides(target) 

682 

683 elif not is_object_dtype(target.dtype): 

684 # homogeneous scalar index: use IntervalTree 

685 # we should always have self._should_partial_index(target) here 

686 target = self._maybe_convert_i8(target) 

687 indexer = self._engine.get_indexer(target.values) 

688 else: 

689 # heterogeneous scalar index: defer elementwise to get_loc 

690 # we should always have self._should_partial_index(target) here 

691 return self._get_indexer_pointwise(target)[0] 

692 

693 return ensure_platform_int(indexer) 

694 

695 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs) 

696 def get_indexer_non_unique( 

697 self, target: Index 

698 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

699 target = ensure_index(target) 

700 

701 if not self._should_compare(target) and not self._should_partial_index(target): 

702 # e.g. IntervalIndex with different closed or incompatible subtype 

703 # -> no matches 

704 return self._get_indexer_non_comparable(target, None, unique=False) 

705 

706 elif isinstance(target, IntervalIndex): 

707 if self.left.is_unique and self.right.is_unique: 

708 # fastpath available even if we don't have self._index_as_unique 

709 indexer = self._get_indexer_unique_sides(target) 

710 missing = (indexer == -1).nonzero()[0] 

711 else: 

712 return self._get_indexer_pointwise(target) 

713 

714 elif is_object_dtype(target.dtype) or not self._should_partial_index(target): 

715 # target might contain intervals: defer elementwise to get_loc 

716 return self._get_indexer_pointwise(target) 

717 

718 else: 

719 # Note: this case behaves differently from other Index subclasses 

720 # because IntervalIndex does partial-int indexing 

721 target = self._maybe_convert_i8(target) 

722 indexer, missing = self._engine.get_indexer_non_unique(target.values) 

723 

724 return ensure_platform_int(indexer), ensure_platform_int(missing) 

725 

726 def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]: 

727 """ 

728 _get_indexer specialized to the case where both of our sides are unique. 

729 """ 

730 # Caller is responsible for checking 

731 # `self.left.is_unique and self.right.is_unique` 

732 

733 left_indexer = self.left.get_indexer(target.left) 

734 right_indexer = self.right.get_indexer(target.right) 

735 indexer = np.where(left_indexer == right_indexer, left_indexer, -1) 

736 return indexer 

737 
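# Illustrative sketch (not from the upstream source): an exact interval match requires
# the same position in both the left and the right lookup.
#
#   >>> idx = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
#   >>> tgt = pd.IntervalIndex.from_tuples([(1, 2), (5, 6)])
#   >>> idx.get_indexer(tgt)   # array([ 1, -1]): (1, 2] matches, (5, 6] does not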

738 def _get_indexer_pointwise( 

739 self, target: Index 

740 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]: 

741 """ 

742 pointwise implementation for get_indexer and get_indexer_non_unique. 

743 """ 

744 indexer, missing = [], [] 

745 for i, key in enumerate(target): 

746 try: 

747 locs = self.get_loc(key) 

748 if isinstance(locs, slice): 

749 # Only needed for get_indexer_non_unique 

750 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp") 

751 elif lib.is_integer(locs): 

752 locs = np.array(locs, ndmin=1) 

753 else: 

754 # otherwise we have ndarray[bool] 

755 locs = np.where(locs)[0] 

756 except KeyError: 

757 missing.append(i) 

758 locs = np.array([-1]) 

759 except InvalidIndexError: 

760 # i.e. non-scalar key e.g. a tuple. 

761 # see test_append_different_columns_types_raises 

762 missing.append(i) 

763 locs = np.array([-1]) 

764 

765 indexer.append(locs) 

766 

767 indexer = np.concatenate(indexer) 

768 return ensure_platform_int(indexer), ensure_platform_int(missing) 

769 

770 @cache_readonly 

771 def _index_as_unique(self) -> bool: 

772 return not self.is_overlapping and self._engine._na_count < 2 

773 

774 _requires_unique_msg = ( 

775 "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique" 

776 ) 

777 

778 def _convert_slice_indexer(self, key: slice, kind: str): 

779 if not (key.step is None or key.step == 1): 

780 # GH#31658 if label-based, we require step == 1, 

781 # if positional, we disallow float start/stop 

782 msg = "label-based slicing with step!=1 is not supported for IntervalIndex" 

783 if kind == "loc": 

784 raise ValueError(msg) 

785 if kind == "getitem": 

786 if not is_valid_positional_slice(key): 

787 # i.e. this cannot be interpreted as a positional slice 

788 raise ValueError(msg) 

789 

790 return super()._convert_slice_indexer(key, kind) 

791 

792 @cache_readonly 

793 def _should_fallback_to_positional(self) -> bool: 

794 # integer lookups in Series.__getitem__ are unambiguously 

795 # positional in this case 

796 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any], 

797 # ExtensionDtype]" has no attribute "subtype" 

798 return self.dtype.subtype.kind in ["m", "M"] # type: ignore[union-attr] 

799 

800 def _maybe_cast_slice_bound(self, label, side: str): 

801 return getattr(self, side)._maybe_cast_slice_bound(label, side) 

802 

803 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool: 

804 if not isinstance(dtype, IntervalDtype): 

805 return False 

806 common_subtype = find_common_type([self.dtype, dtype]) 

807 return not is_object_dtype(common_subtype) 

808 

809 # -------------------------------------------------------------------- 

810 

811 @cache_readonly 

812 def left(self) -> Index: 

813 return Index(self._data.left, copy=False) 

814 

815 @cache_readonly 

816 def right(self) -> Index: 

817 return Index(self._data.right, copy=False) 

818 

819 @cache_readonly 

820 def mid(self) -> Index: 

821 return Index(self._data.mid, copy=False) 

822 

823 @property 

824 def length(self) -> Index: 

825 return Index(self._data.length, copy=False) 

826 
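# Illustrative sketch of the endpoint accessors above (added, not upstream):
#
#   >>> idx = pd.interval_range(0, 3)
#   >>> idx.left     # Index([0, 1, 2])
#   >>> idx.right    # Index([1, 2, 3])
#   >>> idx.mid      # Index([0.5, 1.5, 2.5])
#   >>> idx.length   # Index([1, 1, 1])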

827 # -------------------------------------------------------------------- 

828 # Rendering Methods 

829 # __repr__ associated methods are based on MultiIndex 

830 

831 def _format_with_header(self, header: list[str], na_rep: str) -> list[str]: 

832 # matches base class except for whitespace padding 

833 return header + list(self._format_native_types(na_rep=na_rep)) 

834 

835 def _format_native_types( 

836 self, *, na_rep: str = "NaN", quoting=None, **kwargs 

837 ) -> npt.NDArray[np.object_]: 

838 # GH 28210: use base method but with different default na_rep 

839 return super()._format_native_types(na_rep=na_rep, quoting=quoting, **kwargs) 

840 

841 def _format_data(self, name=None) -> str: 

842 # TODO: integrate with categorical and make generic 

843 # name argument is unused here; just for compat with base / categorical 

844 return f"{self._data._format_data()},{self._format_space()}" 

845 

846 # -------------------------------------------------------------------- 

847 # Set Operations 

848 

849 def _intersection(self, other, sort): 

850 """ 

851 intersection specialized to the case with matching dtypes. 

852 """ 

853 # For IntervalIndex we also know other.closed == self.closed 

854 if self.left.is_unique and self.right.is_unique: 

855 taken = self._intersection_unique(other) 

856 elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1: 

857 # Swap other/self if other is unique and self does not have 

858 # multiple NaNs 

859 taken = other._intersection_unique(self) 

860 else: 

861 # duplicates 

862 taken = self._intersection_non_unique(other) 

863 

864 if sort is None: 

865 taken = taken.sort_values() 

866 

867 return taken 

868 
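# Hedged usage sketch (not part of the original file): _intersection above is reached
# via the public Index.intersection and dispatches to the specializations below.
#
#   >>> a = pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
#   >>> b = pd.IntervalIndex.from_tuples([(1, 2), (2, 3)])
#   >>> a.intersection(b)   # IntervalIndex([(1, 2]], dtype='interval[int64, right]')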

869 def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex: 

870 """ 

871 Used when both the left and the right endpoints of the IntervalIndex

872 are unique (no duplicates on either side).

873 Return the intersection with another IntervalIndex. 

874 Parameters 

875 ---------- 

876 other : IntervalIndex 

877 Returns 

878 ------- 

879 IntervalIndex 

880 """ 

881 # Note: this is much more performant than super()._intersection(other) 

882 lindexer = self.left.get_indexer(other.left) 

883 rindexer = self.right.get_indexer(other.right) 

884 

885 match = (lindexer == rindexer) & (lindexer != -1) 

886 indexer = lindexer.take(match.nonzero()[0]) 

887 indexer = unique(indexer) 

888 

889 return self.take(indexer) 

890 

891 def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex: 

892 """ 

893 Used when the IntervalIndex does have some common endpoints, 

894 on either side.

895 Return the intersection with another IntervalIndex. 

896 

897 Parameters 

898 ---------- 

899 other : IntervalIndex 

900 

901 Returns 

902 ------- 

903 IntervalIndex 

904 """ 

905 # Note: this is about 3.25x faster than super()._intersection(other) 

906 # in IntervalIndexMethod.time_intersection_both_duplicate(1000) 

907 mask = np.zeros(len(self), dtype=bool) 

908 

909 if self.hasnans and other.hasnans: 

910 first_nan_loc = np.arange(len(self))[self.isna()][0] 

911 mask[first_nan_loc] = True 

912 

913 other_tups = set(zip(other.left, other.right)) 

914 for i, tup in enumerate(zip(self.left, self.right)): 

915 if tup in other_tups: 

916 mask[i] = True 

917 

918 return self[mask] 

919 

920 # -------------------------------------------------------------------- 

921 

922 def _get_engine_target(self) -> np.ndarray: 

923 # Note: we _could_ use libjoin functions by either casting to object 

924 # dtype or constructing tuples (faster than constructing Intervals) 

925 # but the libjoin fastpaths are no longer fast in these cases. 

926 raise NotImplementedError( 

927 "IntervalIndex does not use libjoin fastpaths or pass values to " 

928 "IndexEngine objects" 

929 ) 

930 

931 def _from_join_target(self, result): 

932 raise NotImplementedError("IntervalIndex does not use libjoin fastpaths") 

933 

934 # TODO: arithmetic operations 

935 

936 

937def _is_valid_endpoint(endpoint) -> bool: 

938 """ 

939 Helper for interval_range to check if start/end are valid types. 

940 """ 

941 return any( 

942 [ 

943 is_number(endpoint), 

944 isinstance(endpoint, Timestamp), 

945 isinstance(endpoint, Timedelta), 

946 endpoint is None, 

947 ] 

948 ) 

949 

950 

951def _is_type_compatible(a, b) -> bool: 

952 """ 

953 Helper for interval_range to check type compat of start/end/freq. 

954 """ 

955 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset)) 

956 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset)) 

957 return ( 

958 (is_number(a) and is_number(b)) 

959 or (is_ts_compat(a) and is_ts_compat(b)) 

960 or (is_td_compat(a) and is_td_compat(b)) 

961 or com.any_none(a, b) 

962 ) 

963 

964 

965def interval_range( 

966 start=None, 

967 end=None, 

968 periods=None, 

969 freq=None, 

970 name: Hashable = None, 

971 closed: IntervalClosedType = "right", 

972) -> IntervalIndex: 

973 """ 

974 Return a fixed frequency IntervalIndex. 

975 

976 Parameters 

977 ---------- 

978 start : numeric or datetime-like, default None 

979 Left bound for generating intervals. 

980 end : numeric or datetime-like, default None 

981 Right bound for generating intervals. 

982 periods : int, default None 

983 Number of periods to generate. 

984 freq : numeric, str, datetime.timedelta, or DateOffset, default None 

985 The length of each interval. Must be consistent with the type of start 

986 and end, e.g. 2 for numeric, or '5H' for datetime-like. Default is 1 

987 for numeric and 'D' for datetime-like. 

988 name : str, default None 

989 Name of the resulting IntervalIndex. 

990 closed : {'left', 'right', 'both', 'neither'}, default 'right' 

991 Whether the intervals are closed on the left-side, right-side, both 

992 or neither. 

993 

994 Returns 

995 ------- 

996 IntervalIndex 

997 

998 See Also 

999 -------- 

1000 IntervalIndex : An Index of intervals that are all closed on the same side. 

1001 

1002 Notes 

1003 ----- 

1004 Of the four parameters ``start``, ``end``, ``periods``, and ``freq``, 

1005 exactly three must be specified. If ``freq`` is omitted, the resulting 

1006 ``IntervalIndex`` will have ``periods`` linearly spaced elements between 

1007 ``start`` and ``end``, inclusively. 

1008 

1009 To learn more about datetime-like frequency strings, please see `this link 

1010 <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__. 

1011 

1012 Examples 

1013 -------- 

1014 Numeric ``start`` and ``end`` are supported.

1015 

1016 >>> pd.interval_range(start=0, end=5) 

1017 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]], 

1018 dtype='interval[int64, right]') 

1019 

1020 Additionally, datetime-like input is also supported. 

1021 

1022 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1023 ... end=pd.Timestamp('2017-01-04')) 

1024 IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03], 

1025 (2017-01-03, 2017-01-04]], 

1026 dtype='interval[datetime64[ns], right]') 

1027 

1028 The ``freq`` parameter specifies the frequency between the left and right

1029 endpoints of the individual intervals within the ``IntervalIndex``. For 

1030 numeric ``start`` and ``end``, the frequency must also be numeric. 

1031 

1032 >>> pd.interval_range(start=0, periods=4, freq=1.5) 

1033 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1034 dtype='interval[float64, right]') 

1035 

1036 Similarly, for datetime-like ``start`` and ``end``, the frequency must be 

1037 convertible to a DateOffset. 

1038 

1039 >>> pd.interval_range(start=pd.Timestamp('2017-01-01'), 

1040 ... periods=3, freq='MS') 

1041 IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01], 

1042 (2017-03-01, 2017-04-01]], 

1043 dtype='interval[datetime64[ns], right]') 

1044 

1045 Specify ``start``, ``end``, and ``periods``; the frequency is generated 

1046 automatically (linearly spaced). 

1047 

1048 >>> pd.interval_range(start=0, end=6, periods=4) 

1049 IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]], 

1050 dtype='interval[float64, right]') 

1051 

1052 The ``closed`` parameter specifies which endpoints of the individual 

1053 intervals within the ``IntervalIndex`` are closed. 

1054 

1055 >>> pd.interval_range(end=5, periods=4, closed='both') 

1056 IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]], 

1057 dtype='interval[int64, both]') 

1058 """ 

1059 start = maybe_box_datetimelike(start) 

1060 end = maybe_box_datetimelike(end) 

1061 endpoint = start if start is not None else end 

1062 

1063 if freq is None and com.any_none(periods, start, end): 

1064 freq = 1 if is_number(endpoint) else "D" 

1065 

1066 if com.count_not_none(start, end, periods, freq) != 3: 

1067 raise ValueError( 

1068 "Of the four parameters: start, end, periods, and " 

1069 "freq, exactly three must be specified" 

1070 ) 

1071 

1072 if not _is_valid_endpoint(start): 

1073 raise ValueError(f"start must be numeric or datetime-like, got {start}") 

1074 if not _is_valid_endpoint(end): 

1075 raise ValueError(f"end must be numeric or datetime-like, got {end}") 

1076 

1077 if is_float(periods): 

1078 periods = int(periods) 

1079 elif not is_integer(periods) and periods is not None: 

1080 raise TypeError(f"periods must be a number, got {periods}") 

1081 

1082 if freq is not None and not is_number(freq): 

1083 try: 

1084 freq = to_offset(freq) 

1085 except ValueError as err: 

1086 raise ValueError( 

1087 f"freq must be numeric or convertible to DateOffset, got {freq}" 

1088 ) from err 

1089 

1090 # verify type compatibility 

1091 if not all( 

1092 [ 

1093 _is_type_compatible(start, end), 

1094 _is_type_compatible(start, freq), 

1095 _is_type_compatible(end, freq), 

1096 ] 

1097 ): 

1098 raise TypeError("start, end, freq need to be type compatible") 

1099 

1100 # +1 to convert interval count to breaks count (n breaks = n-1 intervals) 

1101 if periods is not None: 

1102 periods += 1 

1103 

1104 breaks: np.ndarray | TimedeltaIndex | DatetimeIndex 

1105 

1106 if is_number(endpoint): 

1107 # force consistency between start/end/freq (lower end if freq skips it) 

1108 if com.all_not_none(start, end, freq): 

1109 end -= (end - start) % freq 

1110 

1111 # compute the period/start/end if unspecified (at most one) 

1112 if periods is None: 

1113 periods = int((end - start) // freq) + 1 

1114 elif start is None: 

1115 start = end - (periods - 1) * freq 

1116 elif end is None: 

1117 end = start + (periods - 1) * freq 

1118 

1119 breaks = np.linspace(start, end, periods) 

1120 if all(is_integer(x) for x in com.not_none(start, end, freq)): 

1121 # np.linspace always produces float output 

1122 

1123 # error: Argument 1 to "maybe_downcast_numeric" has incompatible type 

1124 # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]"; 

1125 # expected "ndarray[Any, Any]" [ 

1126 breaks = maybe_downcast_numeric( 

1127 breaks, # type: ignore[arg-type] 

1128 np.dtype("int64"), 

1129 ) 

1130 else: 

1131 # delegate to the appropriate range function 

1132 if isinstance(endpoint, Timestamp): 

1133 breaks = date_range(start=start, end=end, periods=periods, freq=freq) 

1134 else: 

1135 breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq) 

1136 

1137 return IntervalIndex.from_breaks(breaks, name=name, closed=closed)