1""" define the IntervalIndex """
2from __future__ import annotations
3
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 Any,
11 Hashable,
12 Literal,
13)
14
15import numpy as np
16
17from pandas._libs import lib
18from pandas._libs.interval import (
19 Interval,
20 IntervalMixin,
21 IntervalTree,
22)
23from pandas._libs.tslibs import (
24 BaseOffset,
25 Timedelta,
26 Timestamp,
27 to_offset,
28)
29from pandas._typing import (
30 Dtype,
31 DtypeObj,
32 IntervalClosedType,
33 npt,
34)
35from pandas.errors import InvalidIndexError
36from pandas.util._decorators import (
37 Appender,
38 cache_readonly,
39)
40from pandas.util._exceptions import rewrite_exception
41
42from pandas.core.dtypes.cast import (
43 find_common_type,
44 infer_dtype_from_scalar,
45 maybe_box_datetimelike,
46 maybe_downcast_numeric,
47 maybe_upcast_numeric_to_64bit,
48)
49from pandas.core.dtypes.common import (
50 ensure_platform_int,
51 is_datetime64tz_dtype,
52 is_datetime_or_timedelta_dtype,
53 is_dtype_equal,
54 is_float,
55 is_float_dtype,
56 is_integer,
57 is_integer_dtype,
58 is_interval_dtype,
59 is_list_like,
60 is_number,
61 is_object_dtype,
62 is_scalar,
63)
64from pandas.core.dtypes.dtypes import IntervalDtype
65from pandas.core.dtypes.missing import is_valid_na_for_dtype
66
67from pandas.core.algorithms import unique
68from pandas.core.arrays.interval import (
69 IntervalArray,
70 _interval_shared_docs,
71)
72import pandas.core.common as com
73from pandas.core.indexers import is_valid_positional_slice
74import pandas.core.indexes.base as ibase
75from pandas.core.indexes.base import (
76 Index,
77 _index_shared_docs,
78 ensure_index,
79 maybe_extract_name,
80)
81from pandas.core.indexes.datetimes import (
82 DatetimeIndex,
83 date_range,
84)
85from pandas.core.indexes.extension import (
86 ExtensionIndex,
87 inherit_names,
88)
89from pandas.core.indexes.multi import MultiIndex
90from pandas.core.indexes.timedeltas import (
91 TimedeltaIndex,
92 timedelta_range,
93)
94
# Start from a copy of the base Index doc kwargs so that the
# IntervalIndex-specific entries added just below do not mutate the mapping
# owned by ``ibase``.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
96
97_index_doc_kwargs.update(
98 {
99 "klass": "IntervalIndex",
100 "qualname": "IntervalIndex",
101 "target_klass": "IntervalIndex or list of Intervals",
102 "name": textwrap.dedent(
103 """\
104 name : object, optional
105 Name to be stored in the index.
106 """
107 ),
108 }
109)
110
111
112def _get_next_label(label):
113 dtype = getattr(label, "dtype", type(label))
114 if isinstance(label, (Timestamp, Timedelta)):
115 dtype = "datetime64"
116 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
117 return label + np.timedelta64(1, "ns")
118 elif is_integer_dtype(dtype):
119 return label + 1
120 elif is_float_dtype(dtype):
121 return np.nextafter(label, np.infty)
122 else:
123 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
124
125
126def _get_prev_label(label):
127 dtype = getattr(label, "dtype", type(label))
128 if isinstance(label, (Timestamp, Timedelta)):
129 dtype = "datetime64"
130 if is_datetime_or_timedelta_dtype(dtype) or is_datetime64tz_dtype(dtype):
131 return label - np.timedelta64(1, "ns")
132 elif is_integer_dtype(dtype):
133 return label - 1
134 elif is_float_dtype(dtype):
135 return np.nextafter(label, -np.infty)
136 else:
137 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
138
139
140def _new_IntervalIndex(cls, d):
141 """
142 This is called upon unpickling, rather than the default which doesn't have
143 arguments and breaks __new__.
144 """
145 return cls.from_arrays(**d)
146
147
148@Appender(
149 _interval_shared_docs["class"]
150 % {
151 "klass": "IntervalIndex",
152 "summary": "Immutable index of intervals that are closed on the same side.",
153 "name": _index_doc_kwargs["name"],
154 "versionadded": "0.20.0",
155 "extra_attributes": "is_overlapping\nvalues\n",
156 "extra_methods": "",
157 "examples": textwrap.dedent(
158 """\
159 Examples
160 --------
161 A new ``IntervalIndex`` is typically constructed using
162 :func:`interval_range`:
163
164 >>> pd.interval_range(start=0, end=5)
165 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
166 dtype='interval[int64, right]')
167
168 It may also be constructed using one of the constructor
169 methods: :meth:`IntervalIndex.from_arrays`,
170 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.
171
172 See further examples in the doc strings of ``interval_range`` and the
173 mentioned constructor methods.
174 """
175 ),
176 }
177)
178@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
179@inherit_names(
180 [
181 "__array__",
182 "overlaps",
183 "contains",
184 "closed_left",
185 "closed_right",
186 "open_left",
187 "open_right",
188 "is_empty",
189 ],
190 IntervalArray,
191)
192@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
193class IntervalIndex(ExtensionIndex):
194 _typ = "intervalindex"
195
196 # annotate properties pinned via inherit_names
197 closed: IntervalClosedType
198 is_non_overlapping_monotonic: bool
199 closed_left: bool
200 closed_right: bool
201 open_left: bool
202 open_right: bool
203
204 _data: IntervalArray
205 _values: IntervalArray
206 _can_hold_strings = False
207 _data_cls = IntervalArray
208
209 # --------------------------------------------------------------------
210 # Constructors
211
def __new__(
    cls,
    data,
    closed=None,
    dtype: Dtype | None = None,
    copy: bool = False,
    name: Hashable = None,
    verify_integrity: bool = True,
) -> IntervalIndex:
    # Settle the index name from the explicit ``name`` and ``data``.
    index_name = maybe_extract_name(name, data, cls)

    array_kwargs = {
        "closed": closed,
        "copy": copy,
        "dtype": dtype,
        "verify_integrity": verify_integrity,
    }
    # Construction is delegated to IntervalArray; rewrite_exception is given
    # the array and index class names so errors can be re-labelled.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray(data, **array_kwargs)

    return cls._simple_new(values, index_name)
233
234 @classmethod
235 @Appender(
236 _interval_shared_docs["from_breaks"]
237 % {
238 "klass": "IntervalIndex",
239 "name": textwrap.dedent(
240 """
241 name : str, optional
242 Name of the resulting IntervalIndex."""
243 ),
244 "examples": textwrap.dedent(
245 """\
246 Examples
247 --------
248 >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
249 IntervalIndex([(0, 1], (1, 2], (2, 3]],
250 dtype='interval[int64, right]')
251 """
252 ),
253 }
254 )
def from_breaks(
    cls,
    breaks,
    closed: IntervalClosedType | None = "right",
    name: Hashable = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The docstring is supplied by the Appender decorator, so none is written here.
    array_kwargs = {"closed": closed, "copy": copy, "dtype": dtype}
    # Build the backing IntervalArray, then wrap it without further validation.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_breaks(breaks, **array_kwargs)
    return cls._simple_new(values, name=name)
268
269 @classmethod
270 @Appender(
271 _interval_shared_docs["from_arrays"]
272 % {
273 "klass": "IntervalIndex",
274 "name": textwrap.dedent(
275 """
276 name : str, optional
277 Name of the resulting IntervalIndex."""
278 ),
279 "examples": textwrap.dedent(
280 """\
281 Examples
282 --------
283 >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
284 IntervalIndex([(0, 1], (1, 2], (2, 3]],
285 dtype='interval[int64, right]')
286 """
287 ),
288 }
289 )
def from_arrays(
    cls,
    left,
    right,
    closed: IntervalClosedType = "right",
    name: Hashable = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The docstring is supplied by the Appender decorator, so none is written here.
    # ``closed`` is passed positionally, exactly as IntervalArray.from_arrays
    # is called elsewhere in this module.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_arrays(
            left,
            right,
            closed,
            copy=copy,
            dtype=dtype,
        )
    return cls._simple_new(values, name=name)
304
305 @classmethod
306 @Appender(
307 _interval_shared_docs["from_tuples"]
308 % {
309 "klass": "IntervalIndex",
310 "name": textwrap.dedent(
311 """
312 name : str, optional
313 Name of the resulting IntervalIndex."""
314 ),
315 "examples": textwrap.dedent(
316 """\
317 Examples
318 --------
319 >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
320 IntervalIndex([(0, 1], (1, 2]],
321 dtype='interval[int64, right]')
322 """
323 ),
324 }
325 )
def from_tuples(
    cls,
    data,
    closed: IntervalClosedType = "right",
    name: Hashable = None,
    copy: bool = False,
    dtype: Dtype | None = None,
) -> IntervalIndex:
    # The docstring is supplied by the Appender decorator, so none is written here.
    array_kwargs = {"closed": closed, "copy": copy, "dtype": dtype}
    # Build the backing IntervalArray from the tuples, then wrap it.
    with rewrite_exception("IntervalArray", cls.__name__):
        values = IntervalArray.from_tuples(data, **array_kwargs)
    return cls._simple_new(values, name=name)
337
338 # --------------------------------------------------------------------
339 # error: Return type "IntervalTree" of "_engine" incompatible with return type
340 # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
@cache_readonly
def _engine(self) -> IntervalTree:  # type: ignore[override]
    # IntervalTree does not support numpy arrays unless they are 64 bit, so
    # each side is i8-converted where needed and then upcast, left first.
    prepared = [
        maybe_upcast_numeric_to_64bit(self._maybe_convert_i8(side))
        for side in (self.left, self.right)
    ]
    tree_left, tree_right = prepared
    return IntervalTree(tree_left, tree_right, closed=self.closed)
349
def __contains__(self, key: Any) -> bool:
    """
    Report whether ``key`` is an element of this index.

    Only ``Interval`` keys can match an element; a key that is a valid NA
    for this dtype matches when the index has missing values.

    Parameters
    ----------
    key : Interval

    Returns
    -------
    bool
    """
    # hashing first means an unhashable key raises here
    hash(key)

    if isinstance(key, Interval):
        try:
            self.get_loc(key)
        except KeyError:
            return False
        return True

    if is_valid_na_for_dtype(key, self.dtype):
        return self.hasnans
    return False
374
@cache_readonly
def _multiindex(self) -> MultiIndex:
    # Two-level MultiIndex of the endpoints, with levels named "left" and "right".
    endpoints = [self.left, self.right]
    return MultiIndex.from_arrays(endpoints, names=["left", "right"])
378
def __reduce__(self):
    """
    Pickle support: rebuild through ``_new_IntervalIndex`` from the
    endpoints, ``closed`` and ``name``.
    """
    state = dict(
        left=self.left,
        right=self.right,
        closed=self.closed,
        name=self.name,
    )
    reconstructor_args = (type(self), state)
    return _new_IntervalIndex, reconstructor_args, None
387
@property
def inferred_type(self) -> str:
    """
    Return the inferred type label of the values.

    For this index the label is always the string ``"interval"``.
    """
    return "interval"
392
393 # Cannot determine type of "memory_usage"
@Appender(Index.memory_usage.__doc__)  # type: ignore[has-type]
def memory_usage(self, deep: bool = False) -> int:
    # No explicit engine is used, so the total is the bytes reported by the
    # left endpoints plus the bytes reported by the right endpoints.
    left_bytes = self.left.memory_usage(deep=deep)
    right_bytes = self.right.memory_usage(deep=deep)
    return left_bytes + right_bytes
399
400 # IntervalTree doesn't have a is_monotonic_decreasing, so have to override
401 # the Index implementation
@cache_readonly
def is_monotonic_decreasing(self) -> bool:
    """
    Return True if the IntervalIndex is monotonic decreasing (only equal or
    decreasing values), else False.

    Evaluated as ``is_monotonic_increasing`` on the reversed index.
    """
    reversed_index = self[::-1]
    return reversed_index.is_monotonic_increasing
409
@cache_readonly
def is_unique(self) -> bool:
    """
    Return True if the IntervalIndex contains unique elements, else False.
    """
    left, right = self.left, self.right

    # more than one missing interval means the missing value repeats
    if self.isna().sum() > 1:
        return False

    # if either side has no repeats, no (left, right) pair can repeat
    if left.is_unique or right.is_unique:
        return True

    # only positions whose left endpoint is duplicated can collide, so
    # compare full (left, right) pairs at those positions alone
    candidates = np.where(left.duplicated(keep=False))[0]
    seen = set()
    for pos in candidates:
        endpoints = (left[pos], right[pos])
        if endpoints in seen:
            return False
        seen.add(endpoints)

    return True
433
@property
def is_overlapping(self) -> bool:
    """
    Return True if the IntervalIndex has overlapping intervals, else False.

    Two intervals overlap if they share a common point, including closed
    endpoints. Intervals that only have an open endpoint in common do not
    overlap.

    Returns
    -------
    bool
        Boolean indicating if the IntervalIndex has overlapping intervals.

    See Also
    --------
    Interval.overlaps : Check whether two Interval objects overlap.
    IntervalIndex.overlaps : Check an IntervalIndex elementwise for
        overlaps.

    Examples
    --------
    >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
    >>> index
    IntervalIndex([(0, 2], (1, 3], (4, 5]],
          dtype='interval[int64, right]')
    >>> index.is_overlapping
    True

    Intervals that share closed endpoints overlap:

    >>> index = pd.interval_range(0, 3, closed='both')
    >>> index
    IntervalIndex([[0, 1], [1, 2], [2, 3]],
          dtype='interval[int64, both]')
    >>> index.is_overlapping
    True

    Intervals that only have an open endpoint in common do not overlap:

    >>> index = pd.interval_range(0, 3, closed='left')
    >>> index
    IntervalIndex([[0, 1), [1, 2), [2, 3)],
          dtype='interval[int64, left]')
    >>> index.is_overlapping
    False
    """
    # GH 23309
    # The answer is read from the engine built by ``self._engine``.
    return self._engine.is_overlapping
483
484 def _needs_i8_conversion(self, key) -> bool:
485 """
486 Check if a given key needs i8 conversion. Conversion is necessary for
487 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
488 Interval-like requires conversion if its endpoints are one of the
489 aforementioned types.
490
491 Assumes that any list-like data has already been cast to an Index.
492
493 Parameters
494 ----------
495 key : scalar or Index-like
496 The key that should be checked for i8 conversion
497
498 Returns
499 -------
500 bool
501 """
502 if is_interval_dtype(key) or isinstance(key, Interval):
503 return self._needs_i8_conversion(key.left)
504
505 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
506 return isinstance(key, i8_types)
507
def _maybe_convert_i8(self, key):
    """
    Maybe convert a given key to its equivalent i8 value(s). Used as a
    preprocessing step prior to IntervalTree queries (self._engine), which
    expect numeric data.

    Parameters
    ----------
    key : scalar or list-like
        The key that should maybe be converted to i8.

    Returns
    -------
    scalar or list-like
        The original key if no conversion occurred, int if converted scalar,
        Index with an int64 dtype if converted list-like.

    Raises
    ------
    ValueError
        If the dtype of a converted (non-interval) key differs from this
        index's subtype.
    """
    if is_list_like(key):
        # list-likes are normalised to an Index and upcast to 64 bit first
        key = ensure_index(key)
        key = maybe_upcast_numeric_to_64bit(key)

    if not self._needs_i8_conversion(key):
        return key

    scalar = is_scalar(key)
    if is_interval_dtype(key) or isinstance(key, Interval):
        # convert left/right and reconstruct
        left = self._maybe_convert_i8(key.left)
        right = self._maybe_convert_i8(key.right)
        constructor = Interval if scalar else IntervalIndex.from_arrays
        # error: "object" not callable
        # NOTE: the result is built with self.closed, not key.closed
        return constructor(
            left, right, closed=self.closed
        )  # type: ignore[operator]

    if scalar:
        # Timestamp/Timedelta
        key_dtype, key_i8 = infer_dtype_from_scalar(key, pandas_dtype=True)
        # NOTE(review): _needs_i8_conversion does not list Period among its
        # i8 types, so the is_period branch looks unreachable from here --
        # confirm before relying on it.
        if lib.is_period(key):
            key_i8 = key.ordinal
        elif isinstance(key_i8, Timestamp):
            key_i8 = key_i8._value
        elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
            key_i8 = key_i8.view("i8")
    else:
        # DatetimeIndex/TimedeltaIndex
        key_dtype, key_i8 = key.dtype, Index(key.asi8)
        if key.hasnans:
            # convert NaT from its i8 value to np.nan so it's not viewed
            # as a valid value, maybe causing errors (e.g. is_overlapping)
            key_i8 = key_i8.where(~key._isnan)

    # ensure consistency with IntervalIndex subtype
    # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
    # ExtensionDtype]" has no attribute "subtype"
    subtype = self.dtype.subtype  # type: ignore[union-attr]

    if not is_dtype_equal(subtype, key_dtype):
        raise ValueError(
            f"Cannot index an IntervalIndex of subtype {subtype} with "
            f"values of dtype {key_dtype}"
        )

    return key_i8
572
def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
    """
    Locate ``label`` by delegating to one endpoint index's
    ``_searchsorted_monotonic``.

    Parameters
    ----------
    label : scalar
        Point to search for. Interval-like labels are rejected.
    side : {"left", "right"}, default "left"
        Passed through to the endpoint index; together with the direction
        of ``self.left`` it selects which endpoint index is searched.

    Raises
    ------
    KeyError
        If the index is not non-overlapping and monotonic.
    NotImplementedError
        If ``label`` is an interval object or IntervalIndex.
    """
    if not self.is_non_overlapping_monotonic:
        raise KeyError(
            "can only get slices from an IntervalIndex if bounds are "
            "non-overlapping and all monotonic increasing or decreasing"
        )

    if isinstance(label, (IntervalMixin, IntervalIndex)):
        raise NotImplementedError("Interval objects are not currently supported")

    # GH 20921: "not is_monotonic_increasing" for the second condition
    # instead of "is_monotonic_decreasing" to account for single element
    # indexes being both increasing and decreasing
    if (side == "left" and self.left.is_monotonic_increasing) or (
        side == "right" and not self.left.is_monotonic_increasing
    ):
        sub_idx = self.right
        if self.open_right:
            # the right endpoint itself is excluded, so step the label up
            label = _get_next_label(label)
    else:
        sub_idx = self.left
        if self.open_left:
            # the left endpoint itself is excluded, so step the label down
            label = _get_prev_label(label)

    return sub_idx._searchsorted_monotonic(label, side)
598
599 # --------------------------------------------------------------------
600 # Indexing Methods
601
def get_loc(self, key) -> int | slice | np.ndarray:
    """
    Get integer location, slice or boolean mask for requested label.

    Parameters
    ----------
    key : label

    Returns
    -------
    int if unique index, slice if monotonic index, else mask

    Raises
    ------
    KeyError
        If ``key`` is an Interval whose ``closed`` differs from the index's,
        if ``key`` cannot be compared with the endpoints, or if nothing
        matches.

    Examples
    --------
    >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
    >>> index = pd.IntervalIndex([i1, i2])
    >>> index.get_loc(1)
    0

    You can also supply a point inside an interval.

    >>> index.get_loc(1.5)
    1

    If a label is in several intervals, you get the locations of all the
    relevant intervals.

    >>> i3 = pd.Interval(0, 2)
    >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
    >>> overlapping_index.get_loc(0.5)
    array([ True, False,  True])

    Only exact matches will be returned if an interval is provided.

    >>> index.get_loc(pd.Interval(0, 1))
    0
    """
    self._check_indexing_error(key)

    if isinstance(key, Interval):
        # an Interval key must match both endpoints and the closed side
        if self.closed != key.closed:
            raise KeyError(key)
        mask = (self.left == key.left) & (self.right == key.right)
    elif is_valid_na_for_dtype(key, self.dtype):
        mask = self.isna()
    else:
        # assume scalar
        # pick <= or < per side so that open endpoints exclude the key
        op_left = le if self.closed_left else lt
        op_right = le if self.closed_right else lt
        try:
            mask = op_left(self.left, key) & op_right(key, self.right)
        except TypeError as err:
            # scalar is not comparable to II subtype --> invalid label
            raise KeyError(key) from err

    matches = mask.sum()
    if matches == 0:
        raise KeyError(key)
    if matches == 1:
        return mask.argmax()

    # several matches: return a slice when possible, otherwise the mask
    res = lib.maybe_booleans_to_slice(mask.view("u1"))
    if isinstance(res, slice) and res.stop is None:
        # TODO: DO this in maybe_booleans_to_slice?
        res = slice(res.start, len(self), res.step)
    return res
668
def _get_indexer(
    self,
    target: Index,
    method: str | None = None,
    limit: int | None = None,
    tolerance: Any | None = None,
) -> npt.NDArray[np.intp]:
    """
    Compute positions of ``target`` in this index, dispatching on the kind
    of ``target``. ``method``, ``limit`` and ``tolerance`` are not used here.
    """
    if isinstance(target, IntervalIndex):
        # We only get here with not self.is_overlapping
        # -> at most one match per interval in target
        # Exact matches need both sides to agree, so the left/right
        # indexers are compared elementwise.
        indexer = self._get_indexer_unique_sides(target)
    elif is_object_dtype(target.dtype):
        # heterogeneous scalar index: defer elementwise to get_loc
        # we should always have self._should_partial_index(target) here
        pointwise_indexer, _ = self._get_indexer_pointwise(target)
        return pointwise_indexer
    else:
        # homogeneous scalar index: use IntervalTree
        # we should always have self._should_partial_index(target) here
        converted = self._maybe_convert_i8(target)
        indexer = self._engine.get_indexer(converted.values)

    return ensure_platform_int(indexer)
694
@Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
def get_indexer_non_unique(
    self, target: Index
) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
    target = ensure_index(target)

    if not self._should_compare(target) and not self._should_partial_index(target):
        # e.g. IntervalIndex with different closed or incompatible subtype
        #  -> no matches
        return self._get_indexer_non_comparable(target, None, unique=False)

    if isinstance(target, IntervalIndex):
        if not (self.left.is_unique and self.right.is_unique):
            return self._get_indexer_pointwise(target)
        # fastpath available even if we don't have self._index_as_unique
        indexer = self._get_indexer_unique_sides(target)
        missing = (indexer == -1).nonzero()[0]
    elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
        # target might contain intervals: defer elementwise to get_loc
        return self._get_indexer_pointwise(target)
    else:
        # Note: this case behaves differently from other Index subclasses
        #  because IntervalIndex does partial-int indexing
        converted = self._maybe_convert_i8(target)
        indexer, missing = self._engine.get_indexer_non_unique(converted.values)

    return ensure_platform_int(indexer), ensure_platform_int(missing)
725
726 def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
727 """
728 _get_indexer specialized to the case where both of our sides are unique.
729 """
730 # Caller is responsible for checking
731 # `self.left.is_unique and self.right.is_unique`
732
733 left_indexer = self.left.get_indexer(target.left)
734 right_indexer = self.right.get_indexer(target.right)
735 indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
736 return indexer
737
738 def _get_indexer_pointwise(
739 self, target: Index
740 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
741 """
742 pointwise implementation for get_indexer and get_indexer_non_unique.
743 """
744 indexer, missing = [], []
745 for i, key in enumerate(target):
746 try:
747 locs = self.get_loc(key)
748 if isinstance(locs, slice):
749 # Only needed for get_indexer_non_unique
750 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
751 elif lib.is_integer(locs):
752 locs = np.array(locs, ndmin=1)
753 else:
754 # otherwise we have ndarray[bool]
755 locs = np.where(locs)[0]
756 except KeyError:
757 missing.append(i)
758 locs = np.array([-1])
759 except InvalidIndexError:
760 # i.e. non-scalar key e.g. a tuple.
761 # see test_append_different_columns_types_raises
762 missing.append(i)
763 locs = np.array([-1])
764
765 indexer.append(locs)
766
767 indexer = np.concatenate(indexer)
768 return ensure_platform_int(indexer), ensure_platform_int(missing)
769
@cache_readonly
def _index_as_unique(self) -> bool:
    # Treated as unique only when no intervals overlap and the engine
    # counts fewer than two missing values.
    if self.is_overlapping:
        return False
    return self._engine._na_count < 2
773
774 _requires_unique_msg = (
775 "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
776 )
777
778 def _convert_slice_indexer(self, key: slice, kind: str):
779 if not (key.step is None or key.step == 1):
780 # GH#31658 if label-based, we require step == 1,
781 # if positional, we disallow float start/stop
782 msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
783 if kind == "loc":
784 raise ValueError(msg)
785 if kind == "getitem":
786 if not is_valid_positional_slice(key):
787 # i.e. this cannot be interpreted as a positional slice
788 raise ValueError(msg)
789
790 return super()._convert_slice_indexer(key, kind)
791
@cache_readonly
def _should_fallback_to_positional(self) -> bool:
    # integer lookups in Series.__getitem__ are unambiguously
    #  positional in this case
    # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
    # ExtensionDtype]" has no attribute "subtype"
    subtype = self.dtype.subtype  # type: ignore[union-attr]
    # numpy kind "m" is timedelta64, "M" is datetime64
    return subtype.kind in ["m", "M"]
799
800 def _maybe_cast_slice_bound(self, label, side: str):
801 return getattr(self, side)._maybe_cast_slice_bound(label, side)
802
803 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
804 if not isinstance(dtype, IntervalDtype):
805 return False
806 common_subtype = find_common_type([self.dtype, dtype])
807 return not is_object_dtype(common_subtype)
808
809 # --------------------------------------------------------------------
810
@cache_readonly
def left(self) -> Index:
    """
    Index wrapping the left endpoints of the underlying array (no copy requested).
    """
    left_values = self._data.left
    return Index(left_values, copy=False)
814
@cache_readonly
def right(self) -> Index:
    """
    Index wrapping the right endpoints of the underlying array (no copy requested).
    """
    right_values = self._data.right
    return Index(right_values, copy=False)
818
@cache_readonly
def mid(self) -> Index:
    """
    Index wrapping the ``mid`` values of the underlying array (no copy requested).
    """
    mid_values = self._data.mid
    return Index(mid_values, copy=False)
822
@property
def length(self) -> Index:
    """
    Index wrapping the ``length`` values of the underlying array (no copy requested).

    Unlike ``left``/``right``/``mid`` this is a plain property, so a new
    Index is built on every access.
    """
    length_values = self._data.length
    return Index(length_values, copy=False)
826
827 # --------------------------------------------------------------------
828 # Rendering Methods
829 # __repr__ associated methods are based on MultiIndex
830
831 def _format_with_header(self, header: list[str], na_rep: str) -> list[str]:
832 # matches base class except for whitespace padding
833 return header + list(self._format_native_types(na_rep=na_rep))
834
def _format_native_types(
    self, *, na_rep: str = "NaN", quoting=None, **kwargs
) -> npt.NDArray[np.object_]:
    # GH 28210: use base method but with different default na_rep
    base_formatter = super()._format_native_types
    return base_formatter(na_rep=na_rep, quoting=quoting, **kwargs)
840
841 def _format_data(self, name=None) -> str:
842 # TODO: integrate with categorical and make generic
843 # name argument is unused here; just for compat with base / categorical
844 return f"{self._data._format_data()},{self._format_space()}"
845
846 # --------------------------------------------------------------------
847 # Set Operations
848
def _intersection(self, other, sort):
    """
    intersection specialized to the case with matching dtypes.

    Chooses between the unique-sides fast path (on ``self`` or, swapped, on
    ``other``) and the general path; the result is sorted only when
    ``sort`` is None.
    """
    # For IntervalIndex we also know other.closed == self.closed
    self_sides_unique = self.left.is_unique and self.right.is_unique
    if self_sides_unique:
        taken = self._intersection_unique(other)
    elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
        # Swap other/self if other is unique and self does not have
        # multiple NaNs
        taken = other._intersection_unique(self)
    else:
        # duplicates
        taken = self._intersection_non_unique(other)

    return taken.sort_values() if sort is None else taken
868
869 def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
870 """
871 Used when the IntervalIndex does not have any common endpoint,
872 no matter left or right.
873 Return the intersection with another IntervalIndex.
874 Parameters
875 ----------
876 other : IntervalIndex
877 Returns
878 -------
879 IntervalIndex
880 """
881 # Note: this is much more performant than super()._intersection(other)
882 lindexer = self.left.get_indexer(other.left)
883 rindexer = self.right.get_indexer(other.right)
884
885 match = (lindexer == rindexer) & (lindexer != -1)
886 indexer = lindexer.take(match.nonzero()[0])
887 indexer = unique(indexer)
888
889 return self.take(indexer)
890
891 def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
892 """
893 Used when the IntervalIndex does have some common endpoints,
894 on either sides.
895 Return the intersection with another IntervalIndex.
896
897 Parameters
898 ----------
899 other : IntervalIndex
900
901 Returns
902 -------
903 IntervalIndex
904 """
905 # Note: this is about 3.25x faster than super()._intersection(other)
906 # in IntervalIndexMethod.time_intersection_both_duplicate(1000)
907 mask = np.zeros(len(self), dtype=bool)
908
909 if self.hasnans and other.hasnans:
910 first_nan_loc = np.arange(len(self))[self.isna()][0]
911 mask[first_nan_loc] = True
912
913 other_tups = set(zip(other.left, other.right))
914 for i, tup in enumerate(zip(self.left, self.right)):
915 if tup in other_tups:
916 mask[i] = True
917
918 return self[mask]
919
920 # --------------------------------------------------------------------
921
922 def _get_engine_target(self) -> np.ndarray:
923 # Note: we _could_ use libjoin functions by either casting to object
924 # dtype or constructing tuples (faster than constructing Intervals)
925 # but the libjoin fastpaths are no longer fast in these cases.
926 raise NotImplementedError(
927 "IntervalIndex does not use libjoin fastpaths or pass values to "
928 "IndexEngine objects"
929 )
930
931 def _from_join_target(self, result):
932 raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")
933
934 # TODO: arithmetic operations
935
936
937def _is_valid_endpoint(endpoint) -> bool:
938 """
939 Helper for interval_range to check if start/end are valid types.
940 """
941 return any(
942 [
943 is_number(endpoint),
944 isinstance(endpoint, Timestamp),
945 isinstance(endpoint, Timedelta),
946 endpoint is None,
947 ]
948 )
949
950
951def _is_type_compatible(a, b) -> bool:
952 """
953 Helper for interval_range to check type compat of start/end/freq.
954 """
955 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))
956 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))
957 return (
958 (is_number(a) and is_number(b))
959 or (is_ts_compat(a) and is_ts_compat(b))
960 or (is_td_compat(a) and is_td_compat(b))
961 or com.any_none(a, b)
962 )
963
964
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, datetime.timedelta, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If not exactly three of ``start``, ``end``, ``periods`` and ``freq``
        are specified (after the default ``freq`` is filled in), if ``start``
        or ``end`` is not numeric or datetime-like, or if ``freq`` is neither
        numeric nor convertible to a DateOffset.
    TypeError
        If ``periods`` is not a number, or if ``start``, ``end`` and ``freq``
        are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` are supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01, 2017-01-02], (2017-01-02, 2017-01-03],
                   (2017-01-03, 2017-01-04]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01, 2017-02-01], (2017-02-01, 2017-03-01],
                   (2017-03-01, 2017-04-01]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # whichever endpoint was given decides numeric vs datetime-like handling
    endpoint = start if start is not None else end

    # fill in the default freq only when one of periods/start/end is missing
    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    if not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    if is_float(periods):
        # float periods are truncated to int
        periods = int(periods)
    elif not is_integer(periods) and periods is not None:
        raise TypeError(f"periods must be a number, got {periods}")

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        # force consistency between start/end/freq (lower end if freq skips it)
        if com.all_not_none(start, end, freq):
            end -= (end - start) % freq

        # compute the period/start/end if unspecified (at most one)
        if periods is None:
            periods = int((end - start) // freq) + 1
        elif start is None:
            start = end - (periods - 1) * freq
        elif end is None:
            end = start + (periods - 1) * freq

        breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
            # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
            # expected "ndarray[Any, Any]"  [
            breaks = maybe_downcast_numeric(
                breaks,  # type: ignore[arg-type]
                np.dtype("int64"),
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)