1""" define the IntervalIndex """
2from __future__ import annotations
3
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 TYPE_CHECKING,
11 Any,
12 Literal,
13)
14
15import numpy as np
16
17from pandas._libs import lib
18from pandas._libs.interval import (
19 Interval,
20 IntervalMixin,
21 IntervalTree,
22)
23from pandas._libs.tslibs import (
24 BaseOffset,
25 Period,
26 Timedelta,
27 Timestamp,
28 to_offset,
29)
30from pandas.errors import InvalidIndexError
31from pandas.util._decorators import (
32 Appender,
33 cache_readonly,
34)
35from pandas.util._exceptions import rewrite_exception
36
37from pandas.core.dtypes.cast import (
38 find_common_type,
39 infer_dtype_from_scalar,
40 maybe_box_datetimelike,
41 maybe_downcast_numeric,
42 maybe_upcast_numeric_to_64bit,
43)
44from pandas.core.dtypes.common import (
45 ensure_platform_int,
46 is_float_dtype,
47 is_integer,
48 is_integer_dtype,
49 is_list_like,
50 is_number,
51 is_object_dtype,
52 is_scalar,
53 pandas_dtype,
54)
55from pandas.core.dtypes.dtypes import (
56 DatetimeTZDtype,
57 IntervalDtype,
58)
59from pandas.core.dtypes.missing import is_valid_na_for_dtype
60
61from pandas.core.algorithms import unique
62from pandas.core.arrays.datetimelike import validate_periods
63from pandas.core.arrays.interval import (
64 IntervalArray,
65 _interval_shared_docs,
66)
67import pandas.core.common as com
68from pandas.core.indexers import is_valid_positional_slice
69import pandas.core.indexes.base as ibase
70from pandas.core.indexes.base import (
71 Index,
72 _index_shared_docs,
73 ensure_index,
74 maybe_extract_name,
75)
76from pandas.core.indexes.datetimes import (
77 DatetimeIndex,
78 date_range,
79)
80from pandas.core.indexes.extension import (
81 ExtensionIndex,
82 inherit_names,
83)
84from pandas.core.indexes.multi import MultiIndex
85from pandas.core.indexes.timedeltas import (
86 TimedeltaIndex,
87 timedelta_range,
88)
89
90if TYPE_CHECKING:
91 from collections.abc import Hashable
92
93 from pandas._typing import (
94 Dtype,
95 DtypeObj,
96 IntervalClosedType,
97 Self,
98 npt,
99 )
# Start from a copy of the base Index docstring substitutions so that the
# IntervalIndex-specific entries added below do not mutate the dict owned by
# ``ibase``.
_index_doc_kwargs = dict(ibase._index_doc_kwargs)

# Values substituted into the shared docstring templates used in this module.
_index_doc_kwargs.update(
    {
        "klass": "IntervalIndex",
        "qualname": "IntervalIndex",
        "target_klass": "IntervalIndex or list of Intervals",
        "name": textwrap.dedent(
            """\
         name : object, optional
              Name to be stored in the index.
         """
        ),
    }
)
115
116
117def _get_next_label(label):
118 # see test_slice_locs_with_ints_and_floats_succeeds
119 dtype = getattr(label, "dtype", type(label))
120 if isinstance(label, (Timestamp, Timedelta)):
121 dtype = "datetime64[ns]"
122 dtype = pandas_dtype(dtype)
123
124 if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):
125 return label + np.timedelta64(1, "ns")
126 elif is_integer_dtype(dtype):
127 return label + 1
128 elif is_float_dtype(dtype):
129 return np.nextafter(label, np.inf)
130 else:
131 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
132
133
134def _get_prev_label(label):
135 # see test_slice_locs_with_ints_and_floats_succeeds
136 dtype = getattr(label, "dtype", type(label))
137 if isinstance(label, (Timestamp, Timedelta)):
138 dtype = "datetime64[ns]"
139 dtype = pandas_dtype(dtype)
140
141 if lib.is_np_dtype(dtype, "mM") or isinstance(dtype, DatetimeTZDtype):
142 return label - np.timedelta64(1, "ns")
143 elif is_integer_dtype(dtype):
144 return label - 1
145 elif is_float_dtype(dtype):
146 return np.nextafter(label, -np.inf)
147 else:
148 raise TypeError(f"cannot determine next label for type {repr(type(label))}")
149
150
151def _new_IntervalIndex(cls, d):
152 """
153 This is called upon unpickling, rather than the default which doesn't have
154 arguments and breaks __new__.
155 """
156 return cls.from_arrays(**d)
157
158
159@Appender(
160 _interval_shared_docs["class"]
161 % {
162 "klass": "IntervalIndex",
163 "summary": "Immutable index of intervals that are closed on the same side.",
164 "name": _index_doc_kwargs["name"],
165 "extra_attributes": "is_overlapping\nvalues\n",
166 "extra_methods": "",
167 "examples": textwrap.dedent(
168 """\
169 Examples
170 --------
171 A new ``IntervalIndex`` is typically constructed using
172 :func:`interval_range`:
173
174 >>> pd.interval_range(start=0, end=5)
175 IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
176 dtype='interval[int64, right]')
177
178 It may also be constructed using one of the constructor
179 methods: :meth:`IntervalIndex.from_arrays`,
180 :meth:`IntervalIndex.from_breaks`, and :meth:`IntervalIndex.from_tuples`.
181
182 See further examples in the doc strings of ``interval_range`` and the
183 mentioned constructor methods.
184 """
185 ),
186 }
187)
188@inherit_names(["set_closed", "to_tuples"], IntervalArray, wrap=True)
189@inherit_names(
190 [
191 "__array__",
192 "overlaps",
193 "contains",
194 "closed_left",
195 "closed_right",
196 "open_left",
197 "open_right",
198 "is_empty",
199 ],
200 IntervalArray,
201)
202@inherit_names(["is_non_overlapping_monotonic", "closed"], IntervalArray, cache=True)
203class IntervalIndex(ExtensionIndex):
204 _typ = "intervalindex"
205
206 # annotate properties pinned via inherit_names
207 closed: IntervalClosedType
208 is_non_overlapping_monotonic: bool
209 closed_left: bool
210 closed_right: bool
211 open_left: bool
212 open_right: bool
213
214 _data: IntervalArray
215 _values: IntervalArray
216 _can_hold_strings = False
217 _data_cls = IntervalArray
218
219 # --------------------------------------------------------------------
220 # Constructors
221
222 def __new__(
223 cls,
224 data,
225 closed: IntervalClosedType | None = None,
226 dtype: Dtype | None = None,
227 copy: bool = False,
228 name: Hashable | None = None,
229 verify_integrity: bool = True,
230 ) -> Self:
231 name = maybe_extract_name(name, data, cls)
232
233 with rewrite_exception("IntervalArray", cls.__name__):
234 array = IntervalArray(
235 data,
236 closed=closed,
237 copy=copy,
238 dtype=dtype,
239 verify_integrity=verify_integrity,
240 )
241
242 return cls._simple_new(array, name)
243
244 @classmethod
245 @Appender(
246 _interval_shared_docs["from_breaks"]
247 % {
248 "klass": "IntervalIndex",
249 "name": textwrap.dedent(
250 """
251 name : str, optional
252 Name of the resulting IntervalIndex."""
253 ),
254 "examples": textwrap.dedent(
255 """\
256 Examples
257 --------
258 >>> pd.IntervalIndex.from_breaks([0, 1, 2, 3])
259 IntervalIndex([(0, 1], (1, 2], (2, 3]],
260 dtype='interval[int64, right]')
261 """
262 ),
263 }
264 )
265 def from_breaks(
266 cls,
267 breaks,
268 closed: IntervalClosedType | None = "right",
269 name: Hashable | None = None,
270 copy: bool = False,
271 dtype: Dtype | None = None,
272 ) -> IntervalIndex:
273 with rewrite_exception("IntervalArray", cls.__name__):
274 array = IntervalArray.from_breaks(
275 breaks, closed=closed, copy=copy, dtype=dtype
276 )
277 return cls._simple_new(array, name=name)
278
279 @classmethod
280 @Appender(
281 _interval_shared_docs["from_arrays"]
282 % {
283 "klass": "IntervalIndex",
284 "name": textwrap.dedent(
285 """
286 name : str, optional
287 Name of the resulting IntervalIndex."""
288 ),
289 "examples": textwrap.dedent(
290 """\
291 Examples
292 --------
293 >>> pd.IntervalIndex.from_arrays([0, 1, 2], [1, 2, 3])
294 IntervalIndex([(0, 1], (1, 2], (2, 3]],
295 dtype='interval[int64, right]')
296 """
297 ),
298 }
299 )
300 def from_arrays(
301 cls,
302 left,
303 right,
304 closed: IntervalClosedType = "right",
305 name: Hashable | None = None,
306 copy: bool = False,
307 dtype: Dtype | None = None,
308 ) -> IntervalIndex:
309 with rewrite_exception("IntervalArray", cls.__name__):
310 array = IntervalArray.from_arrays(
311 left, right, closed, copy=copy, dtype=dtype
312 )
313 return cls._simple_new(array, name=name)
314
315 @classmethod
316 @Appender(
317 _interval_shared_docs["from_tuples"]
318 % {
319 "klass": "IntervalIndex",
320 "name": textwrap.dedent(
321 """
322 name : str, optional
323 Name of the resulting IntervalIndex."""
324 ),
325 "examples": textwrap.dedent(
326 """\
327 Examples
328 --------
329 >>> pd.IntervalIndex.from_tuples([(0, 1), (1, 2)])
330 IntervalIndex([(0, 1], (1, 2]],
331 dtype='interval[int64, right]')
332 """
333 ),
334 }
335 )
336 def from_tuples(
337 cls,
338 data,
339 closed: IntervalClosedType = "right",
340 name: Hashable | None = None,
341 copy: bool = False,
342 dtype: Dtype | None = None,
343 ) -> IntervalIndex:
344 with rewrite_exception("IntervalArray", cls.__name__):
345 arr = IntervalArray.from_tuples(data, closed=closed, copy=copy, dtype=dtype)
346 return cls._simple_new(arr, name=name)
347
348 # --------------------------------------------------------------------
349 # error: Return type "IntervalTree" of "_engine" incompatible with return type
350 # "Union[IndexEngine, ExtensionEngine]" in supertype "Index"
351 @cache_readonly
352 def _engine(self) -> IntervalTree: # type: ignore[override]
353 # IntervalTree does not supports numpy array unless they are 64 bit
354 left = self._maybe_convert_i8(self.left)
355 left = maybe_upcast_numeric_to_64bit(left)
356 right = self._maybe_convert_i8(self.right)
357 right = maybe_upcast_numeric_to_64bit(right)
358 return IntervalTree(left, right, closed=self.closed)
359
360 def __contains__(self, key: Any) -> bool:
361 """
362 return a boolean if this key is IN the index
363 We *only* accept an Interval
364
365 Parameters
366 ----------
367 key : Interval
368
369 Returns
370 -------
371 bool
372 """
373 hash(key)
374 if not isinstance(key, Interval):
375 if is_valid_na_for_dtype(key, self.dtype):
376 return self.hasnans
377 return False
378
379 try:
380 self.get_loc(key)
381 return True
382 except KeyError:
383 return False
384
385 def _getitem_slice(self, slobj: slice) -> IntervalIndex:
386 """
387 Fastpath for __getitem__ when we know we have a slice.
388 """
389 res = self._data[slobj]
390 return type(self)._simple_new(res, name=self._name)
391
392 @cache_readonly
393 def _multiindex(self) -> MultiIndex:
394 return MultiIndex.from_arrays([self.left, self.right], names=["left", "right"])
395
396 def __reduce__(self):
397 d = {
398 "left": self.left,
399 "right": self.right,
400 "closed": self.closed,
401 "name": self.name,
402 }
403 return _new_IntervalIndex, (type(self), d), None
404
405 @property
406 def inferred_type(self) -> str:
407 """Return a string of the type inferred from the values"""
408 return "interval"
409
410 # Cannot determine type of "memory_usage"
411 @Appender(Index.memory_usage.__doc__) # type: ignore[has-type]
412 def memory_usage(self, deep: bool = False) -> int:
413 # we don't use an explicit engine
414 # so return the bytes here
415 return self.left.memory_usage(deep=deep) + self.right.memory_usage(deep=deep)
416
417 # IntervalTree doesn't have a is_monotonic_decreasing, so have to override
418 # the Index implementation
419 @cache_readonly
420 def is_monotonic_decreasing(self) -> bool:
421 """
422 Return True if the IntervalIndex is monotonic decreasing (only equal or
423 decreasing values), else False
424 """
425 return self[::-1].is_monotonic_increasing
426
427 @cache_readonly
428 def is_unique(self) -> bool:
429 """
430 Return True if the IntervalIndex contains unique elements, else False.
431 """
432 left = self.left
433 right = self.right
434
435 if self.isna().sum() > 1:
436 return False
437
438 if left.is_unique or right.is_unique:
439 return True
440
441 seen_pairs = set()
442 check_idx = np.where(left.duplicated(keep=False))[0]
443 for idx in check_idx:
444 pair = (left[idx], right[idx])
445 if pair in seen_pairs:
446 return False
447 seen_pairs.add(pair)
448
449 return True
450
451 @property
452 def is_overlapping(self) -> bool:
453 """
454 Return True if the IntervalIndex has overlapping intervals, else False.
455
456 Two intervals overlap if they share a common point, including closed
457 endpoints. Intervals that only have an open endpoint in common do not
458 overlap.
459
460 Returns
461 -------
462 bool
463 Boolean indicating if the IntervalIndex has overlapping intervals.
464
465 See Also
466 --------
467 Interval.overlaps : Check whether two Interval objects overlap.
468 IntervalIndex.overlaps : Check an IntervalIndex elementwise for
469 overlaps.
470
471 Examples
472 --------
473 >>> index = pd.IntervalIndex.from_tuples([(0, 2), (1, 3), (4, 5)])
474 >>> index
475 IntervalIndex([(0, 2], (1, 3], (4, 5]],
476 dtype='interval[int64, right]')
477 >>> index.is_overlapping
478 True
479
480 Intervals that share closed endpoints overlap:
481
482 >>> index = pd.interval_range(0, 3, closed='both')
483 >>> index
484 IntervalIndex([[0, 1], [1, 2], [2, 3]],
485 dtype='interval[int64, both]')
486 >>> index.is_overlapping
487 True
488
489 Intervals that only have an open endpoint in common do not overlap:
490
491 >>> index = pd.interval_range(0, 3, closed='left')
492 >>> index
493 IntervalIndex([[0, 1), [1, 2), [2, 3)],
494 dtype='interval[int64, left]')
495 >>> index.is_overlapping
496 False
497 """
498 # GH 23309
499 return self._engine.is_overlapping
500
501 def _needs_i8_conversion(self, key) -> bool:
502 """
503 Check if a given key needs i8 conversion. Conversion is necessary for
504 Timestamp, Timedelta, DatetimeIndex, and TimedeltaIndex keys. An
505 Interval-like requires conversion if its endpoints are one of the
506 aforementioned types.
507
508 Assumes that any list-like data has already been cast to an Index.
509
510 Parameters
511 ----------
512 key : scalar or Index-like
513 The key that should be checked for i8 conversion
514
515 Returns
516 -------
517 bool
518 """
519 key_dtype = getattr(key, "dtype", None)
520 if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):
521 return self._needs_i8_conversion(key.left)
522
523 i8_types = (Timestamp, Timedelta, DatetimeIndex, TimedeltaIndex)
524 return isinstance(key, i8_types)
525
    def _maybe_convert_i8(self, key):
        """
        Maybe convert a given key to its equivalent i8 value(s). Used as a
        preprocessing step prior to IntervalTree queries (self._engine), which
        expects numeric data.

        Parameters
        ----------
        key : scalar or list-like
            The key that should maybe be converted to i8.

        Returns
        -------
        scalar or list-like
            The original key if no conversion occurred, int if converted scalar,
            Index with an int64 dtype if converted list-like.

        Raises
        ------
        ValueError
            If the dtype of the converted key differs from the subtype of this
            IntervalIndex.
        """
        if is_list_like(key):
            # list-likes are normalised to an Index before anything else
            key = ensure_index(key)
            key = maybe_upcast_numeric_to_64bit(key)

        if not self._needs_i8_conversion(key):
            return key

        scalar = is_scalar(key)
        key_dtype = getattr(key, "dtype", None)
        if isinstance(key_dtype, IntervalDtype) or isinstance(key, Interval):
            # convert left/right and reconstruct
            left = self._maybe_convert_i8(key.left)
            right = self._maybe_convert_i8(key.right)
            constructor = Interval if scalar else IntervalIndex.from_arrays
            # error: "object" not callable
            return constructor(
                left, right, closed=self.closed
            )  # type: ignore[operator]

        if scalar:
            # Timestamp/Timedelta
            key_dtype, key_i8 = infer_dtype_from_scalar(key)
            # NOTE(review): _needs_i8_conversion does not list Period among the
            # types it accepts, so the Period branch looks unreachable through
            # that gate -- confirm before relying on it.
            if isinstance(key, Period):
                key_i8 = key.ordinal
            elif isinstance(key_i8, Timestamp):
                key_i8 = key_i8._value
            elif isinstance(key_i8, (np.datetime64, np.timedelta64)):
                key_i8 = key_i8.view("i8")
        else:
            # DatetimeIndex/TimedeltaIndex
            key_dtype, key_i8 = key.dtype, Index(key.asi8)
            if key.hasnans:
                # convert NaT from its i8 value to np.nan so it's not viewed
                # as a valid value, maybe causing errors (e.g. is_overlapping)
                key_i8 = key_i8.where(~key._isnan)

        # ensure consistency with IntervalIndex subtype
        # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
        # ExtensionDtype]" has no attribute "subtype"
        subtype = self.dtype.subtype  # type: ignore[union-attr]

        if subtype != key_dtype:
            raise ValueError(
                f"Cannot index an IntervalIndex of subtype {subtype} with "
                f"values of dtype {key_dtype}"
            )

        return key_i8
591
592 def _searchsorted_monotonic(self, label, side: Literal["left", "right"] = "left"):
593 if not self.is_non_overlapping_monotonic:
594 raise KeyError(
595 "can only get slices from an IntervalIndex if bounds are "
596 "non-overlapping and all monotonic increasing or decreasing"
597 )
598
599 if isinstance(label, (IntervalMixin, IntervalIndex)):
600 raise NotImplementedError("Interval objects are not currently supported")
601
602 # GH 20921: "not is_monotonic_increasing" for the second condition
603 # instead of "is_monotonic_decreasing" to account for single element
604 # indexes being both increasing and decreasing
605 if (side == "left" and self.left.is_monotonic_increasing) or (
606 side == "right" and not self.left.is_monotonic_increasing
607 ):
608 sub_idx = self.right
609 if self.open_right:
610 label = _get_next_label(label)
611 else:
612 sub_idx = self.left
613 if self.open_left:
614 label = _get_prev_label(label)
615
616 return sub_idx._searchsorted_monotonic(label, side)
617
618 # --------------------------------------------------------------------
619 # Indexing Methods
620
621 def get_loc(self, key) -> int | slice | np.ndarray:
622 """
623 Get integer location, slice or boolean mask for requested label.
624
625 Parameters
626 ----------
627 key : label
628
629 Returns
630 -------
631 int if unique index, slice if monotonic index, else mask
632
633 Examples
634 --------
635 >>> i1, i2 = pd.Interval(0, 1), pd.Interval(1, 2)
636 >>> index = pd.IntervalIndex([i1, i2])
637 >>> index.get_loc(1)
638 0
639
640 You can also supply a point inside an interval.
641
642 >>> index.get_loc(1.5)
643 1
644
645 If a label is in several intervals, you get the locations of all the
646 relevant intervals.
647
648 >>> i3 = pd.Interval(0, 2)
649 >>> overlapping_index = pd.IntervalIndex([i1, i2, i3])
650 >>> overlapping_index.get_loc(0.5)
651 array([ True, False, True])
652
653 Only exact matches will be returned if an interval is provided.
654
655 >>> index.get_loc(pd.Interval(0, 1))
656 0
657 """
658 self._check_indexing_error(key)
659
660 if isinstance(key, Interval):
661 if self.closed != key.closed:
662 raise KeyError(key)
663 mask = (self.left == key.left) & (self.right == key.right)
664 elif is_valid_na_for_dtype(key, self.dtype):
665 mask = self.isna()
666 else:
667 # assume scalar
668 op_left = le if self.closed_left else lt
669 op_right = le if self.closed_right else lt
670 try:
671 mask = op_left(self.left, key) & op_right(key, self.right)
672 except TypeError as err:
673 # scalar is not comparable to II subtype --> invalid label
674 raise KeyError(key) from err
675
676 matches = mask.sum()
677 if matches == 0:
678 raise KeyError(key)
679 if matches == 1:
680 return mask.argmax()
681
682 res = lib.maybe_booleans_to_slice(mask.view("u1"))
683 if isinstance(res, slice) and res.stop is None:
684 # TODO: DO this in maybe_booleans_to_slice?
685 res = slice(res.start, len(self), res.step)
686 return res
687
688 def _get_indexer(
689 self,
690 target: Index,
691 method: str | None = None,
692 limit: int | None = None,
693 tolerance: Any | None = None,
694 ) -> npt.NDArray[np.intp]:
695 if isinstance(target, IntervalIndex):
696 # We only get here with not self.is_overlapping
697 # -> at most one match per interval in target
698 # want exact matches -> need both left/right to match, so defer to
699 # left/right get_indexer, compare elementwise, equality -> match
700 indexer = self._get_indexer_unique_sides(target)
701
702 elif not is_object_dtype(target.dtype):
703 # homogeneous scalar index: use IntervalTree
704 # we should always have self._should_partial_index(target) here
705 target = self._maybe_convert_i8(target)
706 indexer = self._engine.get_indexer(target.values)
707 else:
708 # heterogeneous scalar index: defer elementwise to get_loc
709 # we should always have self._should_partial_index(target) here
710 return self._get_indexer_pointwise(target)[0]
711
712 return ensure_platform_int(indexer)
713
714 @Appender(_index_shared_docs["get_indexer_non_unique"] % _index_doc_kwargs)
715 def get_indexer_non_unique(
716 self, target: Index
717 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
718 target = ensure_index(target)
719
720 if not self._should_compare(target) and not self._should_partial_index(target):
721 # e.g. IntervalIndex with different closed or incompatible subtype
722 # -> no matches
723 return self._get_indexer_non_comparable(target, None, unique=False)
724
725 elif isinstance(target, IntervalIndex):
726 if self.left.is_unique and self.right.is_unique:
727 # fastpath available even if we don't have self._index_as_unique
728 indexer = self._get_indexer_unique_sides(target)
729 missing = (indexer == -1).nonzero()[0]
730 else:
731 return self._get_indexer_pointwise(target)
732
733 elif is_object_dtype(target.dtype) or not self._should_partial_index(target):
734 # target might contain intervals: defer elementwise to get_loc
735 return self._get_indexer_pointwise(target)
736
737 else:
738 # Note: this case behaves differently from other Index subclasses
739 # because IntervalIndex does partial-int indexing
740 target = self._maybe_convert_i8(target)
741 indexer, missing = self._engine.get_indexer_non_unique(target.values)
742
743 return ensure_platform_int(indexer), ensure_platform_int(missing)
744
745 def _get_indexer_unique_sides(self, target: IntervalIndex) -> npt.NDArray[np.intp]:
746 """
747 _get_indexer specialized to the case where both of our sides are unique.
748 """
749 # Caller is responsible for checking
750 # `self.left.is_unique and self.right.is_unique`
751
752 left_indexer = self.left.get_indexer(target.left)
753 right_indexer = self.right.get_indexer(target.right)
754 indexer = np.where(left_indexer == right_indexer, left_indexer, -1)
755 return indexer
756
757 def _get_indexer_pointwise(
758 self, target: Index
759 ) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp]]:
760 """
761 pointwise implementation for get_indexer and get_indexer_non_unique.
762 """
763 indexer, missing = [], []
764 for i, key in enumerate(target):
765 try:
766 locs = self.get_loc(key)
767 if isinstance(locs, slice):
768 # Only needed for get_indexer_non_unique
769 locs = np.arange(locs.start, locs.stop, locs.step, dtype="intp")
770 elif lib.is_integer(locs):
771 locs = np.array(locs, ndmin=1)
772 else:
773 # otherwise we have ndarray[bool]
774 locs = np.where(locs)[0]
775 except KeyError:
776 missing.append(i)
777 locs = np.array([-1])
778 except InvalidIndexError:
779 # i.e. non-scalar key e.g. a tuple.
780 # see test_append_different_columns_types_raises
781 missing.append(i)
782 locs = np.array([-1])
783
784 indexer.append(locs)
785
786 indexer = np.concatenate(indexer)
787 return ensure_platform_int(indexer), ensure_platform_int(missing)
788
789 @cache_readonly
790 def _index_as_unique(self) -> bool:
791 return not self.is_overlapping and self._engine._na_count < 2
792
793 _requires_unique_msg = (
794 "cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
795 )
796
797 def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
798 if not (key.step is None or key.step == 1):
799 # GH#31658 if label-based, we require step == 1,
800 # if positional, we disallow float start/stop
801 msg = "label-based slicing with step!=1 is not supported for IntervalIndex"
802 if kind == "loc":
803 raise ValueError(msg)
804 if kind == "getitem":
805 if not is_valid_positional_slice(key):
806 # i.e. this cannot be interpreted as a positional slice
807 raise ValueError(msg)
808
809 return super()._convert_slice_indexer(key, kind)
810
811 @cache_readonly
812 def _should_fallback_to_positional(self) -> bool:
813 # integer lookups in Series.__getitem__ are unambiguously
814 # positional in this case
815 # error: Item "ExtensionDtype"/"dtype[Any]" of "Union[dtype[Any],
816 # ExtensionDtype]" has no attribute "subtype"
817 return self.dtype.subtype.kind in "mM" # type: ignore[union-attr]
818
819 def _maybe_cast_slice_bound(self, label, side: str):
820 return getattr(self, side)._maybe_cast_slice_bound(label, side)
821
822 def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
823 if not isinstance(dtype, IntervalDtype):
824 return False
825 common_subtype = find_common_type([self.dtype, dtype])
826 return not is_object_dtype(common_subtype)
827
828 # --------------------------------------------------------------------
829
830 @cache_readonly
831 def left(self) -> Index:
832 return Index(self._data.left, copy=False)
833
834 @cache_readonly
835 def right(self) -> Index:
836 return Index(self._data.right, copy=False)
837
838 @cache_readonly
839 def mid(self) -> Index:
840 return Index(self._data.mid, copy=False)
841
842 @property
843 def length(self) -> Index:
844 return Index(self._data.length, copy=False)
845
846 # --------------------------------------------------------------------
847 # Set Operations
848
849 def _intersection(self, other, sort):
850 """
851 intersection specialized to the case with matching dtypes.
852 """
853 # For IntervalIndex we also know other.closed == self.closed
854 if self.left.is_unique and self.right.is_unique:
855 taken = self._intersection_unique(other)
856 elif other.left.is_unique and other.right.is_unique and self.isna().sum() <= 1:
857 # Swap other/self if other is unique and self does not have
858 # multiple NaNs
859 taken = other._intersection_unique(self)
860 else:
861 # duplicates
862 taken = self._intersection_non_unique(other)
863
864 if sort is None:
865 taken = taken.sort_values()
866
867 return taken
868
869 def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
870 """
871 Used when the IntervalIndex does not have any common endpoint,
872 no matter left or right.
873 Return the intersection with another IntervalIndex.
874 Parameters
875 ----------
876 other : IntervalIndex
877 Returns
878 -------
879 IntervalIndex
880 """
881 # Note: this is much more performant than super()._intersection(other)
882 lindexer = self.left.get_indexer(other.left)
883 rindexer = self.right.get_indexer(other.right)
884
885 match = (lindexer == rindexer) & (lindexer != -1)
886 indexer = lindexer.take(match.nonzero()[0])
887 indexer = unique(indexer)
888
889 return self.take(indexer)
890
891 def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
892 """
893 Used when the IntervalIndex does have some common endpoints,
894 on either sides.
895 Return the intersection with another IntervalIndex.
896
897 Parameters
898 ----------
899 other : IntervalIndex
900
901 Returns
902 -------
903 IntervalIndex
904 """
905 # Note: this is about 3.25x faster than super()._intersection(other)
906 # in IntervalIndexMethod.time_intersection_both_duplicate(1000)
907 mask = np.zeros(len(self), dtype=bool)
908
909 if self.hasnans and other.hasnans:
910 first_nan_loc = np.arange(len(self))[self.isna()][0]
911 mask[first_nan_loc] = True
912
913 other_tups = set(zip(other.left, other.right))
914 for i, tup in enumerate(zip(self.left, self.right)):
915 if tup in other_tups:
916 mask[i] = True
917
918 return self[mask]
919
920 # --------------------------------------------------------------------
921
922 def _get_engine_target(self) -> np.ndarray:
923 # Note: we _could_ use libjoin functions by either casting to object
924 # dtype or constructing tuples (faster than constructing Intervals)
925 # but the libjoin fastpaths are no longer fast in these cases.
926 raise NotImplementedError(
927 "IntervalIndex does not use libjoin fastpaths or pass values to "
928 "IndexEngine objects"
929 )
930
931 def _from_join_target(self, result):
932 raise NotImplementedError("IntervalIndex does not use libjoin fastpaths")
933
934 # TODO: arithmetic operations
935
936
937def _is_valid_endpoint(endpoint) -> bool:
938 """
939 Helper for interval_range to check if start/end are valid types.
940 """
941 return any(
942 [
943 is_number(endpoint),
944 isinstance(endpoint, Timestamp),
945 isinstance(endpoint, Timedelta),
946 endpoint is None,
947 ]
948 )
949
950
951def _is_type_compatible(a, b) -> bool:
952 """
953 Helper for interval_range to check type compat of start/end/freq.
954 """
955 is_ts_compat = lambda x: isinstance(x, (Timestamp, BaseOffset))
956 is_td_compat = lambda x: isinstance(x, (Timedelta, BaseOffset))
957 return (
958 (is_number(a) and is_number(b))
959 or (is_ts_compat(a) and is_ts_compat(b))
960 or (is_td_compat(a) and is_td_compat(b))
961 or com.any_none(a, b)
962 )
963
964
def interval_range(
    start=None,
    end=None,
    periods=None,
    freq=None,
    name: Hashable | None = None,
    closed: IntervalClosedType = "right",
) -> IntervalIndex:
    """
    Return a fixed frequency IntervalIndex.

    Parameters
    ----------
    start : numeric or datetime-like, default None
        Left bound for generating intervals.
    end : numeric or datetime-like, default None
        Right bound for generating intervals.
    periods : int, default None
        Number of periods to generate.
    freq : numeric, str, Timedelta, datetime.timedelta, or DateOffset, default None
        The length of each interval. Must be consistent with the type of start
        and end, e.g. 2 for numeric, or '5H' for datetime-like.  Default is 1
        for numeric and 'D' for datetime-like.
    name : str, default None
        Name of the resulting IntervalIndex.
    closed : {'left', 'right', 'both', 'neither'}, default 'right'
        Whether the intervals are closed on the left-side, right-side, both
        or neither.

    Returns
    -------
    IntervalIndex

    Raises
    ------
    ValueError
        If not exactly three of ``start``, ``end``, ``periods`` and ``freq``
        are specified, if ``start`` or ``end`` is not numeric or
        datetime-like, or if ``freq`` cannot be converted to a DateOffset.
    TypeError
        If ``start``, ``end`` and ``freq`` are not type compatible.

    See Also
    --------
    IntervalIndex : An Index of intervals that are all closed on the same side.

    Notes
    -----
    Of the four parameters ``start``, ``end``, ``periods``, and ``freq``,
    exactly three must be specified. If ``freq`` is omitted, the resulting
    ``IntervalIndex`` will have ``periods`` linearly spaced elements between
    ``start`` and ``end``, inclusively.

    To learn more about datetime-like frequency strings, please see `this link
    <https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases>`__.

    Examples
    --------
    Numeric ``start`` and  ``end`` is supported.

    >>> pd.interval_range(start=0, end=5)
    IntervalIndex([(0, 1], (1, 2], (2, 3], (3, 4], (4, 5]],
                  dtype='interval[int64, right]')

    Additionally, datetime-like input is also supported.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   end=pd.Timestamp('2017-01-04'))
    IntervalIndex([(2017-01-01 00:00:00, 2017-01-02 00:00:00],
                   (2017-01-02 00:00:00, 2017-01-03 00:00:00],
                   (2017-01-03 00:00:00, 2017-01-04 00:00:00]],
                  dtype='interval[datetime64[ns], right]')

    The ``freq`` parameter specifies the frequency between the left and right
    endpoints of the individual intervals within the ``IntervalIndex``.  For
    numeric ``start`` and ``end``, the frequency must also be numeric.

    >>> pd.interval_range(start=0, periods=4, freq=1.5)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
                  dtype='interval[float64, right]')

    Similarly, for datetime-like ``start`` and ``end``, the frequency must be
    convertible to a DateOffset.

    >>> pd.interval_range(start=pd.Timestamp('2017-01-01'),
    ...                   periods=3, freq='MS')
    IntervalIndex([(2017-01-01 00:00:00, 2017-02-01 00:00:00],
                   (2017-02-01 00:00:00, 2017-03-01 00:00:00],
                   (2017-03-01 00:00:00, 2017-04-01 00:00:00]],
                  dtype='interval[datetime64[ns], right]')

    Specify ``start``, ``end``, and ``periods``; the frequency is generated
    automatically (linearly spaced).

    >>> pd.interval_range(start=0, end=6, periods=4)
    IntervalIndex([(0.0, 1.5], (1.5, 3.0], (3.0, 4.5], (4.5, 6.0]],
              dtype='interval[float64, right]')

    The ``closed`` parameter specifies which endpoints of the individual
    intervals within the ``IntervalIndex`` are closed.

    >>> pd.interval_range(end=5, periods=4, closed='both')
    IntervalIndex([[1, 2], [2, 3], [3, 4], [4, 5]],
                  dtype='interval[int64, both]')
    """
    start = maybe_box_datetimelike(start)
    end = maybe_box_datetimelike(end)
    # whichever bound is given decides between numeric and datetime-like
    endpoint = start if start is not None else end

    if freq is None and com.any_none(periods, start, end):
        freq = 1 if is_number(endpoint) else "D"

    if com.count_not_none(start, end, periods, freq) != 3:
        raise ValueError(
            "Of the four parameters: start, end, periods, and "
            "freq, exactly three must be specified"
        )

    if not _is_valid_endpoint(start):
        raise ValueError(f"start must be numeric or datetime-like, got {start}")
    if not _is_valid_endpoint(end):
        raise ValueError(f"end must be numeric or datetime-like, got {end}")

    periods = validate_periods(periods)

    if freq is not None and not is_number(freq):
        try:
            freq = to_offset(freq)
        except ValueError as err:
            raise ValueError(
                f"freq must be numeric or convertible to DateOffset, got {freq}"
            ) from err

    # verify type compatibility
    if not all(
        [
            _is_type_compatible(start, end),
            _is_type_compatible(start, freq),
            _is_type_compatible(end, freq),
        ]
    ):
        raise TypeError("start, end, freq need to be type compatible")

    # +1 to convert interval count to breaks count (n breaks = n-1 intervals)
    if periods is not None:
        periods += 1

    breaks: np.ndarray | TimedeltaIndex | DatetimeIndex

    if is_number(endpoint):
        if com.all_not_none(start, end, freq):
            # 0.1 ensures we capture end
            breaks = np.arange(start, end + (freq * 0.1), freq)
        else:
            # Exactly three of the four arguments are set and at least one of
            # start/end/freq is missing in this branch, so ``periods`` is
            # always known here; only a missing start or end has to be
            # derived (at most one of them).
            if start is None:
                start = end - (periods - 1) * freq
            elif end is None:
                end = start + (periods - 1) * freq

            breaks = np.linspace(start, end, periods)
        if all(is_integer(x) for x in com.not_none(start, end, freq)):
            # np.linspace always produces float output

            # error: Argument 1 to "maybe_downcast_numeric" has incompatible type
            # "Union[ndarray[Any, Any], TimedeltaIndex, DatetimeIndex]";
            # expected "ndarray[Any, Any]"  [
            breaks = maybe_downcast_numeric(
                breaks,  # type: ignore[arg-type]
                np.dtype("int64"),
            )
    else:
        # delegate to the appropriate range function
        if isinstance(endpoint, Timestamp):
            breaks = date_range(start=start, end=end, periods=periods, freq=freq)
        else:
            breaks = timedelta_range(start=start, end=end, periods=periods, freq=freq)

    return IntervalIndex.from_breaks(breaks, name=name, closed=closed)