1from __future__ import annotations
2
3import operator
4from operator import (
5 le,
6 lt,
7)
8import textwrap
9from typing import (
10 TYPE_CHECKING,
11 Literal,
12 Union,
13 overload,
14)
15import warnings
16
17import numpy as np
18
19from pandas._libs import lib
20from pandas._libs.interval import (
21 VALID_CLOSED,
22 Interval,
23 IntervalMixin,
24 intervals_to_interval_bounds,
25)
26from pandas._libs.missing import NA
27from pandas._typing import (
28 ArrayLike,
29 AxisInt,
30 Dtype,
31 FillnaOptions,
32 IntervalClosedType,
33 NpDtype,
34 PositionalIndexer,
35 ScalarIndexer,
36 Self,
37 SequenceIndexer,
38 SortKind,
39 TimeArrayLike,
40 npt,
41)
42from pandas.compat.numpy import function as nv
43from pandas.errors import IntCastingNaNError
44from pandas.util._decorators import Appender
45
46from pandas.core.dtypes.cast import (
47 LossySetitemError,
48 maybe_upcast_numeric_to_64bit,
49)
50from pandas.core.dtypes.common import (
51 is_float_dtype,
52 is_integer_dtype,
53 is_list_like,
54 is_object_dtype,
55 is_scalar,
56 is_string_dtype,
57 needs_i8_conversion,
58 pandas_dtype,
59)
60from pandas.core.dtypes.dtypes import (
61 CategoricalDtype,
62 IntervalDtype,
63)
64from pandas.core.dtypes.generic import (
65 ABCDataFrame,
66 ABCDatetimeIndex,
67 ABCIntervalIndex,
68 ABCPeriodIndex,
69)
70from pandas.core.dtypes.missing import (
71 is_valid_na_for_dtype,
72 isna,
73 notna,
74)
75
76from pandas.core.algorithms import (
77 isin,
78 take,
79 unique,
80 value_counts_internal as value_counts,
81)
82from pandas.core.arrays import ArrowExtensionArray
83from pandas.core.arrays.base import (
84 ExtensionArray,
85 _extension_array_shared_docs,
86)
87from pandas.core.arrays.datetimes import DatetimeArray
88from pandas.core.arrays.timedeltas import TimedeltaArray
89import pandas.core.common as com
90from pandas.core.construction import (
91 array as pd_array,
92 ensure_wrapped_if_datetimelike,
93 extract_array,
94)
95from pandas.core.indexers import check_array_indexer
96from pandas.core.ops import (
97 invalid_comparison,
98 unpack_zerodim_and_defer,
99)
100
101if TYPE_CHECKING:
102 from collections.abc import (
103 Iterator,
104 Sequence,
105 )
106
107 from pandas import (
108 Index,
109 Series,
110 )
111
112
113IntervalSide = Union[TimeArrayLike, np.ndarray]
114IntervalOrNA = Union[Interval, float]
115
116_interval_shared_docs: dict[str, str] = {}
117
118_shared_docs_kwargs = {
119 "klass": "IntervalArray",
120 "qualname": "arrays.IntervalArray",
121 "name": "",
122}
123
124
125_interval_shared_docs[
126 "class"
127] = """
128%(summary)s
129
130Parameters
131----------
132data : array-like (1-dimensional)
    Array-like (ndarray, :class:`DatetimeArray`, :class:`TimedeltaArray`)
    containing Interval objects from which to build the %(klass)s.
135closed : {'left', 'right', 'both', 'neither'}, default 'right'
136 Whether the intervals are closed on the left-side, right-side, both or
137 neither.
138dtype : dtype or None, default None
139 If None, dtype will be inferred.
140copy : bool, default False
141 Copy the input data.
142%(name)s\
143verify_integrity : bool, default True
144 Verify that the %(klass)s is valid.
145
146Attributes
147----------
148left
149right
150closed
151mid
152length
153is_empty
154is_non_overlapping_monotonic
155%(extra_attributes)s\
156
157Methods
158-------
159from_arrays
160from_tuples
161from_breaks
162contains
163overlaps
164set_closed
165to_tuples
166%(extra_methods)s\
167
168See Also
169--------
170Index : The base pandas Index type.
171Interval : A bounded slice-like interval; the elements of an %(klass)s.
172interval_range : Function to create a fixed frequency IntervalIndex.
173cut : Bin values into discrete Intervals.
174qcut : Bin values into equal-sized Intervals based on rank or sample quantiles.
175
176Notes
177-----
178See the `user guide
179<https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html#intervalindex>`__
180for more.
181
182%(examples)s\
183"""
184
185
186@Appender(
187 _interval_shared_docs["class"]
188 % {
189 "klass": "IntervalArray",
190 "summary": "Pandas array for interval data that are closed on the same side.",
191 "name": "",
192 "extra_attributes": "",
193 "extra_methods": "",
194 "examples": textwrap.dedent(
195 """\
196 Examples
197 --------
198 A new ``IntervalArray`` can be constructed directly from an array-like of
199 ``Interval`` objects:
200
201 >>> pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
202 <IntervalArray>
203 [(0, 1], (1, 5]]
204 Length: 2, dtype: interval[int64, right]
205
206 It may also be constructed using one of the constructor
207 methods: :meth:`IntervalArray.from_arrays`,
208 :meth:`IntervalArray.from_breaks`, and :meth:`IntervalArray.from_tuples`.
209 """
210 ),
211 }
212)
213class IntervalArray(IntervalMixin, ExtensionArray):
214 can_hold_na = True
215 _na_value = _fill_value = np.nan
216
217 @property
218 def ndim(self) -> Literal[1]:
219 return 1
220
221 # To make mypy recognize the fields
222 _left: IntervalSide
223 _right: IntervalSide
224 _dtype: IntervalDtype
225
226 # ---------------------------------------------------------------------
227 # Constructors
228
229 def __new__(
230 cls,
231 data,
232 closed: IntervalClosedType | None = None,
233 dtype: Dtype | None = None,
234 copy: bool = False,
235 verify_integrity: bool = True,
236 ):
237 data = extract_array(data, extract_numpy=True)
238
239 if isinstance(data, cls):
240 left: IntervalSide = data._left
241 right: IntervalSide = data._right
242 closed = closed or data.closed
243 dtype = IntervalDtype(left.dtype, closed=closed)
244 else:
245 # don't allow scalars
246 if is_scalar(data):
247 msg = (
248 f"{cls.__name__}(...) must be called with a collection "
249 f"of some kind, {data} was passed"
250 )
251 raise TypeError(msg)
252
253 # might need to convert empty or purely na data
254 data = _maybe_convert_platform_interval(data)
255 left, right, infer_closed = intervals_to_interval_bounds(
256 data, validate_closed=closed is None
257 )
258 if left.dtype == object:
259 left = lib.maybe_convert_objects(left)
260 right = lib.maybe_convert_objects(right)
261 closed = closed or infer_closed
262
263 left, right, dtype = cls._ensure_simple_new_inputs(
264 left,
265 right,
266 closed=closed,
267 copy=copy,
268 dtype=dtype,
269 )
270
271 if verify_integrity:
272 cls._validate(left, right, dtype=dtype)
273
274 return cls._simple_new(
275 left,
276 right,
277 dtype=dtype,
278 )
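    # Illustrative sketch: when ``closed`` is not given, it is inferred from the
    # Interval objects themselves, e.g.
    #
    #   pd.arrays.IntervalArray([pd.Interval(0, 1, closed="left"),
    #                            pd.Interval(1, 2, closed="left")])
    #
    # produces dtype ``interval[int64, left]``, while mixing differently-closed
    # Intervals without an explicit ``closed=`` raises a ValueError.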
279
280 @classmethod
281 def _simple_new(
282 cls,
283 left: IntervalSide,
284 right: IntervalSide,
285 dtype: IntervalDtype,
286 ) -> Self:
287 result = IntervalMixin.__new__(cls)
288 result._left = left
289 result._right = right
290 result._dtype = dtype
291
292 return result
293
294 @classmethod
295 def _ensure_simple_new_inputs(
296 cls,
297 left,
298 right,
299 closed: IntervalClosedType | None = None,
300 copy: bool = False,
301 dtype: Dtype | None = None,
302 ) -> tuple[IntervalSide, IntervalSide, IntervalDtype]:
303 """Ensure correctness of input parameters for cls._simple_new."""
304 from pandas.core.indexes.base import ensure_index
305
306 left = ensure_index(left, copy=copy)
307 left = maybe_upcast_numeric_to_64bit(left)
308
309 right = ensure_index(right, copy=copy)
310 right = maybe_upcast_numeric_to_64bit(right)
311
312 if closed is None and isinstance(dtype, IntervalDtype):
313 closed = dtype.closed
314
315 closed = closed or "right"
316
317 if dtype is not None:
318 # GH 19262: dtype must be an IntervalDtype to override inferred
319 dtype = pandas_dtype(dtype)
320 if isinstance(dtype, IntervalDtype):
321 if dtype.subtype is not None:
322 left = left.astype(dtype.subtype)
323 right = right.astype(dtype.subtype)
324 else:
325 msg = f"dtype must be an IntervalDtype, got {dtype}"
326 raise TypeError(msg)
327
328 if dtype.closed is None:
329 # possibly loading an old pickle
330 dtype = IntervalDtype(dtype.subtype, closed)
331 elif closed != dtype.closed:
332 raise ValueError("closed keyword does not match dtype.closed")
333
334 # coerce dtypes to match if needed
335 if is_float_dtype(left.dtype) and is_integer_dtype(right.dtype):
336 right = right.astype(left.dtype)
337 elif is_float_dtype(right.dtype) and is_integer_dtype(left.dtype):
338 left = left.astype(right.dtype)
339
340 if type(left) != type(right):
341 msg = (
342 f"must not have differing left [{type(left).__name__}] and "
343 f"right [{type(right).__name__}] types"
344 )
345 raise ValueError(msg)
346 if isinstance(left.dtype, CategoricalDtype) or is_string_dtype(left.dtype):
347 # GH 19016
348 msg = (
349 "category, object, and string subtypes are not supported "
350 "for IntervalArray"
351 )
352 raise TypeError(msg)
353 if isinstance(left, ABCPeriodIndex):
354 msg = "Period dtypes are not supported, use a PeriodIndex instead"
355 raise ValueError(msg)
356 if isinstance(left, ABCDatetimeIndex) and str(left.tz) != str(right.tz):
357 msg = (
358 "left and right must have the same time zone, got "
359 f"'{left.tz}' and '{right.tz}'"
360 )
361 raise ValueError(msg)
362 elif needs_i8_conversion(left.dtype) and left.unit != right.unit:
363 # e.g. m8[s] vs m8[ms], try to cast to a common dtype GH#55714
364 left_arr, right_arr = left._data._ensure_matching_resos(right._data)
365 left = ensure_index(left_arr)
366 right = ensure_index(right_arr)
367
368 # For dt64/td64 we want DatetimeArray/TimedeltaArray instead of ndarray
369 left = ensure_wrapped_if_datetimelike(left)
370 left = extract_array(left, extract_numpy=True)
371 right = ensure_wrapped_if_datetimelike(right)
372 right = extract_array(right, extract_numpy=True)
373
374 if isinstance(left, ArrowExtensionArray) or isinstance(
375 right, ArrowExtensionArray
376 ):
377 pass
378 else:
379 lbase = getattr(left, "_ndarray", left)
380 lbase = getattr(lbase, "_data", lbase).base
381 rbase = getattr(right, "_ndarray", right)
382 rbase = getattr(rbase, "_data", rbase).base
383 if lbase is not None and lbase is rbase:
384 # If these share data, then setitem could corrupt our IA
385 right = right.copy()
386
387 dtype = IntervalDtype(left.dtype, closed=closed)
388
389 return left, right, dtype
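    # Coercion sketch (illustrative): mixing integer and float bounds upcasts
    # the integer side, e.g. ``IntervalArray.from_arrays([0, 1], [0.5, 2.5])``
    # yields dtype ``interval[float64, right]``.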
390
391 @classmethod
392 def _from_sequence(
393 cls,
394 scalars,
395 *,
396 dtype: Dtype | None = None,
397 copy: bool = False,
398 ) -> Self:
399 return cls(scalars, dtype=dtype, copy=copy)
400
401 @classmethod
402 def _from_factorized(cls, values: np.ndarray, original: IntervalArray) -> Self:
403 return cls._from_sequence(values, dtype=original.dtype)
404
405 _interval_shared_docs["from_breaks"] = textwrap.dedent(
406 """
407 Construct an %(klass)s from an array of splits.
408
409 Parameters
410 ----------
411 breaks : array-like (1-dimensional)
412 Left and right bounds for each interval.
413 closed : {'left', 'right', 'both', 'neither'}, default 'right'
414 Whether the intervals are closed on the left-side, right-side, both
415 or neither.\
416 %(name)s
417 copy : bool, default False
418 Copy the data.
419 dtype : dtype or None, default None
420 If None, dtype will be inferred.
421
422 Returns
423 -------
424 %(klass)s
425
426 See Also
427 --------
428 interval_range : Function to create a fixed frequency IntervalIndex.
429 %(klass)s.from_arrays : Construct from a left and right array.
430 %(klass)s.from_tuples : Construct from a sequence of tuples.
431
432 %(examples)s\
433 """
434 )
435
436 @classmethod
437 @Appender(
438 _interval_shared_docs["from_breaks"]
439 % {
440 "klass": "IntervalArray",
441 "name": "",
442 "examples": textwrap.dedent(
443 """\
444 Examples
445 --------
446 >>> pd.arrays.IntervalArray.from_breaks([0, 1, 2, 3])
447 <IntervalArray>
448 [(0, 1], (1, 2], (2, 3]]
449 Length: 3, dtype: interval[int64, right]
450 """
451 ),
452 }
453 )
454 def from_breaks(
455 cls,
456 breaks,
457 closed: IntervalClosedType | None = "right",
458 copy: bool = False,
459 dtype: Dtype | None = None,
460 ) -> Self:
461 breaks = _maybe_convert_platform_interval(breaks)
462
463 return cls.from_arrays(breaks[:-1], breaks[1:], closed, copy=copy, dtype=dtype)
464
465 _interval_shared_docs["from_arrays"] = textwrap.dedent(
466 """
467 Construct from two arrays defining the left and right bounds.
468
469 Parameters
470 ----------
471 left : array-like (1-dimensional)
472 Left bounds for each interval.
473 right : array-like (1-dimensional)
474 Right bounds for each interval.
475 closed : {'left', 'right', 'both', 'neither'}, default 'right'
476 Whether the intervals are closed on the left-side, right-side, both
477 or neither.\
478 %(name)s
479 copy : bool, default False
480 Copy the data.
481 dtype : dtype, optional
482 If None, dtype will be inferred.
483
484 Returns
485 -------
486 %(klass)s
487
488 Raises
489 ------
490 ValueError
491 When a value is missing in only one of `left` or `right`.
492 When a value in `left` is greater than the corresponding value
493 in `right`.
494
495 See Also
496 --------
497 interval_range : Function to create a fixed frequency IntervalIndex.
498 %(klass)s.from_breaks : Construct an %(klass)s from an array of
499 splits.
500 %(klass)s.from_tuples : Construct an %(klass)s from an
501 array-like of tuples.
502
503 Notes
504 -----
505 Each element of `left` must be less than or equal to the `right`
506 element at the same position. If an element is missing, it must be
507 missing in both `left` and `right`. A TypeError is raised when
508 using an unsupported type for `left` or `right`. At the moment,
509 'category', 'object', and 'string' subtypes are not supported.
510
511 %(examples)s\
512 """
513 )
514
515 @classmethod
516 @Appender(
517 _interval_shared_docs["from_arrays"]
518 % {
519 "klass": "IntervalArray",
520 "name": "",
521 "examples": textwrap.dedent(
522 """\
523 Examples
524 --------
525 >>> pd.arrays.IntervalArray.from_arrays([0, 1, 2], [1, 2, 3])
526 <IntervalArray>
527 [(0, 1], (1, 2], (2, 3]]
528 Length: 3, dtype: interval[int64, right]
529 """
530 ),
531 }
532 )
533 def from_arrays(
534 cls,
535 left,
536 right,
537 closed: IntervalClosedType | None = "right",
538 copy: bool = False,
539 dtype: Dtype | None = None,
540 ) -> Self:
541 left = _maybe_convert_platform_interval(left)
542 right = _maybe_convert_platform_interval(right)
543
544 left, right, dtype = cls._ensure_simple_new_inputs(
545 left,
546 right,
547 closed=closed,
548 copy=copy,
549 dtype=dtype,
550 )
551 cls._validate(left, right, dtype=dtype)
552
553 return cls._simple_new(left, right, dtype=dtype)
554
555 _interval_shared_docs["from_tuples"] = textwrap.dedent(
556 """
557 Construct an %(klass)s from an array-like of tuples.
558
559 Parameters
560 ----------
561 data : array-like (1-dimensional)
562 Array of tuples.
563 closed : {'left', 'right', 'both', 'neither'}, default 'right'
564 Whether the intervals are closed on the left-side, right-side, both
565 or neither.\
566 %(name)s
567 copy : bool, default False
            Whether to copy the data; this argument is for compatibility only
            and is ignored.
569 dtype : dtype or None, default None
570 If None, dtype will be inferred.
571
572 Returns
573 -------
574 %(klass)s
575
576 See Also
577 --------
578 interval_range : Function to create a fixed frequency IntervalIndex.
579 %(klass)s.from_arrays : Construct an %(klass)s from a left and
580 right array.
581 %(klass)s.from_breaks : Construct an %(klass)s from an array of
582 splits.
583
584 %(examples)s\
585 """
586 )
587
588 @classmethod
589 @Appender(
590 _interval_shared_docs["from_tuples"]
591 % {
592 "klass": "IntervalArray",
593 "name": "",
594 "examples": textwrap.dedent(
595 """\
596 Examples
597 --------
598 >>> pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
599 <IntervalArray>
600 [(0, 1], (1, 2]]
601 Length: 2, dtype: interval[int64, right]
602 """
603 ),
604 }
605 )
606 def from_tuples(
607 cls,
608 data,
609 closed: IntervalClosedType | None = "right",
610 copy: bool = False,
611 dtype: Dtype | None = None,
612 ) -> Self:
613 if len(data):
614 left, right = [], []
615 else:
616 # ensure that empty data keeps input dtype
617 left = right = data
618
619 for d in data:
620 if not isinstance(d, tuple) and isna(d):
621 lhs = rhs = np.nan
622 else:
623 name = cls.__name__
624 try:
625 # need list of length 2 tuples, e.g. [(0, 1), (1, 2), ...]
626 lhs, rhs = d
627 except ValueError as err:
628 msg = f"{name}.from_tuples requires tuples of length 2, got {d}"
629 raise ValueError(msg) from err
630 except TypeError as err:
631 msg = f"{name}.from_tuples received an invalid item, {d}"
632 raise TypeError(msg) from err
633 left.append(lhs)
634 right.append(rhs)
635
636 return cls.from_arrays(left, right, closed, copy=False, dtype=dtype)
637
638 @classmethod
639 def _validate(cls, left, right, dtype: IntervalDtype) -> None:
640 """
641 Verify that the IntervalArray is valid.
642
643 Checks that
644
645 * dtype is correct
646 * left and right match lengths
647 * left and right have the same missing values
648 * left is always below right
649 """
650 if not isinstance(dtype, IntervalDtype):
651 msg = f"invalid dtype: {dtype}"
652 raise ValueError(msg)
653 if len(left) != len(right):
654 msg = "left and right must have the same length"
655 raise ValueError(msg)
656 left_mask = notna(left)
657 right_mask = notna(right)
658 if not (left_mask == right_mask).all():
659 msg = (
660 "missing values must be missing in the same "
661 "location both left and right sides"
662 )
663 raise ValueError(msg)
664 if not (left[left_mask] <= right[left_mask]).all():
665 msg = "left side of interval must be <= right side"
666 raise ValueError(msg)
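    # Minimal illustration of the checks above:
    #
    #   pd.arrays.IntervalArray.from_arrays([0, 2], [1, 1])
    #
    # raises ``ValueError: left side of interval must be <= right side`` because
    # the second left bound (2) exceeds its right bound (1).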
667
668 def _shallow_copy(self, left, right) -> Self:
669 """
670 Return a new IntervalArray with the replacement attributes
671
672 Parameters
673 ----------
674 left : Index
675 Values to be used for the left-side of the intervals.
676 right : Index
677 Values to be used for the right-side of the intervals.
678 """
679 dtype = IntervalDtype(left.dtype, closed=self.closed)
680 left, right, dtype = self._ensure_simple_new_inputs(left, right, dtype=dtype)
681
682 return self._simple_new(left, right, dtype=dtype)
683
684 # ---------------------------------------------------------------------
685 # Descriptive
686
687 @property
688 def dtype(self) -> IntervalDtype:
689 return self._dtype
690
691 @property
692 def nbytes(self) -> int:
693 return self.left.nbytes + self.right.nbytes
694
695 @property
696 def size(self) -> int:
697 # Avoid materializing self.values
698 return self.left.size
699
700 # ---------------------------------------------------------------------
701 # EA Interface
702
703 def __iter__(self) -> Iterator:
704 return iter(np.asarray(self))
705
706 def __len__(self) -> int:
707 return len(self._left)
708
709 @overload
710 def __getitem__(self, key: ScalarIndexer) -> IntervalOrNA:
711 ...
712
713 @overload
714 def __getitem__(self, key: SequenceIndexer) -> Self:
715 ...
716
717 def __getitem__(self, key: PositionalIndexer) -> Self | IntervalOrNA:
718 key = check_array_indexer(self, key)
719 left = self._left[key]
720 right = self._right[key]
721
722 if not isinstance(left, (np.ndarray, ExtensionArray)):
723 # scalar
724 if is_scalar(left) and isna(left):
725 return self._fill_value
726 return Interval(left, right, self.closed)
727 if np.ndim(left) > 1:
728 # GH#30588 multi-dimensional indexer disallowed
729 raise ValueError("multi-dimensional indexing not allowed")
730 # Argument 2 to "_simple_new" of "IntervalArray" has incompatible type
731 # "Union[Period, Timestamp, Timedelta, NaTType, DatetimeArray, TimedeltaArray,
732 # ndarray[Any, Any]]"; expected "Union[Union[DatetimeArray, TimedeltaArray],
733 # ndarray[Any, Any]]"
734 return self._simple_new(left, right, dtype=self.dtype) # type: ignore[arg-type]
735
736 def __setitem__(self, key, value) -> None:
737 value_left, value_right = self._validate_setitem_value(value)
738 key = check_array_indexer(self, key)
739
740 self._left[key] = value_left
741 self._right[key] = value_right
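    # Usage sketch (illustrative): item assignment accepts Interval objects with
    # a matching ``closed`` side, or NA:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    #   arr[0] = pd.Interval(5, 6)                   # ok, both closed on "right"
    #   arr[0] = pd.Interval(5, 6, closed="left")    # raises ValueError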
742
743 def _cmp_method(self, other, op):
744 # ensure pandas array for list-like and eliminate non-interval scalars
745 if is_list_like(other):
746 if len(self) != len(other):
747 raise ValueError("Lengths must match to compare")
748 other = pd_array(other)
749 elif not isinstance(other, Interval):
750 # non-interval scalar -> no matches
751 if other is NA:
752 # GH#31882
753 from pandas.core.arrays import BooleanArray
754
755 arr = np.empty(self.shape, dtype=bool)
756 mask = np.ones(self.shape, dtype=bool)
757 return BooleanArray(arr, mask)
758 return invalid_comparison(self, other, op)
759
760 # determine the dtype of the elements we want to compare
761 if isinstance(other, Interval):
762 other_dtype = pandas_dtype("interval")
763 elif not isinstance(other.dtype, CategoricalDtype):
764 other_dtype = other.dtype
765 else:
766 # for categorical defer to categories for dtype
767 other_dtype = other.categories.dtype
768
769 # extract intervals if we have interval categories with matching closed
770 if isinstance(other_dtype, IntervalDtype):
771 if self.closed != other.categories.closed:
772 return invalid_comparison(self, other, op)
773
774 other = other.categories._values.take(
775 other.codes, allow_fill=True, fill_value=other.categories._na_value
776 )
777
778 # interval-like -> need same closed and matching endpoints
779 if isinstance(other_dtype, IntervalDtype):
780 if self.closed != other.closed:
781 return invalid_comparison(self, other, op)
782 elif not isinstance(other, Interval):
783 other = type(self)(other)
784
785 if op is operator.eq:
786 return (self._left == other.left) & (self._right == other.right)
787 elif op is operator.ne:
788 return (self._left != other.left) | (self._right != other.right)
789 elif op is operator.gt:
790 return (self._left > other.left) | (
791 (self._left == other.left) & (self._right > other.right)
792 )
793 elif op is operator.ge:
794 return (self == other) | (self > other)
795 elif op is operator.lt:
796 return (self._left < other.left) | (
797 (self._left == other.left) & (self._right < other.right)
798 )
799 else:
                # operator.le
801 return (self == other) | (self < other)
802
803 # non-interval/non-object dtype -> no matches
804 if not is_object_dtype(other_dtype):
805 return invalid_comparison(self, other, op)
806
807 # object dtype -> iteratively check for intervals
808 result = np.zeros(len(self), dtype=bool)
809 for i, obj in enumerate(other):
810 try:
811 result[i] = op(self[i], obj)
812 except TypeError:
813 if obj is NA:
814 # comparison with np.nan returns NA
815 # github.com/pandas-dev/pandas/pull/37124#discussion_r509095092
816 result = result.astype(object)
817 result[i] = NA
818 else:
819 raise
820 return result
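    # Comparison semantics sketch: intervals are ordered lexicographically on
    # their endpoints (left first, then right), matching scalar Interval
    # comparisons, e.g.
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (0, 2), (1, 2)])
    #   arr < pd.Interval(0, 2)   # -> array([ True, False, False])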
821
822 @unpack_zerodim_and_defer("__eq__")
823 def __eq__(self, other):
824 return self._cmp_method(other, operator.eq)
825
826 @unpack_zerodim_and_defer("__ne__")
827 def __ne__(self, other):
828 return self._cmp_method(other, operator.ne)
829
830 @unpack_zerodim_and_defer("__gt__")
831 def __gt__(self, other):
832 return self._cmp_method(other, operator.gt)
833
834 @unpack_zerodim_and_defer("__ge__")
835 def __ge__(self, other):
836 return self._cmp_method(other, operator.ge)
837
838 @unpack_zerodim_and_defer("__lt__")
839 def __lt__(self, other):
840 return self._cmp_method(other, operator.lt)
841
842 @unpack_zerodim_and_defer("__le__")
843 def __le__(self, other):
844 return self._cmp_method(other, operator.le)
845
846 def argsort(
847 self,
848 *,
849 ascending: bool = True,
850 kind: SortKind = "quicksort",
851 na_position: str = "last",
852 **kwargs,
853 ) -> np.ndarray:
854 ascending = nv.validate_argsort_with_ascending(ascending, (), kwargs)
855
856 if ascending and kind == "quicksort" and na_position == "last":
857 # TODO: in an IntervalIndex we can reuse the cached
858 # IntervalTree.left_sorter
859 return np.lexsort((self.right, self.left))
860
861 # TODO: other cases we can use lexsort for? much more performant.
862 return super().argsort(
863 ascending=ascending, kind=kind, na_position=na_position, **kwargs
864 )
865
866 def min(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
867 nv.validate_minmax_axis(axis, self.ndim)
868
869 if not len(self):
870 return self._na_value
871
872 mask = self.isna()
873 if mask.any():
874 if not skipna:
875 return self._na_value
876 obj = self[~mask]
877 else:
878 obj = self
879
880 indexer = obj.argsort()[0]
881 return obj[indexer]
882
883 def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOrNA:
884 nv.validate_minmax_axis(axis, self.ndim)
885
886 if not len(self):
887 return self._na_value
888
889 mask = self.isna()
890 if mask.any():
891 if not skipna:
892 return self._na_value
893 obj = self[~mask]
894 else:
895 obj = self
896
897 indexer = obj.argsort()[-1]
898 return obj[indexer]
899
900 def _pad_or_backfill( # pylint: disable=useless-parent-delegation
901 self,
902 *,
903 method: FillnaOptions,
904 limit: int | None = None,
905 limit_area: Literal["inside", "outside"] | None = None,
906 copy: bool = True,
907 ) -> Self:
908 # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
909 # this method entirely.
910 return super()._pad_or_backfill(
911 method=method, limit=limit, limit_area=limit_area, copy=copy
912 )
913
914 def fillna(
915 self, value=None, method=None, limit: int | None = None, copy: bool = True
916 ) -> Self:
917 """
918 Fill NA/NaN values using the specified method.
919
920 Parameters
921 ----------
922 value : scalar, dict, Series
923 If a scalar value is passed it is used to fill all missing values.
924 Alternatively, a Series or dict can be used to fill in different
925 values for each index. The value should not be a list. The
926 value(s) passed should be either Interval objects or NA/NaN.
927 method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
928 (Not implemented yet for IntervalArray)
929 Method to use for filling holes in reindexed Series
930 limit : int, default None
931 (Not implemented yet for IntervalArray)
932 If method is specified, this is the maximum number of consecutive
933 NaN values to forward/backward fill. In other words, if there is
934 a gap with more than this number of consecutive NaNs, it will only
935 be partially filled. If method is not specified, this is the
936 maximum number of entries along the entire axis where NaNs will be
937 filled.
938 copy : bool, default True
939 Whether to make a copy of the data before filling. If False, then
940 the original should be modified and no new memory should be allocated.
941 For ExtensionArray subclasses that cannot do this, it is at the
942 author's discretion whether to ignore "copy=False" or to raise.
943
944 Returns
945 -------
946 filled : IntervalArray with NA/NaN filled
947 """
948 if copy is False:
949 raise NotImplementedError
950 if method is not None:
951 return super().fillna(value=value, method=method, limit=limit)
952
953 value_left, value_right = self._validate_scalar(value)
954
955 left = self.left.fillna(value=value_left)
956 right = self.right.fillna(value=value_right)
957 return self._shallow_copy(left, right)
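    # Usage sketch (illustrative): only a scalar Interval (with matching
    # ``closed``) or NA is accepted as the fill value:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), np.nan, (2, 3)])
    #   arr.fillna(pd.Interval(1, 2))   # fills the missing slot with (1.0, 2.0]
    #
    # List-like fill values raise TypeError.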
958
959 def astype(self, dtype, copy: bool = True):
960 """
961 Cast to an ExtensionArray or NumPy array with dtype 'dtype'.
962
963 Parameters
964 ----------
965 dtype : str or dtype
966 Typecode or data-type to which the array is cast.
967
968 copy : bool, default True
969 Whether to copy the data, even if not necessary. If False,
970 a copy is made only if the old dtype does not match the
971 new dtype.
972
973 Returns
974 -------
975 array : ExtensionArray or ndarray
976 ExtensionArray or NumPy ndarray with 'dtype' for its dtype.
977 """
978 from pandas import Index
979
980 if dtype is not None:
981 dtype = pandas_dtype(dtype)
982
983 if isinstance(dtype, IntervalDtype):
984 if dtype == self.dtype:
985 return self.copy() if copy else self
986
987 if is_float_dtype(self.dtype.subtype) and needs_i8_conversion(
988 dtype.subtype
989 ):
990 # This is allowed on the Index.astype but we disallow it here
991 msg = (
992 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
993 )
994 raise TypeError(msg)
995
996 # need to cast to different subtype
997 try:
998 # We need to use Index rules for astype to prevent casting
999 # np.nan entries to int subtypes
1000 new_left = Index(self._left, copy=False).astype(dtype.subtype)
1001 new_right = Index(self._right, copy=False).astype(dtype.subtype)
1002 except IntCastingNaNError:
1003 # e.g test_subtype_integer
1004 raise
1005 except (TypeError, ValueError) as err:
1006 # e.g. test_subtype_integer_errors f8->u8 can be lossy
1007 # and raises ValueError
1008 msg = (
1009 f"Cannot convert {self.dtype} to {dtype}; subtypes are incompatible"
1010 )
1011 raise TypeError(msg) from err
1012 return self._shallow_copy(new_left, new_right)
1013 else:
1014 try:
1015 return super().astype(dtype, copy=copy)
1016 except (TypeError, ValueError) as err:
1017 msg = f"Cannot cast {type(self).__name__} to dtype {dtype}"
1018 raise TypeError(msg) from err
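    # Usage sketch (illustrative): casting changes the subtype while keeping the
    # closed side:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
    #   arr.astype("interval[float64, right]")   # subtype int64 -> float64
    #   arr.astype(object)                       # ndarray of Interval objects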
1019
1020 def equals(self, other) -> bool:
1021 if type(self) != type(other):
1022 return False
1023
1024 return bool(
1025 self.closed == other.closed
1026 and self.left.equals(other.left)
1027 and self.right.equals(other.right)
1028 )
1029
1030 @classmethod
1031 def _concat_same_type(cls, to_concat: Sequence[IntervalArray]) -> Self:
1032 """
1033 Concatenate multiple IntervalArray
1034
1035 Parameters
1036 ----------
1037 to_concat : sequence of IntervalArray
1038
1039 Returns
1040 -------
1041 IntervalArray
1042 """
1043 closed_set = {interval.closed for interval in to_concat}
1044 if len(closed_set) != 1:
1045 raise ValueError("Intervals must all be closed on the same side.")
1046 closed = closed_set.pop()
1047
1048 left: IntervalSide = np.concatenate([interval.left for interval in to_concat])
1049 right: IntervalSide = np.concatenate([interval.right for interval in to_concat])
1050
1051 left, right, dtype = cls._ensure_simple_new_inputs(left, right, closed=closed)
1052
1053 return cls._simple_new(left, right, dtype=dtype)
1054
1055 def copy(self) -> Self:
1056 """
1057 Return a copy of the array.
1058
1059 Returns
1060 -------
1061 IntervalArray
1062 """
1063 left = self._left.copy()
1064 right = self._right.copy()
1065 dtype = self.dtype
1066 return self._simple_new(left, right, dtype=dtype)
1067
1068 def isna(self) -> np.ndarray:
1069 return isna(self._left)
1070
1071 def shift(self, periods: int = 1, fill_value: object = None) -> IntervalArray:
1072 if not len(self) or periods == 0:
1073 return self.copy()
1074
1075 self._validate_scalar(fill_value)
1076
1077 # ExtensionArray.shift doesn't work for two reasons
1078 # 1. IntervalArray.dtype.na_value may not be correct for the dtype.
1079 # 2. IntervalArray._from_sequence only accepts NaN for missing values,
1080 # not other values like NaT
1081
1082 empty_len = min(abs(periods), len(self))
1083 if isna(fill_value):
1084 from pandas import Index
1085
1086 fill_value = Index(self._left, copy=False)._na_value
1087 empty = IntervalArray.from_breaks([fill_value] * (empty_len + 1))
1088 else:
1089 empty = self._from_sequence([fill_value] * empty_len, dtype=self.dtype)
1090
1091 if periods > 0:
1092 a = empty
1093 b = self[:-periods]
1094 else:
1095 a = self[abs(periods) :]
1096 b = empty
1097 return self._concat_same_type([a, b])
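    # Usage sketch (illustrative): shifting introduces missing intervals in the
    # vacated positions:
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
    #   arr.shift(1)    # -> [NaN, (0.0, 1.0], (1.0, 2.0]]
    #   arr.shift(-1)   # -> [(1.0, 2.0], (2.0, 3.0], NaN]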
1098
1099 def take(
1100 self,
1101 indices,
1102 *,
1103 allow_fill: bool = False,
1104 fill_value=None,
1105 axis=None,
1106 **kwargs,
1107 ) -> Self:
1108 """
1109 Take elements from the IntervalArray.
1110
1111 Parameters
1112 ----------
1113 indices : sequence of integers
1114 Indices to be taken.
1115
1116 allow_fill : bool, default False
1117 How to handle negative values in `indices`.
1118
1119 * False: negative values in `indices` indicate positional indices
1120 from the right (the default). This is similar to
1121 :func:`numpy.take`.
1122
1123 * True: negative values in `indices` indicate
              missing values. These values are set to `fill_value`. Any other
              negative values raise a ``ValueError``.
1126
1127 fill_value : Interval or NA, optional
1128 Fill value to use for NA-indices when `allow_fill` is True.
1129 This may be ``None``, in which case the default NA value for
1130 the type, ``self.dtype.na_value``, is used.
1131
1132 For many ExtensionArrays, there will be two representations of
1133 `fill_value`: a user-facing "boxed" scalar, and a low-level
1134 physical NA value. `fill_value` should be the user-facing version,
1135 and the implementation should handle translating that to the
1136 physical version for processing the take if necessary.
1137
1138 axis : any, default None
1139 Present for compat with IntervalIndex; does nothing.
1140
1141 Returns
1142 -------
1143 IntervalArray
1144
1145 Raises
1146 ------
1147 IndexError
1148 When the indices are out of bounds for the array.
1149 ValueError
1150 When `indices` contains negative values other than ``-1``
1151 and `allow_fill` is True.
1152 """
1153 nv.validate_take((), kwargs)
1154
1155 fill_left = fill_right = fill_value
1156 if allow_fill:
1157 fill_left, fill_right = self._validate_scalar(fill_value)
1158
1159 left_take = take(
1160 self._left, indices, allow_fill=allow_fill, fill_value=fill_left
1161 )
1162 right_take = take(
1163 self._right, indices, allow_fill=allow_fill, fill_value=fill_right
1164 )
1165
1166 return self._shallow_copy(left_take, right_take)
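    # Usage sketch (illustrative):
    #
    #   arr = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2), (2, 3)])
    #   arr.take([2, 0])                      # -> [(2, 3], (0, 1]]
    #   arr.take([0, -1], allow_fill=True)    # -1 becomes a missing interval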
1167
1168 def _validate_listlike(self, value):
1169 # list-like of intervals
1170 try:
1171 array = IntervalArray(value)
1172 self._check_closed_matches(array, name="value")
1173 value_left, value_right = array.left, array.right
1174 except TypeError as err:
1175 # wrong type: not interval or NA
1176 msg = f"'value' should be an interval type, got {type(value)} instead."
1177 raise TypeError(msg) from err
1178
1179 try:
1180 self.left._validate_fill_value(value_left)
1181 except (LossySetitemError, TypeError) as err:
1182 msg = (
1183 "'value' should be a compatible interval type, "
1184 f"got {type(value)} instead."
1185 )
1186 raise TypeError(msg) from err
1187
1188 return value_left, value_right
1189
1190 def _validate_scalar(self, value):
1191 if isinstance(value, Interval):
1192 self._check_closed_matches(value, name="value")
1193 left, right = value.left, value.right
1194 # TODO: check subdtype match like _validate_setitem_value?
1195 elif is_valid_na_for_dtype(value, self.left.dtype):
1196 # GH#18295
1197 left = right = self.left._na_value
1198 else:
1199 raise TypeError(
1200 "can only insert Interval objects and NA into an IntervalArray"
1201 )
1202 return left, right
1203
1204 def _validate_setitem_value(self, value):
1205 if is_valid_na_for_dtype(value, self.left.dtype):
1206 # na value: need special casing to set directly on numpy arrays
1207 value = self.left._na_value
1208 if is_integer_dtype(self.dtype.subtype):
1209 # can't set NaN on a numpy integer array
1210 # GH#45484 TypeError, not ValueError, matches what we get with
1211 # non-NA un-holdable value.
1212 raise TypeError("Cannot set float NaN to integer-backed IntervalArray")
1213 value_left, value_right = value, value
1214
1215 elif isinstance(value, Interval):
1216 # scalar interval
1217 self._check_closed_matches(value, name="value")
1218 value_left, value_right = value.left, value.right
1219 self.left._validate_fill_value(value_left)
1220 self.left._validate_fill_value(value_right)
1221
1222 else:
1223 return self._validate_listlike(value)
1224
1225 return value_left, value_right
1226
1227 def value_counts(self, dropna: bool = True) -> Series:
1228 """
1229 Returns a Series containing counts of each interval.
1230
1231 Parameters
1232 ----------
1233 dropna : bool, default True
1234 Don't include counts of NaN.
1235
1236 Returns
1237 -------
1238 counts : Series
1239
1240 See Also
1241 --------
1242 Series.value_counts
1243 """
        # TODO: implement this in a non-naive way!
1245 with warnings.catch_warnings():
1246 warnings.filterwarnings(
1247 "ignore",
1248 "The behavior of value_counts with object-dtype is deprecated",
1249 category=FutureWarning,
1250 )
1251 result = value_counts(np.asarray(self), dropna=dropna)
1252 # Once the deprecation is enforced, we will need to do
1253 # `result.index = result.index.astype(self.dtype)`
1254 return result
1255
1256 # ---------------------------------------------------------------------
1257 # Rendering Methods
1258
1259 def _formatter(self, boxed: bool = False):
1260 # returning 'str' here causes us to render as e.g. "(0, 1]" instead of
1261 # "Interval(0, 1, closed='right')"
1262 return str
1263
1264 # ---------------------------------------------------------------------
1265 # Vectorized Interval Properties/Attributes
1266
1267 @property
1268 def left(self) -> Index:
1269 """
1270 Return the left endpoints of each Interval in the IntervalArray as an Index.
1271
1272 Examples
1273 --------
1274
1275 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
1276 >>> interv_arr
1277 <IntervalArray>
1278 [(0, 1], (2, 5]]
1279 Length: 2, dtype: interval[int64, right]
1280 >>> interv_arr.left
1281 Index([0, 2], dtype='int64')
1282 """
1283 from pandas import Index
1284
1285 return Index(self._left, copy=False)
1286
1287 @property
1288 def right(self) -> Index:
1289 """
1290 Return the right endpoints of each Interval in the IntervalArray as an Index.
1291
1292 Examples
1293 --------
1294
1295 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(2, 5)])
1296 >>> interv_arr
1297 <IntervalArray>
1298 [(0, 1], (2, 5]]
1299 Length: 2, dtype: interval[int64, right]
1300 >>> interv_arr.right
1301 Index([1, 5], dtype='int64')
1302 """
1303 from pandas import Index
1304
1305 return Index(self._right, copy=False)
1306
1307 @property
1308 def length(self) -> Index:
1309 """
1310 Return an Index with entries denoting the length of each Interval.
1311
1312 Examples
1313 --------
1314
1315 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1316 >>> interv_arr
1317 <IntervalArray>
1318 [(0, 1], (1, 5]]
1319 Length: 2, dtype: interval[int64, right]
1320 >>> interv_arr.length
1321 Index([1, 4], dtype='int64')
1322 """
1323 return self.right - self.left
1324
1325 @property
1326 def mid(self) -> Index:
1327 """
1328 Return the midpoint of each Interval in the IntervalArray as an Index.
1329
1330 Examples
1331 --------
1332
1333 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1334 >>> interv_arr
1335 <IntervalArray>
1336 [(0, 1], (1, 5]]
1337 Length: 2, dtype: interval[int64, right]
1338 >>> interv_arr.mid
1339 Index([0.5, 3.0], dtype='float64')
1340 """
1341 try:
1342 return 0.5 * (self.left + self.right)
1343 except TypeError:
1344 # datetime safe version
1345 return self.left + 0.5 * self.length
1346
1347 _interval_shared_docs["overlaps"] = textwrap.dedent(
1348 """
1349 Check elementwise if an Interval overlaps the values in the %(klass)s.
1350
1351 Two intervals overlap if they share a common point, including closed
1352 endpoints. Intervals that only have an open endpoint in common do not
1353 overlap.
1354
1355 Parameters
1356 ----------
1357 other : %(klass)s
1358 Interval to check against for an overlap.
1359
1360 Returns
1361 -------
1362 ndarray
1363 Boolean array positionally indicating where an overlap occurs.
1364
1365 See Also
1366 --------
1367 Interval.overlaps : Check whether two Interval objects overlap.
1368
1369 Examples
1370 --------
1371 %(examples)s
1372 >>> intervals.overlaps(pd.Interval(0.5, 1.5))
1373 array([ True, True, False])
1374
1375 Intervals that share closed endpoints overlap:
1376
1377 >>> intervals.overlaps(pd.Interval(1, 3, closed='left'))
1378 array([ True, True, True])
1379
1380 Intervals that only have an open endpoint in common do not overlap:
1381
1382 >>> intervals.overlaps(pd.Interval(1, 2, closed='right'))
1383 array([False, True, False])
1384 """
1385 )
1386
1387 @Appender(
1388 _interval_shared_docs["overlaps"]
1389 % {
1390 "klass": "IntervalArray",
1391 "examples": textwrap.dedent(
1392 """\
1393 >>> data = [(0, 1), (1, 3), (2, 4)]
1394 >>> intervals = pd.arrays.IntervalArray.from_tuples(data)
1395 >>> intervals
1396 <IntervalArray>
1397 [(0, 1], (1, 3], (2, 4]]
1398 Length: 3, dtype: interval[int64, right]
1399 """
1400 ),
1401 }
1402 )
1403 def overlaps(self, other):
1404 if isinstance(other, (IntervalArray, ABCIntervalIndex)):
1405 raise NotImplementedError
1406 if not isinstance(other, Interval):
1407 msg = f"`other` must be Interval-like, got {type(other).__name__}"
1408 raise TypeError(msg)
1409
1410 # equality is okay if both endpoints are closed (overlap at a point)
1411 op1 = le if (self.closed_left and other.closed_right) else lt
1412 op2 = le if (other.closed_left and self.closed_right) else lt
1413
        # "overlaps" is equivalent to the negation of the two intervals being
        # disjoint:
        #   disjoint = (A.left > B.right) or (B.left > A.right)
        # (simplifying the negation allows this to be done in fewer operations)
1417 return op1(self.left, other.right) & op2(other.left, self.right)
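    # Worked illustration: for A = (0, 2] and B = (1, 3] neither disjoint
    # condition holds, so they overlap; for A = (0, 1] and B = (1, 2] the shared
    # endpoint 1 is open on B's left side, so op2 uses strict ``lt`` and the
    # intervals do not overlap.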
1418
1419 # ---------------------------------------------------------------------
1420
1421 @property
1422 def closed(self) -> IntervalClosedType:
1423 """
        String describing the inclusive side of the intervals.
1425
1426 Either ``left``, ``right``, ``both`` or ``neither``.
1427
1428 Examples
1429 --------
1430
1431 For arrays:
1432
1433 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1434 >>> interv_arr
1435 <IntervalArray>
1436 [(0, 1], (1, 5]]
1437 Length: 2, dtype: interval[int64, right]
1438 >>> interv_arr.closed
1439 'right'
1440
1441 For Interval Index:
1442
1443 >>> interv_idx = pd.interval_range(start=0, end=2)
1444 >>> interv_idx
1445 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1446 >>> interv_idx.closed
1447 'right'
1448 """
1449 return self.dtype.closed
1450
1451 _interval_shared_docs["set_closed"] = textwrap.dedent(
1452 """
1453 Return an identical %(klass)s closed on the specified side.
1454
1455 Parameters
1456 ----------
1457 closed : {'left', 'right', 'both', 'neither'}
1458 Whether the intervals are closed on the left-side, right-side, both
1459 or neither.
1460
1461 Returns
1462 -------
1463 %(klass)s
1464
1465 %(examples)s\
1466 """
1467 )
1468
1469 @Appender(
1470 _interval_shared_docs["set_closed"]
1471 % {
1472 "klass": "IntervalArray",
1473 "examples": textwrap.dedent(
1474 """\
1475 Examples
1476 --------
1477 >>> index = pd.arrays.IntervalArray.from_breaks(range(4))
1478 >>> index
1479 <IntervalArray>
1480 [(0, 1], (1, 2], (2, 3]]
1481 Length: 3, dtype: interval[int64, right]
1482 >>> index.set_closed('both')
1483 <IntervalArray>
1484 [[0, 1], [1, 2], [2, 3]]
1485 Length: 3, dtype: interval[int64, both]
1486 """
1487 ),
1488 }
1489 )
1490 def set_closed(self, closed: IntervalClosedType) -> Self:
1491 if closed not in VALID_CLOSED:
1492 msg = f"invalid option for 'closed': {closed}"
1493 raise ValueError(msg)
1494
1495 left, right = self._left, self._right
1496 dtype = IntervalDtype(left.dtype, closed=closed)
1497 return self._simple_new(left, right, dtype=dtype)
1498
1499 _interval_shared_docs[
1500 "is_non_overlapping_monotonic"
1501 ] = """
    Return a boolean indicating whether the %(klass)s is non-overlapping
    and monotonic.

    Non-overlapping means that no Intervals share points, and monotonic means
    either monotonically increasing or monotonically decreasing.
1506
1507 Examples
1508 --------
1509 For arrays:
1510
1511 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1), pd.Interval(1, 5)])
1512 >>> interv_arr
1513 <IntervalArray>
1514 [(0, 1], (1, 5]]
1515 Length: 2, dtype: interval[int64, right]
1516 >>> interv_arr.is_non_overlapping_monotonic
1517 True
1518
1519 >>> interv_arr = pd.arrays.IntervalArray([pd.Interval(0, 1),
1520 ... pd.Interval(-1, 0.1)])
1521 >>> interv_arr
1522 <IntervalArray>
1523 [(0.0, 1.0], (-1.0, 0.1]]
1524 Length: 2, dtype: interval[float64, right]
1525 >>> interv_arr.is_non_overlapping_monotonic
1526 False
1527
1528 For Interval Index:
1529
1530 >>> interv_idx = pd.interval_range(start=0, end=2)
1531 >>> interv_idx
1532 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1533 >>> interv_idx.is_non_overlapping_monotonic
1534 True
1535
1536 >>> interv_idx = pd.interval_range(start=0, end=2, closed='both')
1537 >>> interv_idx
1538 IntervalIndex([[0, 1], [1, 2]], dtype='interval[int64, both]')
1539 >>> interv_idx.is_non_overlapping_monotonic
1540 False
1541 """
1542
1543 @property
1544 @Appender(
1545 _interval_shared_docs["is_non_overlapping_monotonic"] % _shared_docs_kwargs
1546 )
1547 def is_non_overlapping_monotonic(self) -> bool:
1548 # must be increasing (e.g., [0, 1), [1, 2), [2, 3), ... )
1549 # or decreasing (e.g., [-1, 0), [-2, -1), [-3, -2), ...)
1550 # we already require left <= right
1551
1552 # strict inequality for closed == 'both'; equality implies overlapping
1553 # at a point when both sides of intervals are included
1554 if self.closed == "both":
1555 return bool(
1556 (self._right[:-1] < self._left[1:]).all()
1557 or (self._left[:-1] > self._right[1:]).all()
1558 )
1559
1560 # non-strict inequality when closed != 'both'; at least one side is
1561 # not included in the intervals, so equality does not imply overlapping
1562 return bool(
1563 (self._right[:-1] <= self._left[1:]).all()
1564 or (self._left[:-1] >= self._right[1:]).all()
1565 )
1566
1567 # ---------------------------------------------------------------------
1568 # Conversion
1569
1570 def __array__(
1571 self, dtype: NpDtype | None = None, copy: bool | None = None
1572 ) -> np.ndarray:
1573 """
1574 Return the IntervalArray's data as a numpy array of Interval
1575 objects (with dtype='object')
1576 """
1577 left = self._left
1578 right = self._right
1579 mask = self.isna()
1580 closed = self.closed
1581
1582 result = np.empty(len(left), dtype=object)
1583 for i, left_value in enumerate(left):
1584 if mask[i]:
1585 result[i] = np.nan
1586 else:
1587 result[i] = Interval(left_value, right[i], closed)
1588 return result
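    # Conversion sketch (illustrative): ``np.asarray(arr)`` gives an object-dtype
    # ndarray of scalar Interval objects, with ``np.nan`` in missing positions:
    #
    #   np.asarray(pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]))
    #   # array([Interval(0, 1, closed='right'), Interval(1, 2, closed='right')],
    #   #       dtype=object)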
1589
1590 def __arrow_array__(self, type=None):
1591 """
1592 Convert myself into a pyarrow Array.
1593 """
1594 import pyarrow
1595
1596 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
1597
1598 try:
1599 subtype = pyarrow.from_numpy_dtype(self.dtype.subtype)
1600 except TypeError as err:
1601 raise TypeError(
1602 f"Conversion to arrow with subtype '{self.dtype.subtype}' "
1603 "is not supported"
1604 ) from err
1605 interval_type = ArrowIntervalType(subtype, self.closed)
1606 storage_array = pyarrow.StructArray.from_arrays(
1607 [
1608 pyarrow.array(self._left, type=subtype, from_pandas=True),
1609 pyarrow.array(self._right, type=subtype, from_pandas=True),
1610 ],
1611 names=["left", "right"],
1612 )
1613 mask = self.isna()
1614 if mask.any():
1615 # if there are missing values, set validity bitmap also on the array level
1616 null_bitmap = pyarrow.array(~mask).buffers()[1]
1617 storage_array = pyarrow.StructArray.from_buffers(
1618 storage_array.type,
1619 len(storage_array),
1620 [null_bitmap],
1621 children=[storage_array.field(0), storage_array.field(1)],
1622 )
1623
1624 if type is not None:
1625 if type.equals(interval_type.storage_type):
1626 return storage_array
1627 elif isinstance(type, ArrowIntervalType):
1628 # ensure we have the same subtype and closed attributes
1629 if not type.equals(interval_type):
1630 raise TypeError(
1631 "Not supported to convert IntervalArray to type with "
1632 f"different 'subtype' ({self.dtype.subtype} vs {type.subtype}) "
1633 f"and 'closed' ({self.closed} vs {type.closed}) attributes"
1634 )
1635 else:
1636 raise TypeError(
1637 f"Not supported to convert IntervalArray to '{type}' type"
1638 )
1639
1640 return pyarrow.ExtensionArray.from_storage(interval_type, storage_array)
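    # Usage sketch (illustrative, requires pyarrow): ``pyarrow.array`` picks up
    # this protocol and returns an extension array whose storage is a
    # struct<left, right> array:
    #
    #   import pyarrow as pa
    #   pa.array(pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)]))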
1641
1642 _interval_shared_docs["to_tuples"] = textwrap.dedent(
1643 """
1644 Return an %(return_type)s of tuples of the form (left, right).
1645
1646 Parameters
1647 ----------
1648 na_tuple : bool, default True
1649 If ``True``, return ``NA`` as a tuple ``(nan, nan)``. If ``False``,
1650 just return ``NA`` as ``nan``.
1651
1652 Returns
1653 -------
1654 tuples: %(return_type)s
1655 %(examples)s\
1656 """
1657 )
1658
1659 @Appender(
1660 _interval_shared_docs["to_tuples"]
1661 % {
1662 "return_type": (
1663 "ndarray (if self is IntervalArray) or Index (if self is IntervalIndex)"
1664 ),
1665 "examples": textwrap.dedent(
1666 """\
1667
1668 Examples
1669 --------
1670 For :class:`pandas.IntervalArray`:
1671
1672 >>> idx = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 2)])
1673 >>> idx
1674 <IntervalArray>
1675 [(0, 1], (1, 2]]
1676 Length: 2, dtype: interval[int64, right]
1677 >>> idx.to_tuples()
1678 array([(0, 1), (1, 2)], dtype=object)
1679
1680 For :class:`pandas.IntervalIndex`:
1681
1682 >>> idx = pd.interval_range(start=0, end=2)
1683 >>> idx
1684 IntervalIndex([(0, 1], (1, 2]], dtype='interval[int64, right]')
1685 >>> idx.to_tuples()
1686 Index([(0, 1), (1, 2)], dtype='object')
1687 """
1688 ),
1689 }
1690 )
1691 def to_tuples(self, na_tuple: bool = True) -> np.ndarray:
1692 tuples = com.asarray_tuplesafe(zip(self._left, self._right))
1693 if not na_tuple:
1694 # GH 18756
1695 tuples = np.where(~self.isna(), tuples, np.nan)
1696 return tuples
1697
1698 # ---------------------------------------------------------------------
1699
1700 def _putmask(self, mask: npt.NDArray[np.bool_], value) -> None:
1701 value_left, value_right = self._validate_setitem_value(value)
1702
1703 if isinstance(self._left, np.ndarray):
1704 np.putmask(self._left, mask, value_left)
1705 assert isinstance(self._right, np.ndarray)
1706 np.putmask(self._right, mask, value_right)
1707 else:
1708 self._left._putmask(mask, value_left)
1709 assert not isinstance(self._right, np.ndarray)
1710 self._right._putmask(mask, value_right)
1711
1712 def insert(self, loc: int, item: Interval) -> Self:
1713 """
        Return a new IntervalArray inserting the new item at the given location.
        Follows numpy.insert semantics for negative values. Only Interval
        objects and NA can be inserted into an IntervalArray.
1717
1718 Parameters
1719 ----------
1720 loc : int
1721 item : Interval
1722
1723 Returns
1724 -------
1725 IntervalArray
1726 """
1727 left_insert, right_insert = self._validate_scalar(item)
1728
1729 new_left = self.left.insert(loc, left_insert)
1730 new_right = self.right.insert(loc, right_insert)
1731
1732 return self._shallow_copy(new_left, new_right)
1733
1734 def delete(self, loc) -> Self:
1735 if isinstance(self._left, np.ndarray):
1736 new_left = np.delete(self._left, loc)
1737 assert isinstance(self._right, np.ndarray)
1738 new_right = np.delete(self._right, loc)
1739 else:
1740 new_left = self._left.delete(loc)
1741 assert not isinstance(self._right, np.ndarray)
1742 new_right = self._right.delete(loc)
1743 return self._shallow_copy(left=new_left, right=new_right)
1744
1745 @Appender(_extension_array_shared_docs["repeat"] % _shared_docs_kwargs)
1746 def repeat(
1747 self,
1748 repeats: int | Sequence[int],
1749 axis: AxisInt | None = None,
1750 ) -> Self:
1751 nv.validate_repeat((), {"axis": axis})
1752 left_repeat = self.left.repeat(repeats)
1753 right_repeat = self.right.repeat(repeats)
1754 return self._shallow_copy(left=left_repeat, right=right_repeat)
1755
1756 _interval_shared_docs["contains"] = textwrap.dedent(
1757 """
1758 Check elementwise if the Intervals contain the value.
1759
1760 Return a boolean mask whether the value is contained in the Intervals
1761 of the %(klass)s.
1762
1763 Parameters
1764 ----------
1765 other : scalar
1766 The value to check whether it is contained in the Intervals.
1767
1768 Returns
1769 -------
1770 boolean array
1771
1772 See Also
1773 --------
1774 Interval.contains : Check whether Interval object contains value.
1775 %(klass)s.overlaps : Check if an Interval overlaps the values in the
1776 %(klass)s.
1777
1778 Examples
1779 --------
1780 %(examples)s
1781 >>> intervals.contains(0.5)
1782 array([ True, False, False])
1783 """
1784 )
1785
1786 @Appender(
1787 _interval_shared_docs["contains"]
1788 % {
1789 "klass": "IntervalArray",
1790 "examples": textwrap.dedent(
1791 """\
1792 >>> intervals = pd.arrays.IntervalArray.from_tuples([(0, 1), (1, 3), (2, 4)])
1793 >>> intervals
1794 <IntervalArray>
1795 [(0, 1], (1, 3], (2, 4]]
1796 Length: 3, dtype: interval[int64, right]
1797 """
1798 ),
1799 }
1800 )
1801 def contains(self, other):
1802 if isinstance(other, Interval):
1803 raise NotImplementedError("contains not implemented for two intervals")
1804
1805 return (self._left < other if self.open_left else self._left <= other) & (
1806 other < self._right if self.open_right else other <= self._right
1807 )
1808
1809 def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
1810 if isinstance(values, IntervalArray):
1811 if self.closed != values.closed:
1812 # not comparable -> no overlap
1813 return np.zeros(self.shape, dtype=bool)
1814
1815 if self.dtype == values.dtype:
1816 # GH#38353 instead of casting to object, operating on a
1817 # complex128 ndarray is much more performant.
1818 left = self._combined.view("complex128")
1819 right = values._combined.view("complex128")
1820 # error: Argument 1 to "isin" has incompatible type
1821 # "Union[ExtensionArray, ndarray[Any, Any],
1822 # ndarray[Any, dtype[Any]]]"; expected
1823 # "Union[_SupportsArray[dtype[Any]],
1824 # _NestedSequence[_SupportsArray[dtype[Any]]], bool,
1825 # int, float, complex, str, bytes, _NestedSequence[
1826 # Union[bool, int, float, complex, str, bytes]]]"
1827 return np.isin(left, right).ravel() # type: ignore[arg-type]
1828
1829 elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
1830 values.left.dtype
1831 ):
1832 # not comparable -> no overlap
1833 return np.zeros(self.shape, dtype=bool)
1834
1835 return isin(self.astype(object), values.astype(object))
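    # Sketch of the complex128 trick above: when the dtypes match exactly, each
    # (left, right) pair of 64-bit bounds is reinterpreted as one complex128
    # value (for a float64 subtype, (0.0, 1.0] maps to 0+1j), so membership
    # testing reduces to ``np.isin`` on 1-D data instead of an object-dtype
    # comparison.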
1836
1837 @property
1838 def _combined(self) -> IntervalSide:
1839 # error: Item "ExtensionArray" of "ExtensionArray | ndarray[Any, Any]"
1840 # has no attribute "reshape" [union-attr]
1841 left = self.left._values.reshape(-1, 1) # type: ignore[union-attr]
1842 right = self.right._values.reshape(-1, 1) # type: ignore[union-attr]
1843 if needs_i8_conversion(left.dtype):
1844 # error: Item "ndarray[Any, Any]" of "Any | ndarray[Any, Any]" has
1845 # no attribute "_concat_same_type"
1846 comb = left._concat_same_type( # type: ignore[union-attr]
1847 [left, right], axis=1
1848 )
1849 else:
1850 comb = np.concatenate([left, right], axis=1)
1851 return comb
1852
1853 def _from_combined(self, combined: np.ndarray) -> IntervalArray:
1854 """
1855 Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
1856 """
1857 nc = combined.view("i8").reshape(-1, 2)
1858
1859 dtype = self._left.dtype
1860 if needs_i8_conversion(dtype):
1861 assert isinstance(self._left, (DatetimeArray, TimedeltaArray))
1862 new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1863 assert isinstance(self._right, (DatetimeArray, TimedeltaArray))
1864 new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
1865 else:
1866 assert isinstance(dtype, np.dtype)
1867 new_left = nc[:, 0].view(dtype)
1868 new_right = nc[:, 1].view(dtype)
1869 return self._shallow_copy(left=new_left, right=new_right)
1870
1871 def unique(self) -> IntervalArray:
1872 # No overload variant of "__getitem__" of "ExtensionArray" matches argument
1873 # type "Tuple[slice, int]"
1874 nc = unique(
1875 self._combined.view("complex128")[:, 0] # type: ignore[call-overload]
1876 )
1877 nc = nc[:, None]
1878 return self._from_combined(nc)
1879
1880
1881def _maybe_convert_platform_interval(values) -> ArrayLike:
1882 """
1883 Try to do platform conversion, with special casing for IntervalArray.
1884 Wrapper around maybe_convert_platform that alters the default return
1885 dtype in certain cases to be compatible with IntervalArray. For example,
1886 empty lists return with integer dtype instead of object dtype, which is
1887 prohibited for IntervalArray.
1888
1889 Parameters
1890 ----------
1891 values : array-like
1892
1893 Returns
1894 -------
1895 array
1896 """
1897 if isinstance(values, (list, tuple)) and len(values) == 0:
1898 # GH 19016
1899 # empty lists/tuples get object dtype by default, but this is
1900 # prohibited for IntervalArray, so coerce to integer instead
1901 return np.array([], dtype=np.int64)
1902 elif not is_list_like(values) or isinstance(values, ABCDataFrame):
1903 # This will raise later, but we avoid passing to maybe_convert_platform
1904 return values
1905 elif isinstance(getattr(values, "dtype", None), CategoricalDtype):
1906 values = np.asarray(values)
1907 elif not hasattr(values, "dtype") and not isinstance(values, (list, tuple, range)):
1908 # TODO: should we just cast these to list?
1909 return values
1910 else:
1911 values = extract_array(values, extract_numpy=True)
1912
1913 if not hasattr(values, "dtype"):
1914 values = np.asarray(values)
1915 if values.dtype.kind in "iu" and values.dtype != np.int64:
1916 values = values.astype(np.int64)
1917 return values
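# Behavior sketch (illustrative) for the helper above: an empty list is coerced
# to an int64 ndarray rather than object dtype, which is why an empty
# ``pd.arrays.IntervalArray([])`` ends up with dtype ``interval[int64, right]``.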